diff --git a/.github/scripts/build-recommendation-object.ps1 b/.github/scripts/build-recommendation-object.ps1 index 6f177115e..dacba8049 100644 --- a/.github/scripts/build-recommendation-object.ps1 +++ b/.github/scripts/build-recommendation-object.ps1 @@ -1,8 +1,8 @@ -#install-module powershell-yaml -force -scope currentuser +install-module powershell-yaml -force -scope currentuser function Build-APRLJsonObject { param ( - [string[]]$path + [string]$path ) $kqlfiles = Get-ChildItem -Path $path -Recurse -Filter "*.kql" @@ -10,7 +10,7 @@ function Build-APRLJsonObject { $yamlobj = foreach($file in $yamlfiles){ $content = Get-Content $file.FullName -Raw | ConvertFrom-Yaml - $content | Select-Object publishedToAdvisor,aprlGuid,recommendationTypeId,recommendationMetadataState,learnMoreLink,recommendationControl,longDescription,pgVerified,description,potentialBenefits,publishedToLearn,tags,recommendationResourceType,recommendationImpact,automationAvailable,query + $content | Select-Object publishedToAdvisor,aprlGuid,recommendationTypeId,recommendationMetadataState,learnMoreLink,recommendationControl,longDescription,pgVerified,description,potentialBenefits,tags,recommendationResourceType,recommendationImpact,automationAvailable,query } $kqlobj = foreach($file in $kqlfiles){ @@ -30,7 +30,7 @@ function Build-APRLJsonObject { #Try to build and export the object. 
If it fails, catch the error and exit with code 1 try{ - Build-APRLJsonObject -path @("./azure-resources","./azure-specialized-workloads","./azure-waf") | ConvertTo-Json -Depth 10 | Out-File -FilePath "./tools/data/recommendations.json" -Force + Build-APRLJsonObject -path "./azure-resources" | ConvertTo-Json -Depth 10 | Out-File -FilePath "./tools/data/recommendations.json" -Force exit 0 } catch{ diff --git a/.github/scripts/schemas/azure-resources-and-waf-schema.yaml b/.github/scripts/schemas/azure-resources-and-waf-schema.yaml index c11900331..96de6c2df 100644 --- a/.github/scripts/schemas/azure-resources-and-waf-schema.yaml +++ b/.github/scripts/schemas/azure-resources-and-waf-schema.yaml @@ -12,7 +12,6 @@ recommendation: longDescription: str(max=300) potentialBenefits: str(max=60) pgVerified: bool() - publishedToLearn: bool() automationAvailable: bool() tags: null() learnMoreLink: list() diff --git a/.github/scripts/schemas/azure-specialized-workloads-schema.yaml b/.github/scripts/schemas/azure-specialized-workloads-schema.yaml index c6033a93d..dce846a8b 100644 --- a/.github/scripts/schemas/azure-specialized-workloads-schema.yaml +++ b/.github/scripts/schemas/azure-specialized-workloads-schema.yaml @@ -12,7 +12,6 @@ recommendation: longDescription: str() potentialBenefits: str(max=60) pgVerified: bool() - publishedToLearn: bool() automationAvailable: bool() tags: null() learnMoreLink: list() diff --git a/.github/workflows/ado-sync-workitems.yml b/.github/workflows/ado-sync-workitems.yml index e6896d572..1cbc0f2a9 100644 --- a/.github/workflows/ado-sync-workitems.yml +++ b/.github/workflows/ado-sync-workitems.yml @@ -26,6 +26,7 @@ jobs: env: ado_token: '${{ secrets.ADO_PERSONAL_ACCESS_TOKEN }}' github_token: '${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}' + ado.autoCreate: 'true' config_file: './.github/actions-config/gh-ado-sync-config.json' with: ado: ${{ secrets.ADO_MAPPINGS_HANDLES }} diff --git a/azure-resources/AAD/domainServices/recommendations.yaml 
b/azure-resources/AAD/domainServices/recommendations.yaml index f0e73920f..5926c5f51 100644 --- a/azure-resources/AAD/domainServices/recommendations.yaml +++ b/azure-resources/AAD/domainServices/recommendations.yaml @@ -9,7 +9,6 @@ You need to use a minimum of Enterprise SKU for your managed domain to support replica sets. potentialBenefits: The Enterprise SKU enables creation of replica sets. pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -28,7 +27,6 @@ You can add a replica set to any peered virtual network in any Azure region that supports Domain Services. potentialBenefits: The replica sets provide geographical resiliency. pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/AVS/privateClouds/recommendations.yaml b/azure-resources/AVS/privateClouds/recommendations.yaml index 94168a3fe..410f6f03e 100644 --- a/azure-resources/AVS/privateClouds/recommendations.yaml +++ b/azure-resources/AVS/privateClouds/recommendations.yaml @@ -9,7 +9,6 @@ Ensure Azure Service Health notifications are set for Azure VMware Solution across all used regions and subscriptions. This communicates service/security issues and maintenance activities like host replacements and upgrades, reducing service request submissions. potentialBenefits: Prompt mitigation of issues. pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Set an alert for when the node count in Azure VMware Solution Private Cloud hits or exceeds 90 hosts, enabling timely planning for a new private cloud. potentialBenefits: Proactive capacity planning pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Alert when the cluster size reaches 14 hosts. Set up periodic alerts for planning new clusters or datastores due to growth, especially from storage needs. 
Beyond 14 hosts, trigger alerts for each new host addition for proactive resource monitoring. potentialBenefits: Proactive resource management pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ For Azure VMware Solution, enabling Stretched Clusters offers 99.99% SLA, synchronous storage replication (RPO=0), and spreads vSAN datastore across two AZs. Must be done at initial setup, needing double quota due to extension across AZs. potentialBenefits: 99.99% SLA, 0 RPO, Multi-AZ pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -83,7 +79,6 @@ Ensure VMware vSAN datastore slack space is maintained for SLA by monitoring storage utilization and setting alerts at 70% and 75% utilization to allow for capacity planning. To expand, add hosts or external storage like Azure Elastic SAN, Azure NetApp Files, if CPU and RAM requirements are met. potentialBenefits: Optimized capacity planning for vSAN pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -101,7 +96,6 @@ Ensure Diagnostic Settings are configured for each private cloud to send syslogs to external sources for analysis and/or archiving. Azure VMware Solution Syslogs contain data for troubleshooting and performance, aiding quicker issue resolution and early detection of issues. potentialBenefits: Faster issue resolution, early detection pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -119,7 +113,6 @@ Ensure sufficient compute resources to avoid host resource exhaustion in Azure VMware Solution, which utilizes vSphere DRS and HA for dynamic workload resource management. However, sustained CPU utilization over 95% may increase CPU Ready times, impacting workloads. 
potentialBenefits: Avoids resource exhaustion, optimizes performance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -137,7 +130,6 @@ Ensure sufficient memory resources to prevent host resource exhaustion in Azure VMware Solution. It uses vSphere DRS and vSphere HA for dynamic workload management. Yet, continuous memory use over 95% leads to disk swapping, affecting workloads. potentialBenefits: Avoids host exhaustion and swapping pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -155,7 +147,6 @@ Applying a resource delete lock to the Azure VMware Solution Private Cloud resource group prevents unauthorized or accidental deletion by anyone with contributor access, ensuring the protection and reliability of the Azure VMware Solution Private Cloud. potentialBenefits: Prevents accidental deletion pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -173,7 +164,6 @@ When using customer-managed keys for encrypting vSAN datastores, leveraging Azure Key Vault for central management and accessing them via a managed identity linked to the private cloud is advised. The expiration of these keys can render the vSAN datastore and its associated workloads inaccessible. potentialBenefits: Avoid outages with key auto-rotation pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -191,7 +181,6 @@ Azure VMware Solution private clouds support up to three DNS servers for a single FQDN, preventing a single DNS server from becoming a point of failure. It's crucial to use multiple DNS servers for on-premises FQDN resolution from each private cloud. 
potentialBenefits: Enhances reliability and avoids failure pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/ApiManagement/service/recommendations.yaml b/azure-resources/ApiManagement/service/recommendations.yaml index 32ad53d3b..46adaa805 100644 --- a/azure-resources/ApiManagement/service/recommendations.yaml +++ b/azure-resources/ApiManagement/service/recommendations.yaml @@ -9,7 +9,6 @@ Upgrading the API Management instance to the Premium SKU adds support for Availability Zones, enhancing availability and resilience by distributing services across physically separate locations within Azure regions. potentialBenefits: Enhanced availability and resilience pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Zone redundancy for APIM instances ensures the gateway and control plane (Management API, developer portal, Git configuration) are replicated across datacenters in physically separated zones, boosting resilience to zone failures. potentialBenefits: Improved resilience to zone failures pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Upgrading to API Management stv2 is required as stv1 retires on 31 Aug 2024, offering enhanced capabilities with the new platform version. potentialBenefits: Ensures service continuity pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -69,7 +66,6 @@ Use API Management with auto-scale for high availability in workloads that experience variable traffic patterns. There are several limitations with auto-scale, so review the documentation to ensure it meets your requirements. 
potentialBenefits: Enhanced availability and resilience pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/App/containerApps/recommendations.yaml b/azure-resources/App/containerApps/recommendations.yaml index bb809b2c5..24ac68f05 100644 --- a/azure-resources/App/containerApps/recommendations.yaml +++ b/azure-resources/App/containerApps/recommendations.yaml @@ -9,7 +9,6 @@ Enable container health probes to monitor the health of your container apps and ensure that unhealthy containers are restarted automatically. potentialBenefits: Enhanced availability and resilience pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/App/managedEnvironments/recommendations.yaml b/azure-resources/App/managedEnvironments/recommendations.yaml index 70890f5de..c6222d0bc 100644 --- a/azure-resources/App/managedEnvironments/recommendations.yaml +++ b/azure-resources/App/managedEnvironments/recommendations.yaml @@ -9,7 +9,6 @@ To take advantage of availability zones, you must enable zone redundancy when you create a Container Apps environment. The environment must include a virtual network with an available subnet. To ensure proper distribution of replicas, set your app's minimum replica count to three. potentialBenefits: Enhances app resiliency and reliability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/AppConfiguration/configurationStores/recommendations.yaml b/azure-resources/AppConfiguration/configurationStores/recommendations.yaml index 0163ed5af..404acab84 100644 --- a/azure-resources/AppConfiguration/configurationStores/recommendations.yaml +++ b/azure-resources/AppConfiguration/configurationStores/recommendations.yaml @@ -9,7 +9,6 @@ With Purge protection enabled, soft deleted stores can't be purged in the retention period. 
If disabled, the soft deleted store can be purged before the retention period expires. potentialBenefits: Prevent accidental deletion of configuration stores. pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ SLA is not available for Free tier. Upgrade to the Standard tier to get an SLA of 99.9% potentialBenefits: High availability, more storage, higher request quota. pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Automation/automationAccounts/recommendations.yaml b/azure-resources/Automation/automationAccounts/recommendations.yaml index 09f507f43..39275f921 100644 --- a/azure-resources/Automation/automationAccounts/recommendations.yaml +++ b/azure-resources/Automation/automationAccounts/recommendations.yaml @@ -9,7 +9,6 @@ Set up disaster recovery for Automation accounts and resources like Modules, Connections, Credentials, Certificates, Variables, and Schedules to deal with region or zone failures. A replica Automation account should be ready in a secondary region for failover. potentialBenefits: Ensures continuity during outages pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Batch/batchAccounts/recommendations.yaml b/azure-resources/Batch/batchAccounts/recommendations.yaml index 8b3a8dc12..4191ac2a9 100644 --- a/azure-resources/Batch/batchAccounts/recommendations.yaml +++ b/azure-resources/Batch/batchAccounts/recommendations.yaml @@ -9,7 +9,6 @@ To ensure cross-region disaster recovery and business continuity, set the right quotas for all Batch accounts to allocate necessary core numbers upfront, preventing execution interruptions from reaching quota limits. 
potentialBenefits: Ensures business continuity pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ When using Virtual Machine Configuration for Azure Batch pools, opting to distribute your pool across Availability Zones bolsters your compute nodes against Azure datacenter failures. potentialBenefits: Enhanced reliability and failure protection pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Cache/Redis/recommendations.yaml b/azure-resources/Cache/Redis/recommendations.yaml index a6a6fe072..c23b0e3fb 100644 --- a/azure-resources/Cache/Redis/recommendations.yaml +++ b/azure-resources/Cache/Redis/recommendations.yaml @@ -9,7 +9,6 @@ Azure Cache for Redis offers zone redundancy in Premium and Enterprise tiers, using VMs across multiple Availability Zones to ensure greater resilience and availability. potentialBenefits: Higher resilience and availability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -26,7 +25,6 @@ Azure Cache for Redis allows for specifying maintenance windows. A maintenance window allows you to control the days and times of a week during which the VMs hosting your cache can be updated. potentialBenefits: Higher resilience and availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -44,7 +42,6 @@ Use private endpoints for secure connection to cache via a private link, avoiding the public internet. 
potentialBenefits: Secure, private VNet ingress, efficient data transfer pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Cdn/profiles/recommendations.yaml b/azure-resources/Cdn/profiles/recommendations.yaml index 253536316..ab2dc8842 100644 --- a/azure-resources/Cdn/profiles/recommendations.yaml +++ b/azure-resources/Cdn/profiles/recommendations.yaml @@ -9,7 +9,6 @@ For most solutions, choose either Azure Front Door for content caching, CDN, TLS termination, and WAF, or Traffic Manager for simple global load balancing. potentialBenefits: Optimized network routing and security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -33,7 +32,6 @@ Front Door's features perform optimally when traffic exclusively comes through Front Door. It's advised to set up your origin to deny access to traffic that bypasses Front Door. potentialBenefits: Enhances security and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -51,7 +49,6 @@ When working with Azure Front Door through APIs, ARM templates, Bicep, or SDKs, using the latest API or SDK version is crucial. Updates bring new functions, important security patches, and bug fixes. potentialBenefits: Enhanced security and features pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -73,7 +70,6 @@ Front Door logs offer comprehensive telemetry on each request, crucial for understanding your solution's performance and responses, especially when caching is enabled, as origin servers might not receive every request. potentialBenefits: Enhanced insights and solution monitoring pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -95,7 +91,6 @@ Front Door terminates TCP and TLS connections from clients and establishes new connections from each PoP to the origin. 
Securing these connections with TLS, even for Azure-hosted origins, ensures data is always encrypted during transit. potentialBenefits: Ensures data encryption in transit pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -113,7 +108,6 @@ Using HTTPS is ideal for secure connections. However, for compatibility with older clients, HTTP requests may be necessary. Azure Front Door enables auto redirection of HTTP to HTTPS, enhancing security without sacrificing accessibility. potentialBenefits: Enhances security and compliance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -131,7 +125,6 @@ When Front Door manages your TLS certificates, it reduces your operational costs and helps you to avoid costly outages caused by forgetting to renew a certificate. Front Door automatically issues and rotates the managed TLS certificates. potentialBenefits: Lowers costs, avoids outages pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -149,7 +142,6 @@ If you use your own TLS certificates, set the Key Vault certificate version to 'Latest' to avoid reconfiguring Azure Front Door for new certificate versions and waiting for deployment across Front Door's environments. potentialBenefits: Saves time and automates TLS updates pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -167,7 +159,6 @@ Front Door can rewrite Host headers for custom domain names routing to a single origin, useful for avoiding custom domain configuration at both Front Door and the origin. 
potentialBenefits: Improves session/auth handling pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -185,7 +176,6 @@ For internet-facing applications, enabling the Front Door web application firewall (WAF) and configuring it to use managed rules is recommended for protection against a wide range of attacks using Microsoft-managed rules. potentialBenefits: Enhances web app security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -203,7 +193,6 @@ Front Door health probes help detect unavailable or unhealthy origins, directing traffic to alternate origins if needed. potentialBenefits: Reduces unnecessary origin traffic pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -221,7 +210,6 @@ Consider selecting a webpage or location specifically designed for health monitoring as the endpoint for Azure Front Door's health probes. This should encompass the status of critical components like application servers, databases, and caches to serve production traffic efficiently. potentialBenefits: Improves traffic routing and uptime pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -239,7 +227,6 @@ Health probes in Azure Front Door can use GET or HEAD HTTP methods. Using the HEAD method for health probes is a recommended practice because it reduces the traffic load on your origins, being less resource-intensive. potentialBenefits: Reduces traffic load on origins pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -257,7 +244,6 @@ Azure Front Door's geo-filtering through WAF enables defining custom access rules by country/region to restrict or allow web app access. 
potentialBenefits: Enhanced regional access control pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -275,7 +261,6 @@ Azure Private Link enables secure access to Azure PaaS and services over a private endpoint in your virtual network, ensuring traffic goes over the Microsoft backbone network, not the public internet. potentialBenefits: Enhanced security and private connectivity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -293,7 +278,6 @@ Azure Front Door standard is ~45% cheaper then AFD classic and has many additional benefits. Classic is also scheduled to be retired on March 31, 2027. potentialBenefits: Costs savings and additional supported features pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/CognitiveServices/accounts/recommendations.yaml b/azure-resources/CognitiveServices/accounts/recommendations.yaml index aa77584bc..35118bf47 100644 --- a/azure-resources/CognitiveServices/accounts/recommendations.yaml +++ b/azure-resources/CognitiveServices/accounts/recommendations.yaml @@ -9,7 +9,6 @@ All Logs and Metrics should be configured. These logs provide rich, frequent data about the operation of a resource that are used for issue identification and debugging. 
potentialBenefits: Enhanced monitoring and troubleshooting capabilities pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Compute/galleries/recommendations.yaml b/azure-resources/Compute/galleries/recommendations.yaml index d952a6f72..6e9986b3c 100644 --- a/azure-resources/Compute/galleries/recommendations.yaml +++ b/azure-resources/Compute/galleries/recommendations.yaml @@ -9,7 +9,6 @@ Keeping a minimum of 3 replicas for production images in Azure's Compute Gallery ensures scalability and prevents throttling in multi-VM deployments by distributing VM deployments across different replicas. This reduces the risk of overloading a single replica. potentialBenefits: Enhances scalability and avoids throttling pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Use ZRS for high availability when creating image/VM versions in Azure Compute Gallery, offering resilience against Availability Zone failures. ZRS accounts are advisable in regions with Availability Zones, with the choice of Standard_ZRS recommended over Standard_LRS for these regions. potentialBenefits: Enhances image version availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ We recommend creating Trusted Launch Supported Images for benefits like Secure Boot, vTPM, trusted launch VMs, large boot volume. These are Gen 2 Images by default and you cannot change a VM's generation after creation, so review the considerations first. potentialBenefits: Enhances VM security and features pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -69,7 +66,6 @@ On multi-region deployments, replicate Image Versions to a secondary region to ensure disaster recovery capability. 
This ensures that the Image Versions are available in the secondary region in case of a disaster in the primary region. potentialBenefits: Enhances disaster recovery capability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -87,7 +83,6 @@ You can set a different replica count in each target region, based on the scale needs for the region. For every 20 VMs that you create concurrently, we recommend you keep one replica. potentialBenefits: Enhances disaster recovery capability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Compute/virtualMachineScaleSets/recommendations.yaml b/azure-resources/Compute/virtualMachineScaleSets/recommendations.yaml index c1010ec24..6c1d9b9bc 100644 --- a/azure-resources/Compute/virtualMachineScaleSets/recommendations.yaml +++ b/azure-resources/Compute/virtualMachineScaleSets/recommendations.yaml @@ -9,7 +9,6 @@ Deploying even single instance VMs into a scale set with Flexible orchestration mode future-proofs applications for scaling and availability. This mode guarantees high availability (up to 1000 VMs) by distributing VMs across fault domains in a region or within an Availability Zone. potentialBenefits: Higher scalability and availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Monitoring application health in Azure Virtual Machine Scale Sets is crucial for deployment management. It supports rolling upgrades such as automatic OS-image upgrades and VM guest patching, leveraging health monitoring for upgrading. 
potentialBenefits: Enhances deployment management and upgrades pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ Enabling automatic instance repairs in Azure Virtual Machine Scale Sets enhances application availability through a continuous health check and maintenance process. potentialBenefits: Boosts app availability by auto-repair pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ Use custom autoscale for VMSS based on metrics and schedules to improve performance and cost effectiveness, adjusting instances as demand changes. potentialBenefits: Enhances performance and cost-efficiency pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -85,7 +81,6 @@ Predictive autoscale utilizes machine learning to efficiently manage and scale Azure Virtual Machine Scale Sets by forecasting CPU load through historical usage analysis, ensuring timely scale-out to meet demand. potentialBenefits: Optimizes scaling with ML predictions pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -103,7 +98,6 @@ Microsoft advises disabling strictly even VM instance distribution across Availability Zones in VMSS to improve scalability and flexibility, noting that uneven distribution may better serve application load demands despite the potential trade-off in resilience. potentialBenefits: Improves scaling, reduces fail attempts pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -121,7 +115,6 @@ When creating VMSS, implement availability zones as a protection measure for your applications and data against the rare event of datacenter failure. 
potentialBenefits: Enhances disaster resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -141,7 +134,6 @@ Enabling automatic VM guest patching eases update management by safely, automatically patching virtual machines to maintain security compliance, while limiting blast radius of VMs. Note, the KQL will not return sets using Uniform orchestration. potentialBenefits: Eases patch management, enhances security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -161,7 +153,6 @@ Ensure current versions of images are in use to avoid disruption after image deprecation. Please review the publisher, offer, sku information of the VM to ensure you are running on a supported image. Enable Auto Guest Patching or Image Upgrades, to get notifications about image deprecation. potentialBenefits: Avoid disruptions by updating VMSS images. pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Compute/virtualMachines/recommendations.yaml b/azure-resources/Compute/virtualMachines/recommendations.yaml index 28e541fff..d0110e9d7 100644 --- a/azure-resources/Compute/virtualMachines/recommendations.yaml +++ b/azure-resources/Compute/virtualMachines/recommendations.yaml @@ -9,7 +9,6 @@ Production VM workloads should be deployed on multiple VMs and grouped in a VMSS Flex instance to intelligently distribute across the platform, minimizing the impact of platform faults and updates. potentialBenefits: Enhanced fault/update resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Azure Availability Zones, within each Azure region, are tolerant to local failures, protecting applications and data against unlikely Datacenter failures by being physically separate. 
potentialBenefits: Enhanced VM resilience to failures pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ While availability sets are not scheduled for immediate deprecation, they are planned to be deprecated in the future. Migrate workloads from VMs to VMSS Flex for deployment across zones or within the same zone across different fault domains (FDs) for better reliability. potentialBenefits: Enhances reliability and future-proofs VMs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ Replicating Azure VMs via Site Recovery entails continuous, asynchronous disk replication to a target region. Recovery points are generated every few minutes, ensuring a Recovery Point Objective (RPO) in minutes. potentialBenefits: Minimize downtime in disasters pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -85,7 +81,6 @@ Azure is retiring unmanaged disks on September 30, 2025. Users should plan the migration to avoid disruptions and maintain service reliability. potentialBenefits: Avoid retirement disruption, enhance reliability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -107,7 +102,6 @@ A data disk is a managed disk attached to a virtual machine for storing database or other essential data. These disks are SCSI drives labeled as per choice. potentialBenefits: Enhances performance, recovery, migration flexibility pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -127,7 +121,6 @@ Enable backups for your virtual machines with Azure Backup to secure and quickly recover your data. This service offers simple, secure, and cost-effective solutions for backing up and recovering data from the Microsoft Azure cloud. 
potentialBenefits: Secure data recovery and backup pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -145,7 +138,6 @@ Azure Virtual Machines (VM) instances have various states, like provisioning and power states. A non-running VM may indicate issues or it being unnecessary, suggesting removal could help cut costs. potentialBenefits: Reduce costs by removing unused VMs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -163,7 +155,6 @@ Accelerated networking enables SR-IOV to a VM, greatly improving its networking performance by bypassing the host from the data path, which reduces latency, jitter, and CPU utilization for demanding network workloads on supported VM types. potentialBenefits: Reduces latency, jitter and CPU use pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -181,7 +172,6 @@ When Accelerated Networking is enabled, the default Azure VNet interface in GuestOS is swapped for a Mellanox, and its driver comes from a 3rd party. Marketplace images have the latest Mellanox drivers, but post-deployment, updating the driver is the user's responsibility. potentialBenefits: Enhanced VM network efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -199,7 +189,6 @@ For outbound internet connectivity of Virtual Machines, using NAT Gateway or Azure Firewall is recommended to enhance security and service resilience, thanks to their higher availability and SNAT ports. 
potentialBenefits: Enhanced security and service resiliency pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -217,7 +206,6 @@ Unless you have a specific reason, it's advised to associate a network security group to a subnet or a network interface, but not both, to avoid unexpected communication issues and troubleshooting due to potential rule conflicts between the two associations. potentialBenefits: Reduces communication problems pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -235,7 +223,6 @@ IP forwarding allows a virtual machine network interface to receive and send network traffic not destined for or originating from its assigned IP addresses. potentialBenefits: Enhances network appliance function pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -253,7 +240,6 @@ Configure the DNS Server at the Virtual Network level to prevent any inconsistency across the environment. potentialBenefits: Ensures DNS consistency pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -271,7 +257,6 @@ Azure shared disks let you attach a disk to multiple VMs at once for deploying or migrating clustered applications, suitable only when a disk is shared among VM cluster members. potentialBenefits: Enhances clustered server performance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -291,7 +276,6 @@ Recommended changing to "Disable public access and enable private access" and creating a Private Endpoint to improve security by restricting direct public access and ensuring connections are made privately, enhancing data protection and minimizing potential external threats. 
potentialBenefits: Enhances VM security and privacy pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -309,7 +293,6 @@ Keeping your virtual machine (VM) secure is crucial for the applications you run. This involves using various Azure services and features to ensure secure access to your VMs and the secure storage of your data, aiming for overall security of your VM and applications. potentialBenefits: Secure VMs and applications pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -329,7 +312,6 @@ Consider enabling Azure Disk Encryption (ADE) for encrypting Azure VM disks using DM-Crypt (Linux) or BitLocker (Windows). Additionally, consider Encryption at host and Confidential disk encryption for enhanced data security. potentialBenefits: Enhances data security and integrity pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -347,7 +329,6 @@ VM Insights monitors VM and scale set performance, health, running processes, and dependencies. It enhances the predictability of application performance and availability by pinpointing performance bottlenecks and network issues, and it clarifies if problems are related to other dependencies. potentialBenefits: Improves VM performance and health pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -367,7 +348,6 @@ Azure Monitor Metrics automatically receives platform metrics, but platform logs, which offer detailed diagnostics and auditing for resources and their Azure platform, need to be manually routed for collection. 
potentialBenefits: Enhanced diagnostics and auditing capability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -385,7 +365,6 @@ The maintenance configuration settings let users schedule and manage updates, making sure the updates or interruptions on the VM are performed within a planned timeframe. potentialBenefits: Scheduled updates for VMs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -403,7 +382,6 @@ A-series VMs are tailored for entry-level workloads like development and testing, including use cases such as development and test servers, low traffic web servers, and small to medium databases. potentialBenefits: Ensures full CPU usage for heavy tasks pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -421,7 +399,6 @@ Compared to Standard HDD and SSD, Premium SSD, SSD v2, and Ultra Disks offer improved performance, configurability, and higher single-instance VM uptime SLAs. The lowest SLA of all disks on a VM applies, so it is best to use Premium or Ultra Disks for the highest uptime SLA. potentialBenefits: Enhanced performance, cost efficiency, and uptime SLA pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -439,7 +416,6 @@ If the workload is Maintenance sensitive, consider Azure Boost compatible VMs. Azure Boost is designed to lessen the impact on customers when Azure maintenance activities occur on the host. potentialBenefits: Less maintenance impact pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -459,7 +435,6 @@ If your workload is Maintenance sensitive, enable Scheduled Events. This Azure Metadata Service lets your app prepare for virtual machine maintenance by providing information on upcoming events like reboots, reducing disruptions. 
potentialBenefits: Minimize downtime for VMs pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -481,7 +456,6 @@ Azure disks offers a zone-redundant storage (ZRS) option for workloads that need to be resilient to an entire zone being down. Due to the cross-zone data replication, ZRS disks have higher write latency when compared to the locally-redundant option (LRS), so make sure to benchmark your disks. potentialBenefits: Enhanced Disk resilience to failures pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -499,7 +473,6 @@ Azure Capacity Reservations ensure high availability for virtual machines by reserving compute capacity in advance within a specific region or availability zone. This guarantees that VMs will have the necessary resources during peak demand or maintenance events, enhancing reliability and uptime. potentialBenefits: Guaranteed capacity in constrained regions/zones pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -517,7 +490,6 @@ If you've installed the Azure Linux Agent or are using an endorsed distribution image, ensure your agent version is up-to-date. Some Linux distributions may disable auto-update or use older agent versions. potentialBenefits: Reduces complications with VM provisioning pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -535,7 +507,6 @@ On-Demand Capacity Reservations ensure recovery of virtual machines in the event of a natural disaster by reserving compute capacity in advance within a specific region or zone. This guarantees that VMs have the necessary resources during disaster recovery failover events thus reducing downtime. 
potentialBenefits: Guaranteed capacity in disaster recovery regions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/ContainerRegistry/registries/recommendations.yaml b/azure-resources/ContainerRegistry/registries/recommendations.yaml index 178f124bb..b9b88a690 100644 --- a/azure-resources/ContainerRegistry/registries/recommendations.yaml +++ b/azure-resources/ContainerRegistry/registries/recommendations.yaml @@ -9,7 +9,6 @@ Choose a service tier of Azure Container Registry to meet your performance needs. Premium offers the most bandwidth and highest rate of read and write operations for high-volume deployments. Use Basic to start, Standard for production, and Premium for hyper-scale performance and geo-replication. potentialBenefits: High-volume support and geo-replication pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Azure Container Registry's optional zone redundancy enhances resiliency and high availability for registries or replication resources in a specific region by distributing resources across multiple zones. potentialBenefits: Enhances resiliency and high availability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Use Azure Container Registry's geo-replication for multi-region deployments to simplify registry management and minimize latency. It enables serving global customers from local data centers and supports distributed development teams. Regional webhooks can notify of events in replicas. potentialBenefits: Simplifies management, reduces latency pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ Using repository namespaces allows a single registry to be shared across multiple groups and deployments within an organization, supporting nested namespaces for group isolation. 
However, repositories are managed independently, not hierarchically. potentialBenefits: Enables sharing and group isolation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Container registries, used across multiple hosts, should be in their own resource group to prevent accidental deletion of images when container instances are deleted, preserving the image collection while experimenting with hosts. potentialBenefits: Safeguards image collection pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -101,7 +96,6 @@ The storage constraints of Azure Container Registry's service tiers align with usage scenarios: Basic for starters, Standard for production, and Premium for high-scale performance and geo-replication. potentialBenefits: Reduce costs, optimize storage pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -121,7 +115,6 @@ By default, Azure container registry requires authentication for pull/push actions. Enabling anonymous pull access exposes all content for public read actions. This applies to all repositories, potentially allowing unrestricted access if repository-scoped tokens are used. potentialBenefits: Enhanced security and controlled access pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -139,7 +132,6 @@ Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations. potentialBenefits: Enhanced tracking and debugging pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -159,7 +151,6 @@ Monitoring Azure resources using Azure Monitor enhances their availability, performance, and operation. Azure Container Registry, a full-stack monitoring service, provides features for Azure and other cloud and on-premises resources. 
potentialBenefits: Enhanced monitoring and operation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -179,7 +170,6 @@ Enabling soft delete in Azure Container Registry (ACR) allows for the management of deleted artifacts with a specified retention period. Users can list, filter, and restore these artifacts until automatically purged post-retention. potentialBenefits: Recovery of deleted artifacts pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/ContainerService/managedClusters/recommendations.yaml b/azure-resources/ContainerService/managedClusters/recommendations.yaml index 1bf1e40fc..4ab77b259 100644 --- a/azure-resources/ContainerService/managedClusters/recommendations.yaml +++ b/azure-resources/ContainerService/managedClusters/recommendations.yaml @@ -9,7 +9,6 @@ Azure Availability Zones ensure high availability by offering independent locations within regions, equipped with their own power, cooling, and networking to ensure applications and data are protected from datacenter-level failures. potentialBenefits: Enhanced fault tolerance for AKS pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ AKS assigns the kubernetes.azure.com/mode: system label to nodes in system node pools signaling the preference for system pods should be scheduled there. The CriticalAddonsOnly=true:NoSchedule taint can be added to your system nodes to prohibit application pods from being scheduled on them. potentialBenefits: Enhanced reliability via pod isolation pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ Local Kubernetes accounts in AKS, being non-auditable and legacy, are discouraged. 
Microsoft Entra's integration offers centralized management, multi-factor authentication, RBAC for detailed access, and a secure, scalable authentication system compatible with Azure and external identity providers. potentialBenefits: Enhanced security and access control pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -69,7 +66,6 @@ Azure CNI enhances cluster IP and network management, allowing dynamic IP allocation, scalable subnets, direct pod-VNET connectivity, and supports diverse network policies for pods and nodes with Azure Network Policies and Calico, optimizing network efficiency and security potentialBenefits: Dynamic IP allocation, scalable subnets, direct VNET access pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -89,7 +85,6 @@ The cluster auto-scaler in AKS adjusts node counts based on pod resource needs and available capacity, enabling scaling as per demand to prevent outages. potentialBenefits: Optimizes scaling and prevents outages pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -113,7 +108,6 @@ AKS, popular for stateful apps needing backups, can now use Azure Backup to secure clusters and attached volumes through an installed Backup Extension, enabling backup and restore operations via a Backup Vault. potentialBenefits: Ensures data safety for AKS pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -133,7 +127,6 @@ ZRS ensures data replication across three zones, protecting against zonal outages. It's available for Azure Disks, Container Storage, Files, and Blob by setting the SKU to ZRS in storage classes, enhancing multi-zone AKS clusters from v1.29. 
potentialBenefits: Increases data durability and availability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -159,7 +152,6 @@ From Kubernetes 1.26, Azure Disk and Azure File in-tree drivers are deprecated in favor of CSI drivers. Existing deployments remain operational but untested; users should switch to CSI drivers for new features and SKUs. potentialBenefits: Ensures future compatibility pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -179,7 +171,6 @@ A ResourceQuota object sets limits on resource use per namespace, controlling the number and type of objects created, and the total compute resources available. potentialBenefits: Limits AKS resource usage per namespace pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -197,7 +188,6 @@ To rapidly scale AKS workloads, utilize virtual nodes for quick pod provisioning, unlike Kubernetes auto-scaler. For clusters with availability zones, ensure one nodepool per AZ due to persistent volumes not working across AZs, preventing auto-scaler pod creation failures if lacking access. potentialBenefits: Faster scaling with virtual nodes pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -217,7 +207,6 @@ Production AKS clusters require the Standard or Premium tier for a financially backed SLA and enhanced node scalability, as the free service lacks these features. Use the Premium tier for mission-critical workloads. potentialBenefits: SLA guarantee and better scalability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -237,7 +226,6 @@ Azure Monitor enables real-time health and performance insights for AKS by collecting events, capturing container logs, and gathering CPU/Memory data from the Metrics API. 
It allows data visualization using Azure Monitor Container Insights, Prometheus, Grafana, or others. potentialBenefits: Real-time AKS health/performance insights pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -255,7 +243,6 @@ Ephemeral OS disks on AKS offer lower read/write latency due to local attachment, eliminating the need for replication seen with managed disks. This enhances performance and speeds up cluster operations such as scaling or upgrading due to quicker re-imaging and boot times. potentialBenefits: Lower latency, faster re-imaging and booting pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -277,7 +264,6 @@ Azure Policies in AKS clusters help enforce governance best practices concerning security, authentication, provisioning, networking, and more, ensuring a robust and secure environment for operations. potentialBenefits: Enhanced AKS governance and security pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -297,7 +283,6 @@ GitOps, an operating model for cloud-native apps, uses Git for storing application and infrastructure code as a source of truth for continuous delivery. potentialBenefits: Ensures AKS config consistency pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -317,7 +302,6 @@ Enhance availability and reliability by using pod topology spread constraints to control pod distribution based on node or zone topology, ensuring pods are spread across your cluster. potentialBenefits: Ensures high availability and efficient use pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -337,7 +321,6 @@ AKS kubelet controller uses liveness probes to validate containers and applications health, ensuring the system knows when to restart a container based on its health status. 
potentialBenefits: Enhances container health monitoring pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -357,7 +340,6 @@ Configuring multiple replicas in Pod or Deployment manifests stabilizes the number of replica Pods, ensuring that a specified number of identical Pods are always available, thereby guaranteeing their availability. potentialBenefits: Ensures stable pod availability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -375,7 +357,6 @@ The system node pool should be configured with a minimum node count of two to ensure critical system pods are resilient to node outages. potentialBenefits: Ensures pod resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -393,7 +374,6 @@ Configuring the user node pool with at least two nodes is essential for applications needing high availability, ensuring they remain operational and accessible without interruption. potentialBenefits: Ensures high app availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -411,7 +391,6 @@ A Pod Disruption Budget is a Kubernetes resource configuring the minimum number or percentage of pods that should remain available during disruptions like maintenance or scaling, ensuring a minimum number of pods are always available in the cluster. potentialBenefits: Ensures cluster resiliency during disruptions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -431,7 +410,6 @@ Nodepool subnets sized for max auto-scale settings enable AKS to efficiently scale out nodes, meeting increased demand while reducing resource constraints and potential service disruptions. 
potentialBenefits: Efficient scaling, reduced disruptions pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -449,7 +427,6 @@ Node pool settings should not exceed the subscription core quota to ensure AKS can scale out nodes efficiently, meeting increased demand while reducing resource constraints and potential service disruptions. potentialBenefits: Reduced disruptions pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -467,7 +444,6 @@ Azure Linux on AKS boosts resiliency with a native image using validated, source-built components. It's lightweight, reducing the attack surface and maintenance. A Microsoft-hardened kernel, optimized for Azure, enhances stability and security for container workloads. potentialBenefits: Reduced disruptions pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -485,7 +461,6 @@ Deploying at least two replicas of your application ensures that your application is highly available and can tolerate node failures. potentialBenefits: Ensures high app availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/DBforMySQL/flexibleServers/recommendations.yaml b/azure-resources/DBforMySQL/flexibleServers/recommendations.yaml index 71a89e7ad..8cb2095d8 100644 --- a/azure-resources/DBforMySQL/flexibleServers/recommendations.yaml +++ b/azure-resources/DBforMySQL/flexibleServers/recommendations.yaml @@ -9,7 +9,6 @@ Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery. 
potentialBenefits: Enhanced uptime and data protection pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Use custom maintenance schedule on flexible server instances to select a preferred time for service updates to be applied. potentialBenefits: Control update timings pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters. potentialBenefits: Recover from regional failure and/or disaster pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -63,7 +60,6 @@ Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters. potentialBenefits: Recover from regional failure and/or disaster pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -81,7 +77,6 @@ Configure storage auto-grow to prevent the server from running out of storage and becoming read-only. potentialBenefits: Scale storage automatically to meet increasing demand pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/DBforPostgreSQL/flexibleServers/recommendations.yaml b/azure-resources/DBforPostgreSQL/flexibleServers/recommendations.yaml index 92fe007d3..ea6fe1194 100644 --- a/azure-resources/DBforPostgreSQL/flexibleServers/recommendations.yaml +++ b/azure-resources/DBforPostgreSQL/flexibleServers/recommendations.yaml @@ -9,7 +9,6 @@ Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery. 
potentialBenefits: Enhanced uptime and data protection pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Use custom maintenance schedule on flexible server instances to select a preferred time for service updates to be applied. potentialBenefits: Control update timings pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters. potentialBenefits: Recover from regional failure and/or disaster pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -63,7 +60,6 @@ Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters. potentialBenefits: Recover from regional failure and/or disaster pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -81,7 +77,6 @@ Configure storage auto-grow to prevent the server from running out of storage and becoming read-only. potentialBenefits: Scale storage automatically to meet increasing demand pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Dashboard/grafana/recommendations.yaml b/azure-resources/Dashboard/grafana/recommendations.yaml index be213a1a9..c0f15ff6f 100644 --- a/azure-resources/Dashboard/grafana/recommendations.yaml +++ b/azure-resources/Dashboard/grafana/recommendations.yaml @@ -9,7 +9,6 @@ Managed Grafana Standard tier is hosted on a dedicated set of VMs to provide redundancy. With zone redundancy enabled, VMs are spread across availability zones (AZ). Related resources are also configured for AZ. Zone redundancy can only be enabled when creating the Azure Managed Grafana instance. 
potentialBenefits: Enhanced Managed Grafana resilience to failures pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Databricks/workspaces/recommendations.yaml b/azure-resources/Databricks/workspaces/recommendations.yaml index df297dbba..d85cd7474 100644 --- a/azure-resources/Databricks/workspaces/recommendations.yaml +++ b/azure-resources/Databricks/workspaces/recommendations.yaml @@ -9,7 +9,6 @@ Databricks recommends migrating workloads to the latest or LTS version of its runtime for enhanced stability and support. If on Runtime 11.3 LTS or above, move directly to the latest 12.x version. If below, first migrate to 11.3 LTS, then to the latest 12.x version as per the migration guide. potentialBenefits: Enhanced stability and support pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Databricks pools pre-provision VMs, reducing risks of provisioning errors during cluster start or scale, enhancing reliability. potentialBenefits: Reduces provisioning errors pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Upgrade HDDs in premium VMs to SSDs for better speed and reliability. Premium SSDs boost IO-heavy apps; Standard SSDs balance cost and performance. Ideal for critical workloads, upgrading improves connectivity with brief reboot. Consider for vital VMs potentialBenefits: Faster, reliable VM performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Autoscaling adjusts cluster sizes automatically based on workload demands, offering benefits for many use cases in terms of costs and performance. It includes guidance on when and how to best utilize Autoscaling. For streaming, Delta Live Tables with autoscaling is advised. 
potentialBenefits: Cost and performance optimization pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -81,7 +77,6 @@ The scaling parameter of a SQL warehouse defines the min and max number of clusters for distributing queries. By default, it's set to one. Increasing the cluster count can accommodate more concurrent users effectively. potentialBenefits: Improves concurrency and efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -99,7 +94,6 @@ Databricks enhanced autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact on the data processing latency of your pipelines. potentialBenefits: Optimized resource use and minimal latency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -119,7 +113,6 @@ To conserve cluster resources, you can terminate a cluster to store its configuration for future reuse or autostart jobs. Clusters can auto-terminate after inactivity, but this only tracks Spark jobs, not local processes, which might still be running even after Spark jobs end. potentialBenefits: Saves cluster resources, avoids idle use pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -137,7 +130,6 @@ When creating a Databricks cluster, you can set a log delivery location for the Spark driver, worker nodes, and events. Logs are delivered every 5 mins and archived hourly. Upon cluster termination, all generated logs until that point are guaranteed to be delivered. potentialBenefits: Improved troubleshooting and audit pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -155,7 +147,6 @@ Delta Lake is an open source storage format enhancing data lakes' reliability with ACID transactions, schema enforcement, and scalable metadata handling. 
potentialBenefits: Enhances data reliability and processing pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -173,7 +164,6 @@ Apache Spark in Databricks Lakehouse ensures resilient distributed data processing by automatically rescheduling failed tasks, aiding in overcoming external issues like network problems or revoked VMs. potentialBenefits: Boosts speed and reliability for Spark tasks pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -191,7 +181,6 @@ Invalid or nonconforming data can crash workloads dependent on specific data formats. Best practices recommend filtering such data at ingestion to improve end-to-end resilience, ensuring no data is lost or missed. potentialBenefits: Enhanced data resilience and integrity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -209,7 +198,6 @@ Use Databricks and MLflow for deploying models as Spark UDFs for job scheduling, retries, autoscaling. Model serving offers scalable infrastructure, processes models using MLflow, and serves them via REST API using serverless compute managed in Databricks cloud. potentialBenefits: Enhanced reliability and autoscaling pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -227,7 +215,6 @@ Use Databricks and MLflow for deploying models as Apache Spark UDFs, benefiting from job scheduling, retries, autoscaling, etc. potentialBenefits: Enhances scalability and reliability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -245,7 +232,6 @@ Curate data by creating a layered architecture to increase data quality across layers. Start with a raw layer for ingested source data, continue with a curated layer for cleansed and refined data, and finish with a final layer catered to business needs, focusing on security and performance. 
potentialBenefits: Enhances data quality and trust pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -263,7 +249,6 @@ Copying data leads to redundancy, lost integrity, lineage, and access issues, affecting lakehouse data quality. Temporary copies are useful for agility and innovation but can become problematic operational data silos, questioning data's master status and currency. potentialBenefits: Enhanced data integrity and quality pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -281,7 +266,6 @@ Uncontrolled schema changes can lead to invalid data and failing jobs. Databricks validates and enforces schema through Delta Lake, which prevents bad records during ingestion, and Auto Loader, which detects new columns and supports schema evolution to maintain data integrity. potentialBenefits: Prevents invalid data and job failures pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -299,7 +283,6 @@ Delta tables verify data quality automatically with SQL constraints, triggering an error for violations. Delta Live Tables enhance this by defining expectations for data quality, utilizing Python or SQL, to manage actions for record failures, ensuring data integrity and compliance. potentialBenefits: Ensures data quality and integrity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -317,7 +300,6 @@ To recover from a failure, regular backups are needed. The Databricks Labs project migrate lets admins create backups by exporting workspace assets using the Databricks CLI/API. These backups help in restoring or migrating workspaces. 
potentialBenefits: Ensures data recovery and migration pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -335,7 +317,6 @@ Structured Streaming ensures fault-tolerance and data consistency in streaming queries. With Azure Databricks workflows, you can set up your queries to automatically restart after failure, picking up precisely where they left off. potentialBenefits: Fault-tolerance and auto-restart for queries pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -353,7 +334,6 @@ Despite thorough testing, a production job can fail or yield unexpected data. Sometimes, repairs are done by adding jobs post-issue identification and pipeline correction. potentialBenefits: Easy rollback and fix for ETL jobs pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -371,7 +351,6 @@ Databricks Workflows enable efficient error recovery in multi-task jobs by offering a matrix view for issue examination. Fixes can be applied to initiate repair runs targeting only failed and dependent tasks, preserving successful outcomes and thereby saving time and money. potentialBenefits: Saves time and money with smart recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -391,7 +370,6 @@ It is important to note that the Azure Databricks service is not entirely zone redundant and does support zonal failover. potentialBenefits: Ensures service continuity during disasters pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -409,7 +387,6 @@ The Databricks Terraform provider manages Azure Databricks workspaces and cloud infrastructure flexibly and powerfully.
potentialBenefits: Efficient, reliable automation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -427,7 +404,6 @@ The Databricks Terraform provider is a flexible, powerful tool for managing Azure Databricks workspaces and cloud infrastructure. potentialBenefits: Enhanced reliability and automation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -445,7 +421,6 @@ Customers often naturally divide workspaces by teams or departments. However, it's crucial to also consider Azure Subscription and Azure Databricks (ADB) Workspace limits when partitioning. potentialBenefits: Enhanced limits management, team separation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -463,7 +438,6 @@ Deploying only one Databricks Workspace per VNet aligns with Azure Databricks' isolation model. potentialBenefits: Enhanced security and resource isolation pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -481,7 +455,6 @@ Driven by security and data availability concerns, each Azure Databricks Workspace comes with a default DBFS designed for system-level artifacts like libraries and Init scripts, not for production data. potentialBenefits: Enhanced security, data protection pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -499,7 +472,6 @@ Azure Spot VMs are not suitable for critical production workloads needing high availability and reliability. They are meant for fault-tolerant tasks and can be evicted with 30-seconds notice if Azure needs the capacity, with no SLA guarantees. potentialBenefits: Ensures high reliability for production pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -517,7 +489,6 @@ Move workspaces to in-region control plane for increased regional isolation. 
Identify current control plane region using the workspace URL and nslookup. When region from CNAME differs from workspace region and an in-region control is available, consider migration using tools provided below. potentialBenefits: Improves resilience and data sovereignty pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -539,7 +510,6 @@ Azure Databricks planning should include VM SKU swap strategies for capacity issues. VMs are regional, and allocation failures may occur, shown by a "CLOUD PROVIDER" error. potentialBenefits: Ensures service availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/DesktopVirtualization/hostPools/recommendations.yaml b/azure-resources/DesktopVirtualization/hostPools/recommendations.yaml index bd58c9158..9b7a12c2b 100644 --- a/azure-resources/DesktopVirtualization/hostPools/recommendations.yaml +++ b/azure-resources/DesktopVirtualization/hostPools/recommendations.yaml @@ -9,7 +9,6 @@ Validation host pools let you monitor service updates before the service applies them to your standard or non-validation environment. potentialBenefits: Enhanced environment stability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Create up to two maintenance windows for the Azure Virtual Desktop agent, side-by-side stack, and Geneva Monitoring agent to get updated so that updates don't happen during peak business hours. potentialBenefits: Enhanced environment stability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Place domain joined session hosts VMs in unique OUs. Segregate Prod and DR units for environment-specific settings. This ensures targeted configurations for session hosts, including FSLogix, session controls, etc. 
potentialBenefits: Improved AVD hostpool config & segmentation pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Implement Azure Site Recovery (ASR) to replicate or backup stateful session hosts. This replicates VMs to a secondary Azure region or availability zone, ensuring recovery from a known VM state in case of an outage. potentialBenefits: Ensures VM recovery & failover pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/DesktopVirtualization/scalingPlans/recommendations.yaml b/azure-resources/DesktopVirtualization/scalingPlans/recommendations.yaml index 48f963ace..b0dfdced3 100644 --- a/azure-resources/DesktopVirtualization/scalingPlans/recommendations.yaml +++ b/azure-resources/DesktopVirtualization/scalingPlans/recommendations.yaml @@ -9,7 +9,6 @@ Scaling plans can only be assigned to host pools in the same region; in a multi-region deployment scenario, each region should have its own scaling plan. potentialBenefits: Enhanced scaling pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Devices/iotHubs/recommendations.yaml b/azure-resources/Devices/iotHubs/recommendations.yaml index e6f450c5e..286ba77b4 100644 --- a/azure-resources/Devices/iotHubs/recommendations.yaml +++ b/azure-resources/Devices/iotHubs/recommendations.yaml @@ -9,7 +9,6 @@ Device Identities should be copied to the failover region IoT Hub for all IoT devices to ensure connectivity in case of a failover. Manual Failover to another region is quicker (RTO), suitable for mission critical workloads.
potentialBenefits: Faster failover; Ensures device connectivity pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -29,7 +28,6 @@ In a production scenario, the IoT Hub tier should not be Free because the Free tier does not provide the necessary Service Level Agreement. potentialBenefits: Ensures SLA for production pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ In regions supporting Availability Zones for IoT Hub, using these zones boosts availability. They're automatically activated for new IoT Hubs in supported areas. potentialBenefits: Boosts IoT Hub availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -65,7 +62,6 @@ Device Provisioning Service (DPS) enables easy redistribution of IoT devices for scaling and availability, allowing devices to be reassigned and not bound to specific IoT Hub instances. Devices in IoT Hubs using DPS should be verified for DPS utilization. potentialBenefits: Enhances scalability and availability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -87,7 +83,6 @@ In case of a regional failure, an IoT Hub can failover to a second region, automatically or manually, to ensure your application continues working. potentialBenefits: Ensures business continuity pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -105,7 +100,6 @@ Using message routing for custom endpoints in IoT Hub, messages might not reach these destinations if specific conditions are unmet. A default route ensures all messages are received, but disabling this safety net risks leaving some messages undelivered. 
potentialBenefits: Prevents undelivered messages pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/DocumentDB/databaseAccounts/recommendations.yaml b/azure-resources/DocumentDB/databaseAccounts/recommendations.yaml index 52a327f8b..a41accb68 100644 --- a/azure-resources/DocumentDB/databaseAccounts/recommendations.yaml +++ b/azure-resources/DocumentDB/databaseAccounts/recommendations.yaml @@ -9,7 +9,6 @@ Enable a secondary region in Cosmos DB for higher SLA without downtime. Simple as pinning a location on a map. For Strong consistency, configure at least three regions for write availability in case of failure. potentialBenefits: Enhances SLA and resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Cosmos DB boasts high uptime and resiliency. Even so, issues may arise. With Service-Managed failover, if a region is down, Cosmos DB automatically switches to the next available region, requiring no user action. potentialBenefits: Auto failover for high uptime pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ When availability zones are configured, Azure Cosmos DB intelligently distributes the 4 replicas of your data across all available zones. It ensures that your Azure Cosmos DB can withstand an outage in one availability zone and remain fully operational throughout. potentialBenefits: Enhances high availability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ Multi-region write capability allows for designing applications that are highly available across multiple regions, though it demands careful attention to consistency requirements and conflict resolution. Improper setup may decrease availability and cause data corruption due to unhandled conflicts. 
potentialBenefits: Enhances high availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -85,7 +81,6 @@ Cosmos DB's backup is always on, offering protection against data mishaps. Continuous mode allows for self-serve restoration to a pre-mishap point, unlike periodic mode which requires contacting Microsoft support, leading to longer restore times. potentialBenefits: Faster self-serve data restore pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -103,7 +98,6 @@ Cosmos DB has a 4 MB response limit, leading to paginated results for large or partition-spanning queries. Each page shows availability and provides a continuation token for the next. A while loop in code is necessary to traverse all pages until completion. potentialBenefits: Maximizes data retrieval efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -121,7 +115,6 @@ Using a single instance of the SDK client for each account and application is crucial as connections are tied to the client. Compute environments have a limit on open connections, affecting connectivity when exceeded. potentialBenefits: Optimizes connections and efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -139,7 +132,6 @@ Cosmos DB SDKs automatically manage many transient errors through retries. Despite this, it's crucial for applications to implement additional retry policies targeting specific cases that the SDKs can't generically address, ensuring more robust error handling. potentialBenefits: Enhances error handling resilience pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -157,7 +149,6 @@ Monitoring the availability and responsiveness of Azure Cosmos DB resources and having alerts set up for your workload is a good practice. 
This ensures you stay proactive in handling unforeseen events. potentialBenefits: Proactive issue management pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/EventGrid/topics/recommendations.yaml b/azure-resources/EventGrid/topics/recommendations.yaml index 4af1b29ba..c5e328730 100644 --- a/azure-resources/EventGrid/topics/recommendations.yaml +++ b/azure-resources/EventGrid/topics/recommendations.yaml @@ -9,7 +9,6 @@ Enabling diagnostic settings on Azure Event Grid resources like custom topics, system topics, and domains lets you capture and view diagnostic information to troubleshoot failures effectively. potentialBenefits: Enhanced troubleshooting for Event Grid pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Event Grid may not deliver an event within a specific time or after several attempts, leading to dead-lettering where undelivered events are sent to a storage account. potentialBenefits: Saves undelivered events pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Use private endpoints for secure event ingress to custom topics/domains via a private link, avoiding the public internet. It employs an IP from the VNet space for your topic/domain. potentialBenefits: Secure, private VNet ingress pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/EventHub/namespaces/recommendations.yaml b/azure-resources/EventHub/namespaces/recommendations.yaml index 49ae51ec0..9f223fc93 100644 --- a/azure-resources/EventHub/namespaces/recommendations.yaml +++ b/azure-resources/EventHub/namespaces/recommendations.yaml @@ -9,7 +9,6 @@ When using the Azure portal, zone redundancy is automatically enabled. However, some Infrastructure as Code (IaC) tools may default this to false. 
To ensure replication of metadata and events across data centers in an availability zone, always verify that zone redundancy is enabled. potentialBenefits: Enhanced fault tolerance for Event Hub pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Enable auto-inflate on Event Hub Standard tier namespaces to automatically scale up throughput units (TUs), meeting usage needs and preventing data ingress or egress throttle scenarios by adjusting to allowed rates. potentialBenefits: Prevents throttling by autoscaling TUs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Insights/activityLogAlerts/recommendations.yaml b/azure-resources/Insights/activityLogAlerts/recommendations.yaml index ed4ef964c..081dbaaa1 100644 --- a/azure-resources/Insights/activityLogAlerts/recommendations.yaml +++ b/azure-resources/Insights/activityLogAlerts/recommendations.yaml @@ -9,7 +9,6 @@ Configure Resource Health Alerts for all applicable resources to stay informed about the current and historical health status of your Azure resources. They notify you when these resources have a change in their health status. potentialBenefits: Stay informed on resource status pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -31,7 +30,6 @@ Service health gives a personalized health view of Azure services and regions used, offering the best place for notifications on outages, planned maintenance, and health advisories by knowing the services used. 
potentialBenefits: Proactive outage and maintenance alerts pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Insights/components/recommendations.yaml b/azure-resources/Insights/components/recommendations.yaml index d00a75889..9f61ab80d 100644 --- a/azure-resources/Insights/components/recommendations.yaml +++ b/azure-resources/Insights/components/recommendations.yaml @@ -9,7 +9,6 @@ Classic Application Insights retires in February 2024. To minimize disruption to existing application monitoring scenarios, transition to workspace-based Application Insights before 29 February 2024. potentialBenefits: Avoid service disruption post-Feb 2024 pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/KeyVault/vaults/recommendations.yaml b/azure-resources/KeyVault/vaults/recommendations.yaml index bebc6b0f8..02fa7a0ad 100644 --- a/azure-resources/KeyVault/vaults/recommendations.yaml +++ b/azure-resources/KeyVault/vaults/recommendations.yaml @@ -9,7 +9,6 @@ Key Vault's soft-delete feature enables recovery of deleted vaults and objects like keys, secrets, and certificates. When enabled, marked resources are retained for 90 days, allowing for their recovery, essentially undoing deletion. potentialBenefits: Enables recovery of deleted items pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Purge protection secures against malicious deletions by enforcing a retention period for soft deleted key vaults, ensuring no one, not even insiders or Microsoft, can purge your key vaults during this period, preventing permanent data loss. 
potentialBenefits: Protects from insider attacks, avoids data loss pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Azure Private Link Service lets you securely and privately connect to Azure Key Vault via a Private Endpoint in your VNet, using a private IP and eliminating public Internet exposure. potentialBenefits: Secure Key Vault with Private Link pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -63,7 +60,6 @@ Key vaults are security boundaries for secret storage. Grouping secrets together increases risk during a security event, as attacks could access multiple secrets. potentialBenefits: Enhanced security, Reduced risk pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -81,7 +77,6 @@ Enable logs, set up alerts, and adhere to retention requirements for improved monitoring and security of Key Vault access, detailing the frequency and identity of users. potentialBenefits: Enhanced monitoring and security compliance pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/NetApp/netAppAccounts/recommendations.yaml b/azure-resources/NetApp/netAppAccounts/recommendations.yaml index 678b4c724..b86546799 100644 --- a/azure-resources/NetApp/netAppAccounts/recommendations.yaml +++ b/azure-resources/NetApp/netAppAccounts/recommendations.yaml @@ -9,7 +9,6 @@ Service levels, part of capacity pool attributes, determine the maximum throughput per volume quota in Azure NetApp Files. It combines read and write speed, offering three levels: Standard (16 MiB/s per 1TiB), Premium (64 MiB/s per 1TiB), and Ultra (128 MiB/s per 1TiB) throughput. 
potentialBenefits: Optimized performance and cost efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Standard network feature in Azure NetApp Files enhances IP limits and VNet capabilities, including network security groups, user-defined routes on subnets, and diverse connectivity options. potentialBenefits: Enhanced connectivity and security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Availability zones are distinct locations within an Azure region to withstand local failures. Deploy your workload in multiple availability zones and use application-based replication or Azure NetApp Files cross-zone replication to achieve high availability. Note that failover is a manual process. potentialBenefits: High Availability across availability zones pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -63,7 +60,6 @@ Azure NetApp Files' availability zone (AZ) volume placement feature lets you deploy volumes in the same AZ with Azure compute and other services to have within AZ latency and share the same AZ failure domain. potentialBenefits: Within AZ latency and tolerate failure of other AZ pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -81,7 +77,6 @@ Azure NetApp Files snapshot technology ensures stability, scalability, and swift data recoverability without affecting performance. It supports automatic snapshot creation via policies for Azure NetApp Files data. potentialBenefits: Stable, scalable, swift recovery, no perf impact pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -99,7 +94,6 @@ Azure NetApp Files offers a fully managed backup solution enhancing long-term recovery, archiving, and compliance. 
potentialBenefits: Enhances data recovery and compliance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -117,7 +111,6 @@ Azure NetApp Files replication offers data protection by allowing asynchronous cross-region volume replication for application failover in case of regional outages. Volumes can be replicated across regions, not concurrently with cross-zone replication. Note that failover is a manual process. potentialBenefits: Enhanced data protection and disaster recovery pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -135,7 +128,6 @@ The cross-zone replication (CZR) feature enables asynchronous data replication between Azure NetApp Files volumes across different availability zones, ensuring data protection and critical application failover in case of zone-wide disasters. Note that failover is a manual process. potentialBenefits: Enhances disaster recovery across availability zones pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -153,7 +145,6 @@ Azure NetApp Files offers metrics like allocated storage, actual usage, volume IOPS, and latency, enabling a better understanding of usage patterns and volume performance for NetApp accounts. potentialBenefits: Optimize usage and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -171,7 +162,6 @@ Azure NetApp Files supports Azure policy integration using either built-in policy definitions or by creating custom ones to maintain organizational standards and compliance. potentialBenefits: Enforce standards and assess compliance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -191,7 +181,6 @@ Access to the delegated subnet should be limited to specific Azure Virtual Networks. SMB-enabled volumes' share permissions should move away from 'Everyone/Full control'. 
NFS-enabled volumes' access needs to be controlled via export policies and/or NFSv4.1 ACLs. potentialBenefits: Enhanced security, Reduced data breach risk pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -217,7 +206,6 @@ Certain SMB applications need SMB Transparent Failover for maintenance without interrupting server connectivity. Azure NetApp Files provides this through SMB Continuous Availability for applications like Citrix App Layering, FSLogix user/profile containers, Microsoft SQL Server, MSIX app attach. potentialBenefits: Zero downtime for SMB apps pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -235,7 +223,6 @@ Azure NetApp Files might undergo occasional planned maintenance such as platform updates or service and software upgrades. It's important to be aware of the application's resiliency settings to cope with these storage service maintenance events. potentialBenefits: Minimizes downtime during maintenance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/applicationGateways/recommendations.yaml b/azure-resources/Network/applicationGateways/recommendations.yaml index bdd95de1d..0528dbe97 100644 --- a/azure-resources/Network/applicationGateways/recommendations.yaml +++ b/azure-resources/Network/applicationGateways/recommendations.yaml @@ -9,7 +9,6 @@ Azure Application Gateways v2 are always deployed in a highly available fashion with multiple instances by default. Enabling autoscale ensures the service is not reliant on manual intervention for scaling. 
potentialBenefits: Enhances uptime and enables autoscaling pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Secure all incoming connections using HTTPS for production services with end-to-end SSL/TLS or SSL/TLS termination at the Application Gateway to protect against attacks and ensure data remains private and encrypted between the web server and browsers. potentialBenefits: Enhanced security and privacy pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -53,7 +51,6 @@ Use Application Gateway with Web Application Firewall (WAF) in an application virtual network to safeguard inbound HTTP/S internet traffic. WAF offers centralized defense against potential exploits through OWASP core rule sets-based rules. potentialBenefits: Enhanced security for HTTP/S traffic pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -73,7 +70,6 @@ Use Application Gateway v2 for built-in features like autoscaling, static VIPs, Azure KeyVault integration for better traffic management and performance, unless v1 is necessary. potentialBenefits: Better performance, autoscaling, more features pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -95,7 +91,6 @@ Enable logging in storage accounts, Log Analytics, and monitoring services for auditing and insights. If using NSGs, enable NSG flow logs to be stored, providing in-depth traffic analysis into Azure Cloud. potentialBenefits: Enhanced traffic insight and audit pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -115,7 +110,6 @@ Using custom health probes enhances understanding of backend availability and facilitates monitoring of backend services for any impact. potentialBenefits: Ensures backend uptime monitoring. 
pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -135,7 +129,6 @@ Deploying Application Gateway in a zone-aware configuration ensures continued customer access to services even if a specific zone goes down, as services in other zones remain available. potentialBenefits: Enhanced uptime and customer access pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -155,7 +148,6 @@ Using connection draining for backend maintenance ensures graceful removal of backend pool members during updates or health issues. It's enabled via Backend Setting and applies to all members during rule creation. potentialBenefits: Smooth updates, no dropped users pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -175,7 +167,6 @@ Application Gateway v2 (Standard_v2 or WAF_v2 SKU) can support up to 125 instances. A /24 subnet isn't mandatory for deployment but is advised to provide enough space for autoscaling and maintenance upgrades. potentialBenefits: Allows autoscaling and maintenance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/azureFirewalls/recommendations.yaml b/azure-resources/Network/azureFirewalls/recommendations.yaml index 2c928f10a..ed4b5338e 100644 --- a/azure-resources/Network/azureFirewalls/recommendations.yaml +++ b/azure-resources/Network/azureFirewalls/recommendations.yaml @@ -9,7 +9,6 @@ Azure Firewall offers different SLAs depending on its deployment; in a single availability zone or across multiple, potentially improving reliability and performance. potentialBenefits: Enhanced SLA and reliability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Monitor Azure Firewall for overall health, processed throughput, and outbound SNAT port usage. Get alerted before limits impact services. 
Consider NAT gateway integration with zonal deployments; note limitations with zone redundant firewalls and secure virtual hub networks. potentialBenefits: Improve health and performance monitoring pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Associate a DDoS protection plan with the virtual network hosting Azure Firewall to provide enhanced mitigation against DDoS attacks. Azure Firewall Manager integrates the creation of firewall infrastructure and DDoS protection plans. potentialBenefits: Enhanced DDoS attack defense pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -67,7 +64,6 @@ Azure Firewall policy supports rule hierarchies for compliance enforcement, using a central base policy with higher priority over child policies, and employs Azure custom roles to safeguard base policy and manage access within subscriptions or groups. potentialBenefits: Enhanced compliance and rule hierarchy pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -85,7 +81,6 @@ Configure a minimum of two to four public IP addresses per Azure Firewall to avoid SNAT exhaustion. Azure Firewall offers SNAT for all outbound traffic to public IPs, providing 2,496 SNAT ports for each additional PIP. potentialBenefits: Avoids SNAT exhaustion. pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -103,7 +98,6 @@ Creating a metric to monitor latency probes over 20ms for periods longer than 30ms helps identify when firewall instance CPUs are stressed, potentially indicating issues. 
potentialBenefits: Improved CPU stress detection pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/connections/recommendations.yaml b/azure-resources/Network/connections/recommendations.yaml index e7cf00e00..6c4f9485b 100644 --- a/azure-resources/Network/connections/recommendations.yaml +++ b/azure-resources/Network/connections/recommendations.yaml @@ -9,7 +9,6 @@ ExpressRoute gateways facilitate network traffic and route exchanges. FastPath enhances on-premises to virtual network data path performance by directing traffic straight to virtual machines, bypassing the gateway for improved resiliency through reduced gateway utilization. potentialBenefits: Enhances speed and resiliency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Configure an Azure Resource lock for Gateway Connection resources to prevent accidental deletion and maintain connectivity between on-premises networks and Azure workloads. potentialBenefits: Prevents accidental deletion of connections pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/ddosProtectionPlans/recommendations.yaml b/azure-resources/Network/ddosProtectionPlans/recommendations.yaml index 3be6c58aa..80468f01c 100644 --- a/azure-resources/Network/ddosProtectionPlans/recommendations.yaml +++ b/azure-resources/Network/ddosProtectionPlans/recommendations.yaml @@ -9,7 +9,6 @@ Azure DDoS Plan metrics differentiate packets and bytes by tags: Dropped (packets scrubbed by DDoS), Forwarded (packets to VIP not filtered), and No tag (total packets, sum of dropped and forwarded). 
potentialBenefits: Enhanced security and traffic insight pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/dnsZones/recommendations.yaml b/azure-resources/Network/dnsZones/recommendations.yaml index 55430ef9c..02ed2c5d3 100644 --- a/azure-resources/Network/dnsZones/recommendations.yaml +++ b/azure-resources/Network/dnsZones/recommendations.yaml @@ -9,7 +9,6 @@ Azure DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets. potentialBenefits: Ensures that no cached DNS records exist past RPO targets pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/expressRouteCircuits/recommendations.yaml b/azure-resources/Network/expressRouteCircuits/recommendations.yaml index f635b8c8a..64f252d30 100644 --- a/azure-resources/Network/expressRouteCircuits/recommendations.yaml +++ b/azure-resources/Network/expressRouteCircuits/recommendations.yaml @@ -9,7 +9,6 @@ Connecting each ExpressRoute Gateway to a minimum of two circuits in different peering locations enhances redundancy and reliability by ensuring alternate pathways for data in case one circuit fails. potentialBenefits: Enhanced reliability and redundancy pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Microsoft or the ExpressRoute provider always ensures physical redundancy in their services. It's essential to maintain this level of physical redundancy (two devices, two links) from the ExpressRoute peering location to your network for optimal performance and reliability. 
potentialBenefits: Enhanced reliability and fault tolerance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -47,7 +45,6 @@ Operating both connections of an ExpressRoute circuit in active-active mode enhances high availability as the Microsoft network will load balance the traffic across the connections on a per-flow basis. potentialBenefits: Improved high availability and load balancing pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -65,7 +62,6 @@ Enabling BFD over ExpressRoute speeds up link failure detection between MSEE devices and routers configured for ExpressRoute (CE/PE), applicable over both customer and Partner Edge routing devices with managed Layer 3 service. potentialBenefits: Faster link failure detection pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Use Network Insights for monitoring ExpressRoute circuit availability, QoS, and throughput. Set alerts based on Azure Monitor Baseline Alerts for availability, QoS metrics, and throughput metrics exceeding specific thresholds. potentialBenefits: Enhanced network performance and health pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -101,7 +96,6 @@ ExpressRoute leverages service health for notifications on both planned and unplanned maintenance, ensuring users are informed about any changes to their ExpressRoute circuits. potentialBenefits: Stay informed on circuit updates pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -119,7 +113,6 @@ Rate limiting controls traffic volume between on-premises networks and Azure via ExpressRoute Direct, applying to private or Microsoft peering. It distributes port bandwidth, ensures stability, and prevents congestion, with steps outlined for enabling on circuits. 
potentialBenefits: Optimizes network, prevents congestion pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/expressRouteGateways/recommendations.yaml b/azure-resources/Network/expressRouteGateways/recommendations.yaml index 7b0a805d7..75639025e 100644 --- a/azure-resources/Network/expressRouteGateways/recommendations.yaml +++ b/azure-resources/Network/expressRouteGateways/recommendations.yaml @@ -8,7 +8,6 @@ longDescription: To increase reliability, it's advised that each v-Hub's ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience. potentialBenefits: Enhance resiliency for Azure Service pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -25,7 +24,6 @@ longDescription: Set up monitoring and alerts for Virtual WAN ExpressRoute Gateway. Create alert rule for ensuring promptly response to critical events such as exceeding packets per second, exceeding BGP routes prefixes, Gateway overutilization and high frequency in route changes. potentialBenefits: Detection and mitigation to avoid disruptions. pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/expressRoutePorts/recommendations.yaml b/azure-resources/Network/expressRoutePorts/recommendations.yaml index fa0ed111b..66c3e886a 100644 --- a/azure-resources/Network/expressRoutePorts/recommendations.yaml +++ b/azure-resources/Network/expressRoutePorts/recommendations.yaml @@ -9,7 +9,6 @@ In Azure ExpressRoute Direct, the "Admin State" indicates the administrative status of layer 1 links, showing if a link is enabled or disabled, effectively turning the physical port on or off. potentialBenefits: Ensures optimal connectivity. 
pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Provisioning ExpressRoute circuits on a 10-Gbps or 100-Gbps ExpressRoute Direct resource up to 20-Gbps or 200-Gbps is possible but not recommended for resiliency. If an ExpressRoute Direct port fails, and circuits are using full capacity, the remaining port won't handle the extra load. potentialBenefits: Improves resilience during port failures pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Use Network Insights for monitoring ExpressRoute Port light levels, bits per second in/out, and line protocol. Set alerts based on Azure Monitor Baseline Alerts for light levels, bits per second in/out, and line protocol exceeding specific thresholds. potentialBenefits: Enhanced network performance and health pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/frontDoorWebApplicationFirewallPolicies/recommendations.yaml b/azure-resources/Network/frontDoorWebApplicationFirewallPolicies/recommendations.yaml index b698ebaf5..57d2715d0 100644 --- a/azure-resources/Network/frontDoorWebApplicationFirewallPolicies/recommendations.yaml +++ b/azure-resources/Network/frontDoorWebApplicationFirewallPolicies/recommendations.yaml @@ -9,7 +9,6 @@ WAF may mistakenly block legitimate requests (false positives). These can be identified by examining the last 24 hours of blocked requests in Log Analytics. potentialBenefits: Reduces false positives, improves access pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -33,7 +32,6 @@ WAF may block legitimate requests as false positives. Identifying blocked requests within the last 24 hours through Log Analytics can help manage and mitigate these incorrect blockages efficiently. 
potentialBenefits: Improve false positive identification pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -53,7 +51,6 @@ Monitoring the health of your Web Application Firewall and the applications it protects is crucial. This can be achieved through integration with Microsoft Defender for Cloud, Azure Monitor, and Azure Monitor logs, ensuring optimal performance and security. potentialBenefits: Enhanced security and health insight pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/loadBalancers/recommendations.yaml b/azure-resources/Network/loadBalancers/recommendations.yaml index da0d98037..6214c77df 100644 --- a/azure-resources/Network/loadBalancers/recommendations.yaml +++ b/azure-resources/Network/loadBalancers/recommendations.yaml @@ -9,7 +9,6 @@ Selecting Standard SKU Load Balancer enhances reliability through availability zones and zone resiliency, ensuring deployments withstand zone and region failures. Unlike Basic, it supports global load balancing and offers an SLA. potentialBenefits: Enhanced reliability and SLA support pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Deploying Azure Load Balancers with at least two instances in the backend prevents a single point of failure and supports scalability. Pairing with Virtual Machine Scale Sets is advised for optimal scale building. potentialBenefits: Enhances reliability and scalability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ Outbound rules for Standard Public Load Balancer involve manual port allocation for backend pools, limiting scalability and risk of SNAT port exhaustion. NAT Gateway is recommended for its dynamic scaling and secure internet connectivity. 
potentialBenefits: Enhanced scalability and reliability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ In regions with Availability Zones, assigning a zone-redundant frontend IP to a Standard Load Balancer ensures continuous traffic distribution even if one availability zone fails, provided other healthy zones and backend instances are available to receive the traffic. potentialBenefits: Enhances uptime and resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -83,7 +79,6 @@ Health probes are used by Azure Load Balancers to determine the status of backend endpoints. Using custom health probes that are aligned with vendor recommendations enhances understanding of backend availability and facilitates monitoring of backend services for any impact. potentialBenefits: Ensures backend uptime monitoring. pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/natGateways/recommendations.yaml b/azure-resources/Network/natGateways/recommendations.yaml index 57e07554f..b97b4d3f9 100644 --- a/azure-resources/Network/natGateways/recommendations.yaml +++ b/azure-resources/Network/natGateways/recommendations.yaml @@ -9,7 +9,6 @@ NAT Gateway provides 64,512 SNAT ports per public IP address and supports up to 16 public IP addresses. Monitor "Total SNAT connection count" metric to determine if you're nearing the connection limit of NAT gateway. You can scale the NAT gateway by adding more public IP addresses. potentialBenefits: Enhances reliability and scalability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -29,7 +28,6 @@ Use Network Insights for monitoring and alerting on your NAT gateway.Use Total SNAT connection count metric to determine if you're nearing the connection limit of NAT gateway. 
Set alerts based on Azure Monitor Baseline Alerts (AMBA) thresholds for NAT Gateway potentialBenefits: Enhanced network performance and health pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -49,7 +47,6 @@ A zonal promise for zone isolation scenarios exists when a virtual machine instance using a NAT gateway resource is in the same zone as the NAT gateway resource and its public IP addresses. The pattern you want to use for zone isolation is creating a "zonal stack" per availability zone. potentialBenefits: Enhances reliability and scalability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/networkSecurityGroups/recommendations.yaml b/azure-resources/Network/networkSecurityGroups/recommendations.yaml index 2dea1d09d..c12866ed6 100644 --- a/azure-resources/Network/networkSecurityGroups/recommendations.yaml +++ b/azure-resources/Network/networkSecurityGroups/recommendations.yaml @@ -9,7 +9,6 @@ Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations. potentialBenefits: Enhanced monitoring and security insights pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Create Alerts with Azure Monitor for operations like creating or updating Network Security Group rules to catch unauthorized/undesired changes to resources and spot attempts to bypass firewalls or access resources from the outside. potentialBenefits: Enhanced security and change monitoring pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ As an administrator, you can lock an Azure subscription, resource group, or resource to protect them from accidental deletions and modifications. The lock overrides user permissions. 
Locks can prevent either deletions or modifications and are known as Delete and Read-only in the portal. potentialBenefits: Prevents accidental edits/deletions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Monitoring, managing, and understanding your network is crucial for protection and optimization. Knowing the current state, who and from where connections are made, open internet ports, expected and irregular behavior, and traffic spikes is essential. potentialBenefits: Enhances security and optimizes network pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -81,7 +77,6 @@ Azure network security groups filter network traffic between resources in a virtual network, using security rules to allow or deny inbound or outbound traffic based on source, destination, port, and protocol. potentialBenefits: Enhanced traffic control and security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/networkWatchers/recommendations.yaml b/azure-resources/Network/networkWatchers/recommendations.yaml index 67d2b4b96..63ec37314 100644 --- a/azure-resources/Network/networkWatchers/recommendations.yaml +++ b/azure-resources/Network/networkWatchers/recommendations.yaml @@ -9,7 +9,6 @@ Azure Network Watcher offers tools for monitoring, diagnosing, viewing metrics, and managing logs for IaaS resources. It helps maintain the health of VMs, VNets, application gateways, load balancers, but not for PaaS or Web analytics. potentialBenefits: Enhanced monitoring and diagnostics for Azure IaaS pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Network security group flow logging is a feature of Azure Network Watcher that logs IP traffic info through a network security group. 
If in Failed state, monitoring data from the associated resource is not collected. potentialBenefits: Ensures IP traffic logging pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Improves monitoring for Azure and Hybrid connectivity potentialBenefits: Improves monitoring for Azure and Hybrid connectivity pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -63,7 +60,6 @@ Improves monitoring and security for Azure and Hybrid connectivity potentialBenefits: Improves monitoring and security for Azure connectivity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Improves monitoring, security and troubleshooting for Azure and Hybrid connectivity potentialBenefits: Improves monitoring, security and troubleshooting. pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/p2sVpnGateways/recommendations.yaml b/azure-resources/Network/p2sVpnGateways/recommendations.yaml index 4aae076f4..9eaddd165 100644 --- a/azure-resources/Network/p2sVpnGateways/recommendations.yaml +++ b/azure-resources/Network/p2sVpnGateways/recommendations.yaml @@ -8,7 +8,6 @@ longDescription: Set up monitoring and alerts for Point-to-Site VPN gateways. Create alert rule for ensuring promptly response to critical events such as Gateway overutilization, connection count limits and User VPN route limits. potentialBenefits: Detection and mitigation to avoid disruptions. 
pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/privateDnsZones/recommendations.yaml b/azure-resources/Network/privateDnsZones/recommendations.yaml index 1a1fe0daa..9f47e722b 100644 --- a/azure-resources/Network/privateDnsZones/recommendations.yaml +++ b/azure-resources/Network/privateDnsZones/recommendations.yaml @@ -9,7 +9,6 @@ Assign the built-in Private DNS Zone Contributor role to specific authorized users, groups, and entities to protect against unauthorized or accidental changes to Private DNS Zones and records. Restrict access by granting Private DNS Zone Contributor permission to all zones. potentialBenefits: Prevents DNS outages pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Use Azure Monitor to monitor Private DNS Zone query volume, record set count, and capacity metrics for Record Set, Virtual Network Link, and Virtual Network Link with auto-registration. Create alerts based on Azure Monitor Baseline Alerts for these metrics that exceed specific thresholds. potentialBenefits: Enhanced DNS reliability and alerting pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ For business continuity scenarios with a low recovery time objective (RTO), ensure that distinct regional production and disaster recovery (DR) Private DNS Zones are configured and have identical workload and resource DNS entries. This keeps DNS resolution consistent across both zones. potentialBenefits: Ensures seamless failover for DNS during a regional outage pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Azure Private DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. 
You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets. potentialBenefits: Ensures that no cached DNS records exist past RPO targets pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/privateEndpoints/recommendations.yaml b/azure-resources/Network/privateEndpoints/recommendations.yaml index b25afd13e..be851040b 100644 --- a/azure-resources/Network/privateEndpoints/recommendations.yaml +++ b/azure-resources/Network/privateEndpoints/recommendations.yaml @@ -9,7 +9,6 @@ A private endpoint has two custom properties, static IP address and the network interface name, which must be set at creation. If not in Succeeded state, there may be issues with the endpoint or associated resource. potentialBenefits: Ensure connection availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/publicIPAddresses/recommendations.yaml b/azure-resources/Network/publicIPAddresses/recommendations.yaml index 76d02852c..dbfcc4f9e 100644 --- a/azure-resources/Network/publicIPAddresses/recommendations.yaml +++ b/azure-resources/Network/publicIPAddresses/recommendations.yaml @@ -9,7 +9,6 @@ Public IP addresses in Azure can be of standard SKU, available as non-zonal, zonal, or zone-redundant. Zone-redundant IPs are accessible across all zones, resisting any single zone failure, thereby providing higher resilience. potentialBenefits: Enhanced resilience with zone redundancy pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Prevent connectivity failures due to SNAT port exhaustion by employing NAT gateway for outbound traffic from virtual networks, ensuring dynamic scaling and secure internet connections. 
potentialBenefits: Avoids SNAT port exhaustion risks pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Basic SKU public IP addresses will be retired on September 30, 2025. Users are advised to upgrade to Standard SKU public IP addresses before this date to avoid service disruptions. potentialBenefits: Avoids service disruption pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -70,7 +67,6 @@ DDoS attacks can be targeted at any endpoint that is publicly reachable through the internet. potentialBenefits: Avoids service disruption pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/routeTables/recommendations.yaml b/azure-resources/Network/routeTables/recommendations.yaml index 764a083c4..f0d5b59af 100644 --- a/azure-resources/Network/routeTables/recommendations.yaml +++ b/azure-resources/Network/routeTables/recommendations.yaml @@ -9,7 +9,6 @@ Create Alerts with Azure Monitor for operations like Create or Update Route Table to spot unauthorized/undesired changes in production resources. This setup aids in identifying improper routing changes, including efforts to evade firewalls or access resources from outside. potentialBenefits: Enhanced security and change detection pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ As an administrator, you can protect Azure subscriptions, resource groups, or resources from accidental deletions and modifications by setting locks. 
potentialBenefits: Prevents accidental edits/deletions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/trafficManagerProfiles/recommendations.yaml b/azure-resources/Network/trafficManagerProfiles/recommendations.yaml index 48918ea6f..91c11f307 100644 --- a/azure-resources/Network/trafficManagerProfiles/recommendations.yaml +++ b/azure-resources/Network/trafficManagerProfiles/recommendations.yaml @@ -9,7 +9,6 @@ Monitor status should be online to ensure failover for application workload. If Traffic Manager's health shows Degraded, one or more endpoints may also be Degraded. potentialBenefits: Ensures failover functionality pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -31,7 +30,6 @@ When configuring the Azure traffic manager, provision at least two endpoints to ensure workloads can fail-over to another instance, enhancing reliability and availability. potentialBenefits: Enhances failover capabilities pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Profiles should have multiple endpoints to ensure availability in case an endpoint fails. It's also advised to distribute these endpoints across different regions for enhanced reliability. potentialBenefits: Enhances availability across regions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -67,7 +64,6 @@ For geographic routing, traffic is directed to endpoints based on specific regions. If a region fails, without a predefined failover, configuring an endpoint to "All (World)" for geographic profiles can prevent traffic black holes, ensuring service remains available. 
potentialBenefits: Avoids traffic black holing, ensures availability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/virtualHubs/recommendations.yaml b/azure-resources/Network/virtualHubs/recommendations.yaml index ff368dcf1..922f7f5dc 100644 --- a/azure-resources/Network/virtualHubs/recommendations.yaml +++ b/azure-resources/Network/virtualHubs/recommendations.yaml @@ -8,7 +8,6 @@ longDescription: Set up monitoring and alerts for v-Hubs. Create alert rule for ensuring promptly response to changes in BGP status and Data processed by v-Hubs. potentialBenefits: Detection and mitigation to avoid disruptions. pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Network/virtualNetworkGateways/recommendations.yaml b/azure-resources/Network/virtualNetworkGateways/recommendations.yaml index 971fdb370..3a7acfe76 100644 --- a/azure-resources/Network/virtualNetworkGateways/recommendations.yaml +++ b/azure-resources/Network/virtualNetworkGateways/recommendations.yaml @@ -9,7 +9,6 @@ To increase reliability, it's advised that each ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience. potentialBenefits: Enhanced resiliency for Azure service pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Azure ExpressRoute gateway offers variable SLAs based on deployment in single or multiple availability zones. To deploy virtual network gateways across zones automatically, use zone-redundant gateways for accessing critical, scalable services with increased resilience. 
potentialBenefits: Enhanced SLA and resilience pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Configuring an Azure Resource lock for ExpressRoute gateway prevents accidental deletion by enabling administrators to lock an Azure subscription, resource group, or resource, thereby protecting them from unintended user deletions and modifications, with the lock overriding all user permissions. potentialBenefits: Prevents accidental deletions pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -67,7 +64,6 @@ Use Network Insights for monitoring ExpressRoute Gateway's health, including availability, performance, and scalability. potentialBenefits: Enhanced monitoring and alerting pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -87,7 +83,6 @@ While multiple VNets can connect via the same ExpressRoute gateway, Microsoft recommends using alternatives like VNet peering, Azure Firewall, NVA, Azure Route Server, site-to-site VPN, virtual WAN, or SD-WAN for VNet-to-VNet communication to optimize network performance and management. potentialBenefits: Enhanced VNet integration efficiency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -105,7 +100,6 @@ ExpressRoute gateways are updated for improved functionality, reliability, performance, and security. Customer-controlled maintenance configuration and scheduling minimize update impact and align with your maintenance windows. potentialBenefits: Minimizes update impact pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -123,7 +117,6 @@ Azure VPN gateway offers variable SLAs based on deployment in one or two availability zones. 
Deploying zone-redundant virtual network gateways across availability zones ensures zone-resiliency, improving access to mission-critical, scalable services on Azure. potentialBenefits: Enhanced reliability and scalability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -145,7 +138,6 @@ The active-active mode is available for all SKUs except Basic, allowing for two Gateway IP configurations and two public IP addresses, enhancing redundancy and traffic handling. potentialBenefits: Enhanced reliability and network capacity pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -165,7 +157,6 @@ Deploying active-active VPN concentrators and Azure VPN Gateways maximizes resilience and availability using a fully-meshed topology with four IPSec tunnels. potentialBenefits: Maximizes resilience and availability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -183,7 +174,6 @@ Set up monitoring and alerts for Virtual Network Gateway health to utilize a variety of metrics for ensuring operational efficiency and prompt response to any disruptions. potentialBenefits: Improved uptime and issue awareness pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -201,7 +191,6 @@ VPN gateway leverages service health to inform users about both planned and unplanned maintenance, ensuring they are notified about modifications to their VPN connectivity. potentialBenefits: Improves VPN maintenance alerts pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -221,7 +210,6 @@ For zone-redundant VPN gateways, always use zone-redundant Standard SKU public IPs to avoid deploying all instances in one zone. This ensures the gateway's reliability, applying to both active-passive (single IP) and active-active (dual IP) setups. 
potentialBenefits: Enhanced reliability and disaster recovery pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/virtualNetworks/recommendations.yaml b/azure-resources/Network/virtualNetworks/recommendations.yaml index 0619eda28..731a720f8 100644 --- a/azure-resources/Network/virtualNetworks/recommendations.yaml +++ b/azure-resources/Network/virtualNetworks/recommendations.yaml @@ -9,7 +9,6 @@ Network security groups and application security groups allow filtering of inbound and outbound traffic by IP, port, and protocol, adding a security layer at the Subnet level. potentialBenefits: Enhanced subnet security and traffic control pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -33,7 +32,6 @@ Azure DDoS Protection offers enhanced mitigation features against DDoS attacks and is auto-tuned to protect specific resources in a virtual network, combined with application design best practices. potentialBenefits: Enhanced DDoS attack mitigation pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -51,7 +49,6 @@ Use VNet service endpoints only if Private Link isn't available and no data movement concerns. This feature restricts Azure service access to specified VNet and subnet, enhancing network security and isolating service traffic. potentialBenefits: Enhanced security and data isolation pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Network/vpnGateways/recommendations.yaml b/azure-resources/Network/vpnGateways/recommendations.yaml index 57e40d33a..79a59cab7 100644 --- a/azure-resources/Network/vpnGateways/recommendations.yaml +++ b/azure-resources/Network/vpnGateways/recommendations.yaml @@ -8,7 +8,6 @@ longDescription: Set up monitoring and alerts for v-Hub's VPN Gateway. 
Create alert rule for ensuring promptly response to critical events such as packet drop counts, BGP status, Gateway overutilization. potentialBenefits: Detection and mitigation to avoid disruptions. pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/NetworkFunction/azureTrafficCollectors/recommendations.yaml b/azure-resources/NetworkFunction/azureTrafficCollectors/recommendations.yaml index 0fb51e53a..f07c97e0d 100644 --- a/azure-resources/NetworkFunction/azureTrafficCollectors/recommendations.yaml +++ b/azure-resources/NetworkFunction/azureTrafficCollectors/recommendations.yaml @@ -9,7 +9,6 @@ ExpressRoute Traffic Collector samples network flows over ExpressRoute Direct or Service-Provider based circuits, sending flow logs to a Log Analytics workspace for analysis or export to visualization tools/SIEM. potentialBenefits: Enhanced network flow analysis and DR readiness pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/OperationalInsights/workspaces/recommendations.yaml b/azure-resources/OperationalInsights/workspaces/recommendations.yaml index 02206c526..003d3f7d2 100644 --- a/azure-resources/OperationalInsights/workspaces/recommendations.yaml +++ b/azure-resources/OperationalInsights/workspaces/recommendations.yaml @@ -1,4 +1,4 @@ -- description: Enable Log Analytics data export to GRS or GZRS +- description: Enable Log Analytics data export to GRS or GZRS aprlGuid: b36fd2ac-dd83-664a-ab48-ff7b8d3b189d recommendationTypeId: null recommendationControl: Governance @@ -9,7 +9,6 @@ Data export in a Log Analytics workspace to an Azure Storage account enhances data protection against regional failures by using geo-redundant (GRS) or geo-zone-redundant storage (GZRS), mainly for compliance and integration with other Azure services and tools. 
potentialBenefits: Enhances compliance and regional fault tolerance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -29,7 +28,6 @@ A health status alert will proactively notify you if a workspace becomes unavailable because of a datacenter or regional failure. potentialBenefits: Early alert for workspace failure pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/RecoveryServices/vaults/recommendations.yaml b/azure-resources/RecoveryServices/vaults/recommendations.yaml index 93021aef7..d781ac461 100644 --- a/azure-resources/RecoveryServices/vaults/recommendations.yaml +++ b/azure-resources/RecoveryServices/vaults/recommendations.yaml @@ -9,7 +9,6 @@ Ensure VM failover settings' static IP addresses are available in the failover subnet to maintain consistent IP assignment during failover, with the target VM receiving the same static IP if it's available or the next available IP otherwise. IP adjustments can be made in VM Network settings. potentialBenefits: Smooth failover IP management pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Perform a test failover to validate your BCDR strategy and ensure that your applications are functioning correctly in the target region without impacting your production environment. Test your Disaster Recovery plan periodically without any data loss or downtime, using test failovers. potentialBenefits: Ensures BCDR plan accuracy and VM performance pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -45,7 +43,6 @@ Classic alerts for Recovery Services vaults in Azure Backup will be retired on 31 March 2026. potentialBenefits: Enhanced, scalable, and consistent alerting. 
pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ Cross Region Restore enables the restoration of Azure VMs in a secondary, Azure paired region, facilitating drills for audit or compliance and allowing recovery of VMs or disks in the event of a primary region disaster. It is an opt-in feature available exclusively for GRS vaults. potentialBenefits: Enhances disaster recovery capabilities pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -89,7 +85,6 @@ With soft delete, if backup data is deleted, the backup data is retained for 14 additional days, allowing the recovery of that backup item with no data loss with no cost to you. Soft delete is enabled by default. Disabling this feature isn't recommended. potentialBenefits: Enhances disaster recovery capabilities pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Resources/resourceGroups/recommendations.yaml b/azure-resources/Resources/resourceGroups/recommendations.yaml index e139943c4..89619a110 100644 --- a/azure-resources/Resources/resourceGroups/recommendations.yaml +++ b/azure-resources/Resources/resourceGroups/recommendations.yaml @@ -9,7 +9,6 @@ Ensure resource locations align with their resource group to manage resources during regional outages. ARM stores resource data, which if in an unavailable region, could halt updates, rendering resources read-only. 
potentialBenefits: Improves outage management pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/ServiceBus/namespaces/recommendations.yaml b/azure-resources/ServiceBus/namespaces/recommendations.yaml index 13e886adf..c2f6485c1 100644 --- a/azure-resources/ServiceBus/namespaces/recommendations.yaml +++ b/azure-resources/ServiceBus/namespaces/recommendations.yaml @@ -9,7 +9,6 @@ Availability zones are now enabled by default on new namespaces where possible. Existing namespaces are being migrated to availability zones where possible. The property zoneRedundant might still show as false, even when availability zones has been enabled. potentialBenefits: Enhances fault tolerance and uptime pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -31,7 +30,6 @@ Use Service Bus with auto-scale for high availability. The Premium SKU supports auto-scale, ensuring that the resources are automatically scaled based on the load. potentialBenefits: Ensures high availability and performance pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -49,7 +47,6 @@ As of 31 October 2024, TLS 1.0 and TLS 1.1 will no longer be supported on Azure including Service Bus to enhance security and provide best-in-class encryption for your data. Change the minimum TLS version for your Service Bus namespace to TLS v1.2 or higher. 
potentialBenefits: Avoids service disruption pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/SignalRService/signalR/recommendations.yaml b/azure-resources/SignalRService/signalR/recommendations.yaml index 81de2bf2b..3502a073c 100644 --- a/azure-resources/SignalRService/signalR/recommendations.yaml +++ b/azure-resources/SignalRService/signalR/recommendations.yaml @@ -9,7 +9,6 @@ Use SignalR with zone redundancy for production to improve uptime. This feature, available in the Premium tier, is activated upon creating or upgrading to Premium. Standard can upgrade to Premium without downtime. potentialBenefits: Enhances reliability and uptime pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Sql/managedInstances/recommendations.yaml b/azure-resources/Sql/managedInstances/recommendations.yaml index be93d21d2..f89a2fd41 100644 --- a/azure-resources/Sql/managedInstances/recommendations.yaml +++ b/azure-resources/Sql/managedInstances/recommendations.yaml @@ -9,7 +9,6 @@ Azure SQL Managed Instance offers built-in availability by deploying multiple replicas in the same zone. For higher availability, use a zone-redundant configuration that spreads replicas across three Azure availability zones, each with independent power, cooling, and networking. potentialBenefits: Enhanced availability and reliability pgVerified: false - publishedToLearn: false automationAvailable: True tags: null learnMoreLink: @@ -27,7 +26,6 @@ Configuring zone redundancy option for backups copies your backup file synchronously across three Azure availability zones in the primary region. If Geo is selected, then it copies your data asynchronously three times to a single physical location in the paired secondary region. 
potentialBenefits: Enhanced availability and reliability pgVerified: false - publishedToLearn: false automationAvailable: True tags: null learnMoreLink: @@ -45,7 +43,6 @@ Redirect mode enables direct connectivity to the instance bypassing the local gateway component and resulting in improved latency and throughput. Redirect mode applies to the VNet-local endpoint only, while the public and private endpoint will always operate in Proxy connection mode. potentialBenefits: Improved latency and throughput pgVerified: false - publishedToLearn: false automationAvailable: True tags: null learnMoreLink: @@ -63,7 +60,6 @@ During an outage on the managed instance, use the failover group to switch all databases to a secondary region, either manually or automatically. Route connections to the failover group’s listener instead of the primary instance to avoid changing the connection string after geo-failover. potentialBenefits: Ensure seamless service with cross-region failover pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -81,7 +77,6 @@ Monitoring and alerting are an important part of database operations. When working with Azure SQL Managed Instance, make use of Azure Monitor and Database watcher to ensure that you capture relevant database metrics. potentialBenefits: Quick incident detection and response pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -99,7 +94,6 @@ It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner. 
potentialBenefits: Enhanced security and data recovery pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Sql/servers/kql/74c2491d-048b-0041-a140-935960220e20.kql b/azure-resources/Sql/servers/kql/74c2491d-048b-0041-a140-935960220e20.kql index 55d36478d..382a6c182 100644 --- a/azure-resources/Sql/servers/kql/74c2491d-048b-0041-a140-935960220e20.kql +++ b/azure-resources/Sql/servers/kql/74c2491d-048b-0041-a140-935960220e20.kql @@ -1,12 +1,12 @@ // Azure Resource Graph Query // Provides a list of SQL databases that are not part of Geo Replication. resources -| where type == "microsoft.sql/servers/databases" +| where type == "microsoft.sql/servers/databases" and name != "master" | summarize secondaryTypeCount = countif(isnotempty(properties.secondaryType)) by name | where secondaryTypeCount == 0 | join kind=inner ( - Resources - | where type == "microsoft.sql/servers/databases" + resources + | where type == "microsoft.sql/servers/databases" and name != "master" ) on name | extend param1 = "Not part of Geo Replication" | project recommendationId = "74c2491d-048b-0041-a140-935960220e20", name, id, tags, param1 diff --git a/azure-resources/Sql/servers/kql/943c168a-2ec2-a94c-8015-85732a1b4859.kql b/azure-resources/Sql/servers/kql/943c168a-2ec2-a94c-8015-85732a1b4859.kql index 0b5c51e89..638da738e 100644 --- a/azure-resources/Sql/servers/kql/943c168a-2ec2-a94c-8015-85732a1b4859.kql +++ b/azure-resources/Sql/servers/kql/943c168a-2ec2-a94c-8015-85732a1b4859.kql @@ -1,6 +1,6 @@ // Azure Resource Graph Query // Provides a list of SQL databases that are not configured to use a failover-group. 
resources -| where type =~'microsoft.sql/servers/databases' +| where type =~'microsoft.sql/servers/databases' and name !~ "master" | where isnull(properties['failoverGroupId']) | project recommendationId = "943c168a-2ec2-a94c-8015-85732a1b4859", name, id, tags, param1= strcat("databaseId=", properties['databaseId']) diff --git a/azure-resources/Sql/servers/recommendations.yaml b/azure-resources/Sql/servers/recommendations.yaml index c72da3867..325675bbd 100644 --- a/azure-resources/Sql/servers/recommendations.yaml +++ b/azure-resources/Sql/servers/recommendations.yaml @@ -9,7 +9,6 @@ Active Geo Replication ensures business continuity by utilizing readable secondary database replicas. In case of primary database failure, manually failover to secondary database. Secondaries, up to four, can be in same/different regions, used for read-only access. potentialBenefits: Enhanced disaster recovery and read scalability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Failover Groups facilitate disaster recovery by configuring databases on one logical server to replicate to another region's logical server. This streamlines geo-replicated database management, offering a single endpoint for connection routing to replicated databases if the primary server fails. potentialBenefits: Improves load balancing and disaster recovery pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ By default, Azure SQL Database premium tier provisions multiple copies within the same region. For geo redundancy, databases can be set as Zone Redundant, distributing copies across Azure Availability Zones to maintain availability during regional outages. 
potentialBenefits: Enhanced reliability, no extra cost pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ During transient failures, the application should handle connection retries effectively with Azure SQL Database. No Database layer configuration is needed; instead, the application must be set up for graceful retrying. potentialBenefits: Enhanced connectivity stability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Monitoring and alerting are an important part of database operations. When working with Azure SQL Database, make use of Azure Monitor and SQL Insights to ensure that you capture relevant database metrics. potentialBenefits: Quick incident detection and response pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -105,7 +100,6 @@ It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner. potentialBenefits: Enhanced security and data recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -125,7 +119,6 @@ When using Failover Groups, it is recommended to connect to the Failover Group endpoint instead of individual database endpoints. This allows for automatic redirection to the secondary database in case of a failover, ensuring high availability. 
potentialBenefits: Enhanced disaster recovery pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Storage/storageAccounts/recommendations.yaml b/azure-resources/Storage/storageAccounts/recommendations.yaml index 2b6ac7440..e5bec6d61 100644 --- a/azure-resources/Storage/storageAccounts/recommendations.yaml +++ b/azure-resources/Storage/storageAccounts/recommendations.yaml @@ -9,7 +9,6 @@ Redundancy ensures storage accounts meet availability and durability targets amidst failures, weighing lower costs against higher availability. Locally redundant storage offers the least durability at the lowest cost. potentialBenefits: High availability and durability for storage pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Classic storage accounts will be fully retired on August 31, 2024. If you have classic storage accounts, start planning your migration now. potentialBenefits: Avoids service retirement issues pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -49,7 +47,6 @@ Use premium performance block blob storage instead of standard performance storage for workloads that require fast storage response times and/or high transaction rates. potentialBenefits: Optimized cost and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -75,7 +72,6 @@ The soft delete option enables data recovery if mistakenly deleted, while the Lock feature prevents the accidental deletion of the storage account itself, ensuring additional security and data integrity measures. 
potentialBenefits: Prevents accidental data/account loss pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -93,7 +89,6 @@ Consider enabling versioning for Azure Storage Accounts to recover from accidental modifications or deletions and manage blob operation latency. Microsoft advises maintaining fewer than 1000 versions per blob to optimize performance. Lifecycle management can help delete old versions automatically. potentialBenefits: Recover data, manage latency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -111,7 +106,6 @@ Consider enabling point-in-time restore for standard general purpose v2 accounts with flat namespace to protect against accidental deletion or corruption by restoring block blob data to an earlier state. potentialBenefits: Protects data from loss/corruption pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -131,7 +125,6 @@ For critical applications and business processes relying on Azure, monitoring and alerts are crucial. Resource logs are only stored after creating a diagnostic setting to route logs to specified locations, requiring selection of log categories to collect. potentialBenefits: Enhanced alerting and log analysis pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -151,7 +144,6 @@ General-purpose v2 accounts are recommended for most storage scenarios offering the latest features or the lowest per-gigabyte pricing. Legacy accounts like Standard general-purpose v1 and Blob Storage aren't advised by Microsoft but may fit specific scenarios. potentialBenefits: Latest features, lowest cost pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -171,7 +163,6 @@ Leverage Azure Private Link Service for secure access to Azure Storage and services via Private Endpoint in your VNet. 
Eliminate the need for public IPs, ensuring data privacy. Enjoy granular access control for enhanced security. potentialBenefits: Secure, private access to storage with no public IPs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/StreamAnalytics/streamingJobs/recommendations.yaml b/azure-resources/StreamAnalytics/streamingJobs/recommendations.yaml index aeb782f41..184683df3 100644 --- a/azure-resources/StreamAnalytics/streamingJobs/recommendations.yaml +++ b/azure-resources/StreamAnalytics/streamingJobs/recommendations.yaml @@ -9,7 +9,6 @@ Stream Analytics cluster (dedicated) offers more reliable performance guarantees. All the jobs running on your cluster belong only to you. You can also have access to important features like private endpoints, Auto-Scaling, Vnet Support, etc. potentialBenefits: Enhanced reliability and security pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Configure Autoscale to allow your job to dynamically change the allocated number of Streaming Units (SU) based on load, metrics, and/or schedule. potentialBenefits: Enhanced reliability and security pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Subscription/subscriptions/recommendations.yaml b/azure-resources/Subscription/subscriptions/recommendations.yaml index 2da7d31c1..1905df8be 100644 --- a/azure-resources/Subscription/subscriptions/recommendations.yaml +++ b/azure-resources/Subscription/subscriptions/recommendations.yaml @@ -9,7 +9,6 @@ A Citrix Managed Azure subscription supports VMs with VDA for app/desktop delivery, excluding other machines like Cloud Connectors. When close to the limit, signaled by a dashboard notification, and with sufficient licenses, request another subscription. Can't exceed the given limits for catalogs. 
potentialBenefits: Avoids hitting limit, ensures reliability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ The root management group in Azure is designed for organizational hierarchy, allowing for all management groups and subscriptions to fold into it. potentialBenefits: Enhanced security, compliance, and management pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/VirtualMachineImages/imageTemplates/recommendations.yaml b/azure-resources/VirtualMachineImages/imageTemplates/recommendations.yaml index 619071f11..9db57a1cc 100644 --- a/azure-resources/VirtualMachineImages/imageTemplates/recommendations.yaml +++ b/azure-resources/VirtualMachineImages/imageTemplates/recommendations.yaml @@ -9,7 +9,6 @@ When building Image Templates, use sources for gen 2 VMs. Gen 2 offers more memory, supports >2TB disks, uses UEFI for faster boot/installation, has Intel SGX, and virtualized persistent memory (vPMEM), unlike gen 1's BIOS-based architecture. potentialBenefits: More memory, supports >2TB disks, faster boot pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ The Azure Image Builder service lacks availability zones support. Replicating Image Templates to a secondary region will enable the build of new images in secondary region. 
potentialBenefits: Enhances disaster recovery capability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: diff --git a/azure-resources/Web/serverFarms/recommendations.yaml b/azure-resources/Web/serverFarms/recommendations.yaml index 5bbb32913..0bb5539d9 100644 --- a/azure-resources/Web/serverFarms/recommendations.yaml +++ b/azure-resources/Web/serverFarms/recommendations.yaml @@ -9,7 +9,6 @@ Azure's feature of deploying App Service plans across availability zones enhances resiliency and reliability by ensuring operation during datacenter failures, providing redundancy without needing different regions, thus minimizing downtime and maintaining uninterrupted services. potentialBenefits: Enhances app resiliency and reliability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -29,7 +28,6 @@ Choose Standard/Premium Azure App Service Plan for robust apps with advanced scaling, high availability, better performance, and multiple slots, ensuring resilience and continuous operation. potentialBenefits: Enhanced scaling and reliability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -47,7 +45,6 @@ Avoid frequent scaling up/down of Azure App Service instances to prevent service disruptions. Choose the right tier and size for the workload and scale out for traffic changes, as scaling adjustments can trigger application restarts. potentialBenefits: Minimizes restarts, enhances stability pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -65,7 +62,6 @@ It is strongly recommended to create separate App Service plans for production and test environments to avoid using slots within your production deployment for testing purposes. 
potentialBenefits: Protects prod performance; avoids test impact pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Enabling Autoscale/Automatic Scaling for your Azure App Service ensures sufficient resources for incoming requests. Autoscaling is rule-based, whereas Automatic Scaling, a newer feature, automatically adjusts resources based on HTTP traffic. potentialBenefits: Optimizes resources for traffic pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-resources/Web/sites/recommendations.yaml b/azure-resources/Web/sites/recommendations.yaml index 9261bbd34..a9992b8dd 100644 --- a/azure-resources/Web/sites/recommendations.yaml +++ b/azure-resources/Web/sites/recommendations.yaml @@ -9,7 +9,6 @@ Enabling diagnostics logging for your Azure App Service is crucial for monitoring and diagnostics, including both application logging and web server logging. potentialBenefits: Monitoring and Alerting pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Use Application Insights to monitor app performance and load behavior, offering real-time insights, issue diagnosis, and root-cause analysis. It supports ASP.NET, ASP.NET Core, Java, and Node.js on Azure App Service, now with built-in monitoring. potentialBenefits: Real-time insights and issue diagnosis pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -47,7 +45,6 @@ If your solution includes both a web front end and a web API, decomposing them into separate App Service apps facilitates solution decomposition by workload, allowing for independent scaling. Initially, you can deploy both in the same plan and separate them for independent scaling when necessary. 
potentialBenefits: Independent scaling, easier management pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -65,7 +62,6 @@ Creating a separate storage account for logs and not using the same one for application data prevents logging activities from reducing application performance by ensuring that the resources dedicated to handling application data are not burdened by logging processes. potentialBenefits: Improves app performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -83,7 +79,6 @@ Create a deployment slot for staging to deploy updates, verify them, and ensure all instances are warmed up before production swap, reducing bad update chances. An LKG slot allows easy rollback to a previous good deployment if issues arise later, enhancing reliability. potentialBenefits: Safer updates and easy rollback pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -101,7 +96,6 @@ Use app settings for configuration and define them in Resource Manager templates or via PowerShell to facilitate part of an automated deployment/update process for improved reliability. potentialBenefits: Enhanced reliability via automation pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -119,7 +113,6 @@ Use Health Check for production workloads. Health check increases your application's availability by rerouting requests away from unhealthy instances, and replacing instances if they remain unhealthy. The Health check path should check critical components of your application. potentialBenefits: Enhanced reliability via automation pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -137,7 +130,6 @@ Use network access restrictions to define a priority-ordered allow/deny list that controls network access to your app. 
Web application firewalls, such as the one available in Application Gateway, are recommended for protection of public-facing web applications. potentialBenefits: Enhanced security pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -155,7 +147,6 @@ App Service should be configured with a minimum of two instances for production workloads. If apps have a longer warmup time a minimum of three instances should be used. potentialBenefits: Improves app performace pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -173,7 +164,6 @@ Auto Heal allows you to mitigate your apps when it runs into unexpected situations like HTTP server errors, resource exhaustion, etc. You can configure different triggers based on your need and choose to recycle the app to recover it from a bad state. potentialBenefits: Improved app availability pgVerified: false - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -191,7 +181,6 @@ Add a warmup trigger to pre-load custom dependencies during the pre-warming process so that your functions are ready to start processing requests immediately. potentialBenefits: Improved app availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -209,7 +198,6 @@ A host ID must be between 1 and 32 characters, contain only lowercase letters, numbers, and dashes, not start or end with a dash, and not contain consecutive dashes. The host ID value should be unique for all apps/slots you're running. potentialBenefits: Easier management pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -227,7 +215,6 @@ Beginning on December 13, 2022, function apps running on versions 2.x and 3.x of the Azure Functions runtime have reached the end of life (EOL) of extended support. 
We highly recommend you migrating your function apps to version 4.x of the Functions runtime. potentialBenefits: Better governance pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -245,7 +232,6 @@ The FUNCTIONS_WORKER_RUNTIME setting in the Function App configuration should be set to the appropriate value based on the language you are using. This setting is used to determine the language worker that will be used to execute your functions. potentialBenefits: Better governance pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-specialized-workloads/avd/recommendations.yaml b/azure-specialized-workloads/avd/recommendations.yaml index ef2ef7c49..9c1fd86a8 100644 --- a/azure-specialized-workloads/avd/recommendations.yaml +++ b/azure-specialized-workloads/avd/recommendations.yaml @@ -9,7 +9,6 @@ Set up Service Health alerts so that you stay aware of service issues, planned maintenance, or other changes that might affect your Azure Virtual Desktop resources. Use Resource Health to monitor your VMs and storage solutions. potentialBenefits: Enhanced AVD error tracking and resolution pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -27,7 +26,6 @@ Configure AVD insights workbook template to monitor and troubleshoot AVD workloads across metrics, logs, events, and more. Both Production and DR workloads should be enabled with AVD Insights. potentialBenefits: Enhanced AVD monitoring and troubleshooting pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Having separate Log Analytics ensures that your DR environment is fully operational for visibility of the metrics, performance, and other auditing tools your workload teams will rely on in the event of an incident. 
potentialBenefits: Improved DR visibility and operation pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Follow AVD Landing Zone best practices using multiple resource groups based on resource type and associated shared resources for AVD workloads. potentialBenefits: Enhanced organization and scalability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -82,7 +78,6 @@ To handle a large number of users, consider scaling horizontally by creating multiple host pools. potentialBenefits: Avoids limits, ensures smooth scaling pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -102,7 +97,6 @@ Active Directory Domain Services (AD DS) integrated DNS/other should target Secondary/Tertiary customer DNS across multi-region zones. If using custom DNS, ensure there are redundant DNS servers to avoid a single point of failure. potentialBenefits: Improves uptime & resilience pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -120,7 +114,6 @@ It is recommended to adopt a multi-region deployment (active-active or active-passive) for AVD. Each region should contain at least identity, name resolution, AVD management resources, and session hosts in case of a primary region outage. potentialBenefits: Enhanced resilience and uptime pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -140,7 +133,6 @@ To maximize capacity and performance scaling it is recommended to creat only one file share per Azure files storage account, with this approach the single file share will be able to grow to the maximum capacities of the storage account. 
potentialBenefits: Enhanced scaling and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -158,7 +150,6 @@ To maximize capacity and performance scaling of the file share service and avoid user's profile contention, it is recommended to create one file share target and FSLogix setup per host pool. potentialBenefits: Enhanced performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -176,7 +167,6 @@ It is recommended to enable backup on the FSLogix Storage Account. Ensuring the user profiles are resilient will allow user data and experience to be consistent through outages. potentialBenefits: Ensures data resilience and consistency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -196,7 +186,6 @@ RDP Shortpath establishes a direct UDP-based connection between a client and the session host. By default, Remote Desktop Protocol (RDP) tries to establish connection using UDP and uses a TCP-based reverse connect transport as a fallback connection mechanism. UDP-based transport offers better connection reliability and more consistent latency. potentialBenefits: Better reliability and consistent latency pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -214,7 +203,6 @@ Ensure that AVD session hosts can effectively communicate with the AVD control plane and that UDP ports are open if UDP is utilized. Validate the connectivity of VMs to the AVD Control Plane and confirm the accessibility of UDP TURN ports. Whitelist global URLs and ensure that UDP/TURN ports are open and accessible to facilitate smooth user connections. 
potentialBenefits: Enhanced performance & user experience pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -235,7 +223,6 @@ Set up secondary server in staging mode for Entra Connect for syncing to Entra in case of primary server outage. potentialBenefits: Improved failover reliability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -253,7 +240,6 @@ For high availability connections back to on-premises data centers should consider backup paths across the regions that have been utilized. Ensure redundancy in routing by having a secondary route table in the secondary region. potentialBenefits: Enhanced availability & routing pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -272,7 +258,6 @@ It's important your organization plans for IP addressing in Azure. Planning ensures the IP address space doesn't overlap across on-premises locations and Azure regions. Overlapping IP address spaces across on-premises and Azure regions create major contention challenges. potentialBenefits: Enhances security and prevents IP conflicts pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -290,7 +275,6 @@ Ensure that Route Tables have static routes to allow session host traffic that targets AVD control plane to go outbound directly out of the subnet to the internet (next hop), this will avoid any delays of inspecting or adding additional hops in the communication of trusted traffic. potentialBenefits: Enhanced performance and Disaster Recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -310,7 +294,6 @@ Establish a systematic process for handling image updates within your Azure Virtual Desktop environment. Instead of directly updating individual session hosts, create a new version of the updated image. 
This process involves creating and configuring a golden image with the necessary updates and configurations. potentialBenefits: Ensures consistency; minimizes drift pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -328,7 +311,6 @@ Ensure all session hosts have the standard FSLogix configuration deployed. Regularly validate settings for consistency and alignment with best practices. potentialBenefits: Optimized session reliability and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -346,7 +328,6 @@ Verify user permissions are correctly set on SMB shares so that users have appropriate access to only their own profile and not other user profiles, while administrators have full access at the root volume. Also ensure secondary storage path permissions are set in case of a DR event. potentialBenefits: Enhanced security & disaster recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: learnMoreLink: @@ -364,7 +345,6 @@ Configure diagnostic settings on FSLogix storage resources and regularly its metrics and FSLogix logs for errors. Events can be reviewed by looking locally inside the Session Host, but it is recommended to configure AVD insights workbook to consolidate this information to a Log Analytics workspace. potentialBenefits: Enhanced AVD error tracking and resolution pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -384,7 +364,6 @@ Ensure a process is in place to regularly check for FSLogix agent upgrades and maintain FSLogix up to date. We recommend customers upgrade to the latest version of FSLogix as quickly as their deployment process can allow. FSLogix will provide hotfix releases which address current and potential bugs that impact customer deployments. Additionally, it is the first requirement when opening any support case. 
potentialBenefits: Enhanced reliability & support pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -403,7 +382,6 @@ Verify the number of users connecting to each file share to make sure the SMB path can handle the number of file connections. Currently, Azure Files supports up to 10k handles per root directory. potentialBenefits: Enhanced stability & user limit checks pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -422,7 +400,6 @@ Your file share should be in the same Azure region as your session hosts. potentialBenefits: Enhances performance and scalability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -440,7 +417,6 @@ To ensure continuous availability and disaster recovery readiness, it is recommended to provision a secondary Key Vault in a secondary region. In the event of a primary region failure, this secondary Key Vault will ensure that critical secrets are accessible for use in deployments in the secondary region. potentialBenefits: Ensures DR readiness and access pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -459,7 +435,6 @@ This recommendation doesn't apply when using Microsoft Entra ID or Entra Domain Services joined session hosts. potentialBenefits: Enhanced identity resilience pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -477,7 +452,6 @@ When using custom DNS servers, deploy DNS servers on Azure virtual machines across availability zones in the same region as the session hosts. This improves the environment's reliability by removing a dependency on an on-premises service and improves performance by creating a shorter path for name resolution. 
potentialBenefits: Enhanced reliability and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-specialized-workloads/avs/recommendations.yaml b/azure-specialized-workloads/avs/recommendations.yaml index aa0beeb38..9667dc5d3 100644 --- a/azure-specialized-workloads/avs/recommendations.yaml +++ b/azure-specialized-workloads/avs/recommendations.yaml @@ -9,7 +9,6 @@ Use the Interconnect feature for direct communication between private clouds in different availability zones, enabling connectivity between the private clouds management and workload networks. potentialBenefits: Enhanced private cloud connectivity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Ensure two external identity sources are configured for NSX and vCenter Server. The VMware vCenter Server and NSX Manager use these for authentication with external identities. potentialBenefits: Continuous login access during maintenances pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -47,7 +45,6 @@ Enable Network Extension High Availability for appliance failure tolerance in HCX service. It pairs selected appliances for Active Standby configuration, ensuring high availability and quick recovery, keeping configurations in-service despite failures. potentialBenefits: Improves HCX service continuity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -67,7 +64,6 @@ Do not extend the network used by the HCX Management devices to ensure the network's security and stability. potentialBenefits: Enhanced network safety and performance pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -85,7 +81,6 @@ The Azure VMware Solution's service SLA is influenced by vSAN storage policies, which change based on cluster size. 
For clusters over 6 hosts, an FTT-2 policy (RAID-1 or RAID-6) is advised. FTT refers to the Fault Tolerance feature. potentialBenefits: Enhanced cluster reliability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -105,7 +100,6 @@ Microsoft suggests using two or more ExpressRoute circuits at distinct peering locations for critical workloads. Connect these circuits and your Azure VMware Solutions private clouds using Global Reach. potentialBenefits: Enhanced circuit resilience for Azure VMware pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -125,7 +119,6 @@ Azure VMware Solution vSAN stretched clusters cover 2 Availability Zones plus a third for witness. Use ExpressRoute for added resilience by deploying two circuits in different locations. With Global Reach, create a mesh topology by connecting on-premises circuits to Azure's managed circuits. potentialBenefits: Enhanced resilience and connectivity pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -143,7 +136,6 @@ Two Azure VMware Solution private clouds can be deployed in different regions for business continuity, implementing a mesh network topology based on ExpressRoute Gateway Connections and Global Reach Connections. potentialBenefits: Enhanced disaster recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-specialized-workloads/hpc/recommendations.yaml b/azure-specialized-workloads/hpc/recommendations.yaml index 26709e881..61ee9bc04 100644 --- a/azure-specialized-workloads/hpc/recommendations.yaml +++ b/azure-specialized-workloads/hpc/recommendations.yaml @@ -9,7 +9,6 @@ Currently in all HPC Pack ARM templates we create the cluster share on one of the head node which is not highly available. 
potentialBenefits: Enhances job metadata availability pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ By deploying Azure "burst" nodes (both Windows and Linux) in your HPC Pack cluster or creating your HPC Pack cluster in Azure, you can automatically grow or shrink the cluster's resources such as nodes or cores according to the workload on the cluster. potentialBenefits: Efficient, uninterrupted execution pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Establish a cluster with a minimum of two head nodes. In the event of a head node failure, the active HPC Service will be automatically transferred from the affected head node to another functioning one. potentialBenefits: Enhanced reliability for HPC pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ When HPC failed to connect to the Domain controller, admin and user will not be able to connect to the HPC Service thus not able to manage and submit jobs to the cluster. potentialBenefits: Enhanced reliability and job management pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-specialized-workloads/sap/recommendations.yaml b/azure-specialized-workloads/sap/recommendations.yaml index 76d21cc06..95ac82c82 100644 --- a/azure-specialized-workloads/sap/recommendations.yaml +++ b/azure-specialized-workloads/sap/recommendations.yaml @@ -9,7 +9,6 @@ Azure Availability Zones are physically separate locations within each Azure region that are tolerant to local failures. Use availability zones to protect your applications and data against unlikely data center failures. Ensure each single point of failure of each SAP production system is protected with high availability using multiple availability zones. 
If you cannot deploy across different zones in a region, then refer to Microsoft guidance for High availability deployment options for SAP workload. potentialBenefits: High availability for SAP systems pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -35,7 +34,6 @@ Use Virtual Machines Scale Set (VMSS) with flexible orchestration to distribute the virtual machines across specified zones and within each zone to also distribute VMs across different fault domains within the zone on a best effort basis. Configure VMSS Flex following Microsoft recommendation for SAP workload using the right mode and correct settings. If you aren't currently using VMSS Flex for SAP application servers and also not using Availability Sets with Fault domain and Update domain distribution, then you should consider moving to VMSS Flex architecture to improve the resiliency posture of your SAP deployment. The following blog post in links below outlines the details on the process of migrating existing SAP workloads that are deployed in an availability set or availability zone to a flexible scale set with FD=1 deployment option. potentialBenefits: Enhanced resiliency for SAP on Azure pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -59,7 +57,6 @@ For single-instance VMs, both OS and data disks must be either Premium SSD or Ultra Disk to achieve the single-instance SLA of 99.9% availability. potentialBenefits: Higher SLA of 99.9% with SSDs pgVerified: true - publishedToLearn: false automationAvailable: true tags: null learnMoreLink: @@ -85,7 +82,6 @@ High availability for databases should be implemented using database native replication technologies and the data should be replicated synchronously that is in SYNC mode from primary database to a stand-by node. 
potentialBenefits: Ensures high availability for SAP data pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -106,7 +102,6 @@ In case of Azure File Shares, we recommend that you use ZRS (Zone-redundant storage) and for Azure NetApp Files use Zonal replication for your volumes. potentialBenefits: Enhanced data availability for SAP pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -125,7 +120,6 @@ The fail back can be either automatic or manual. potentialBenefits: Ensures SAP Azure's failover reliability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -146,7 +140,6 @@ This approach allows for controlled resource movement within the cluster, facilitating maintenance while preserving the integrity and efficiency of the cluster's configuration. potentialBenefits: Enhanced maintenance and failover handling pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -166,7 +159,6 @@ Warm standby involves keeping VMs in the DR region running. On-demand Capacity Reservation, on the other hand, reserves compute capacity without having to run the VMs, allowing you to start them when needed. When DR VMs are not needed, the reserved capacity may safely be used to run other workloads without the risk of losing the capacity to other customers. This strategy guarantees resource availability for your critical workloads in the event of a disaster, balancing cost and readiness. potentialBenefits: Guarantees DR region availability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -184,7 +176,6 @@ Replicate production databases (ASYNC) to the DR location using the database vendor's replication technology. 
potentialBenefits: Enhanced DR resilience pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -202,7 +193,6 @@ SAP components such as (A)SCS, application servers, WebDispatchers, etc are backed up to DR location using an appropriate backup tool or ASR. potentialBenefits: Ensures SAP data safety and recovery pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -222,7 +212,6 @@ Implementing robust monitoring and alerting for DR in SAP on Azure ensures coverage across its complex, multi-layer architecture. This strategy is crucial for databases, services, applications, and shared systems. potentialBenefits: Enhances SAP DR oversight pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -240,7 +229,6 @@ Automate the build of disaster recovery (DR) infrastructure (or pre-deploy DR resources) and streamline SAP service recovery as much as possible. potentialBenefits: Faster SAP recovery, reduced downtime pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -260,7 +248,6 @@ Test a wide range of failure scenarios, including regional outages. Testing should confirm that your DR strategy is robust, meets your RPO and RTO targets, and provides seamless failover across all layers of the SAP architecture. This will ensure a comprehensive and resilient DR strategy capable of withstanding regional failures and ensuring business continuity. potentialBenefits: Ensures robust DR, meets RPO/RTO pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -278,7 +265,6 @@ For an SAP solution hosted on Azure it is imperative to implement a robust monitoring and alerting solution that comprehensively covers DR of each layer of the SAP architecture. 
Given the complexity of SAP systems, which span multiple layers using diverse technologies and Azure resources, each with potentially distinct DR replication mechanisms, an appropriate monitoring strategy is crucial. The different layers include database, central services, application, and shared file systems. potentialBenefits: Improved DR oversight and rapid issue response pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -302,7 +288,6 @@ In addition, it is also important that you define a procedure on how to react to scheduled events. potentialBenefits: Proactive maintenance awareness pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -322,7 +307,6 @@ For the ASCS-Pacemaker (Central Server Instance), ensure that the Pacemaker cluster configuration parameters are correctly set up for SAP ASCS high availability. potentialBenefits: Enhances SAP ASCS uptime pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -344,7 +328,6 @@ For the ASCS-LB (Central Server Instance), ensure that the load balancer is configured correctly for SAP ASCS high availability. potentialBenefits: Enhanced HA for SAP ASCS pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -366,7 +349,6 @@ For the DBHANA-Pacemaker (Database Instance), ensure that the Pacemaker cluster configuration parameters are correctly set up for SAP HANA database high availability. potentialBenefits: Enhances SAP HANA DB uptime pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -388,7 +370,6 @@ For the DBHANA-LB (Database Instance), make sure the load balancer is configured correctly for SAP HANA database high availability. 
potentialBenefits: Enhanced DB availability pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -410,7 +391,6 @@ High availability of SAP while used with Azure NetApp Files relies on setting proper timeout values to prevent disruption to your application. Review the documentation to ensure your configuration meets the timeout values as noted in the documentation. potentialBenefits: Improve resiliency and performance of SAP on Azure pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -430,7 +410,6 @@ SAP on Azure QualityCheck tool can help you identify any deviations from Microsoft recommendations quickly and at scale. potentialBenefits: Improve reliability, performance and optimize costs pgVerified: true - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/azure-waf/reliability/recommendations.yaml b/azure-waf/reliability/recommendations.yaml index 247feab33..43da9570e 100644 --- a/azure-waf/reliability/recommendations.yaml +++ b/azure-waf/reliability/recommendations.yaml @@ -9,7 +9,6 @@ Design your workload to align with business objectives and avoid unnecessary complexity or overhead. Use a practical and balanced approach to make design decisions that deliver the desired results. Contain your design to the necessities to reduce inefficiencies and potential problems. potentialBenefits: Meet business requirements pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -27,7 +26,6 @@ Identify and rate user and system flows. Use a criticality scale based on your business requirements to prioritize the flows. potentialBenefits: Align architecture with reliability goals pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -45,7 +43,6 @@ Use failure mode analysis (FMA) to identify and prioritize potential failures in your solution components. 
Perform FMA to help you assess the risk and effect of each failure mode. Determine how the workload responds and recovers. potentialBenefits: Reduce risk of unpredicted behavior pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -63,7 +60,6 @@ Define reliability and recovery targets for the components, the flows, and the overall solution. Use the defined targets to build the health model. The health model defines what healthy, degraded, and unhealthy states look like. potentialBenefits: Communicate reliability expectations with stakeholders pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -81,7 +77,6 @@ Add redundancy at different levels, especially for critical flows. Apply redundancy to the compute, data, network, and other infrastructure tiers in accordance with the identified reliability targets. potentialBenefits: Optimize for resiliency pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -99,7 +94,6 @@ High availability is a foundational tenet of designing for reliability. A highly available architecture can help you avoid downtime as much as possible and recover efficiently if downtime does occur. potentialBenefits: Minimize downtime from regional outages pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -117,7 +111,6 @@ High availability is a foundational tenet of designing for reliability. A highly available architecture can help you avoid downtime as much as possible and recover efficiently if downtime does occur. potentialBenefits: Minimize downtime from zonal outages pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -135,7 +128,6 @@ Partitioning data improves scalability, reduces contention, and optimizes performance. Implement data partitioning to divide data by usage pattern. 
potentialBenefits: Improve data estate reliability pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -153,7 +145,6 @@ Implement a timely and reliable scaling strategy at the application, data, and infrastructure levels. potentialBenefits: Dynamically handle increased load pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -171,7 +162,6 @@ Background jobs help minimize the load on the application UI, which improves availability and reduces interactive response time. potentialBenefits: Minimize application load pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -189,7 +179,6 @@ Strengthen the resiliency and recoverability of your workload by implementing self-preservation and self-healing measures. Self-healing capabilities help you avoid downtime by building in failure detection and automatic corrective actions to respond to different failure types. potentialBenefits: Reduce the likelihood of outages pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -207,7 +196,6 @@ Build capabilities into the solution by using infrastructure-based reliability patterns and software-based design patterns to handle component failures and transient errors. potentialBenefits: Reduce the likelihood of outages pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -225,7 +213,6 @@ Test resiliency and availability scenarios by applying the principles of chaos engineering in your test and production environments. Use testing to ensure that your graceful degradation implementation and scaling strategies are effective by performing active malfunction and simulated load testing. 
potentialBenefits: Validate and optimize workload reliability pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -243,7 +230,6 @@ Implement structured, tested, and documented business continuity and disaster recovery (BCDR) plans that align with the recovery targets. Plans must cover all components and the system as a whole. potentialBenefits: Reliable disaster recovery pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: @@ -261,7 +247,6 @@ Measure and publish the solution's health indicators. Continuously capture uptime and other reliability data from across the workload and also from individual components and key flows. potentialBenefits: Observability into workload health pgVerified: true - publishedToLearn: true automationAvailable: false tags: null learnMoreLink: diff --git a/docs/archetypes/azure-resource-type/recommendations.yaml b/docs/archetypes/azure-resource-type/recommendations.yaml index 3bb72d913..7893084fd 100644 --- a/docs/archetypes/azure-resource-type/recommendations.yaml +++ b/docs/archetypes/azure-resource-type/recommendations.yaml @@ -10,7 +10,6 @@ (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -28,7 +27,6 @@ (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) pgVerified: false - publishedToLearn: false automationAvailable: false tags: null learnMoreLink: diff --git a/docs/archetypes/azure-specialized-workload/recommendations.yaml b/docs/archetypes/azure-specialized-workload/recommendations.yaml index 94f201898..0c4f6439f 100644 --- a/docs/archetypes/azure-specialized-workload/recommendations.yaml +++ b/docs/archetypes/azure-specialized-workload/recommendations.yaml @@ -9,8 +9,7 @@ Your Long Description Here 
(less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) - pgVerified: false - publishedToLearn: false/arg + pgVerified: false/arg automationAvailable: false tags: null learnMoreLink: @@ -28,8 +27,7 @@ Your Long Description Here (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) - pgVerified: false - publishedToLearn: false/arg + pgVerified: false/arg automationAvailable: false tags: null learnMoreLink: diff --git a/docs/archetypes/azure-waf/recommendations.yaml b/docs/archetypes/azure-waf/recommendations.yaml index 94f201898..0c4f6439f 100644 --- a/docs/archetypes/azure-waf/recommendations.yaml +++ b/docs/archetypes/azure-waf/recommendations.yaml @@ -9,8 +9,7 @@ Your Long Description Here (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) - pgVerified: false - publishedToLearn: false/arg + pgVerified: false/arg automationAvailable: false tags: null learnMoreLink: @@ -28,8 +27,7 @@ Your Long Description Here (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) - pgVerified: false - publishedToLearn: false/arg + pgVerified: false/arg automationAvailable: false tags: null learnMoreLink: diff --git a/docs/content/contributing/create-content/create-recommendations/_index.md b/docs/content/contributing/create-content/create-recommendations/_index.md index 295931234..19aa3bb2c 100644 --- a/docs/content/contributing/create-content/create-recommendations/_index.md +++ b/docs/content/contributing/create-content/create-recommendations/_index.md @@ -29,7 +29,6 @@ To contribute a new recommendation for an Azure resource, follow these steps: (less than 300 characters) potentialBenefits: Potential Benefits of Implementing the Recommendation (less than 60 characters) pgVerified: false - 
publishedToLearn: false automationAvailable: false tags: null learnMoreLink: @@ -77,7 +76,6 @@ The YAML structure for adding new recommendations consists of several key-value | longDescription | To enable Cross-region disaster recovery and business continuity, ensure that the appropriate quotas are set for all user subscription Batch accounts. | String | The length should be less than 300 characters | Detailed description of the recommendation and its implications | | potentialBenefits | Enhanced data redundancy and boosts availability | String | The length should be less than 60 characters | The potential benefits of implementing the recommendation | | pgVerified | false | Boolean | true, false | Indicates whether the recommendation is verified by the relevant product group | -| publishedToLearn | false | Boolean | true, false | Indicates whether the recommendation is published to [Microsoft Learn](https://learn.microsoft.com/en-us/azure/well-architected/pillars) | | automationAvailable | false| Boolean | true, false | Indicates whether automation is available for validating the recommendation | | tags | null | String | null | Generalized tags used for incorporating fields to automate | | learnMoreLink | - name: Learn More url: "" | Object | Only 1 link per recommendation | Links related to the recommendation, such as announcements or documentation | diff --git a/docs/layouts/shortcodes/azure-resources-recommendationlist.html b/docs/layouts/shortcodes/azure-resources-recommendationlist.html index c959a7d94..323cce44a 100644 --- a/docs/layouts/shortcodes/azure-resources-recommendationlist.html +++ b/docs/layouts/shortcodes/azure-resources-recommendationlist.html @@ -15,6 +15,7 @@

Summary

Impact Category Automation Available + In Azure Advisor {{ range sort .recommendations "recommendation" "asc" }} @@ -26,6 +27,7 @@

Summary

{{ .recommendationImpact }} {{ .recommendationControl }} {{ if eq .automationAvailable true }}Yes{{ else }}No{{ end }} + {{ if .recommendationTypeId }}Yes{{ else }}No{{ end }} {{ end }} {{ end }} diff --git a/docs/layouts/shortcodes/azure-specialized-workloads-recommendationlist.html b/docs/layouts/shortcodes/azure-specialized-workloads-recommendationlist.html index c73ae6fbb..84849b823 100644 --- a/docs/layouts/shortcodes/azure-specialized-workloads-recommendationlist.html +++ b/docs/layouts/shortcodes/azure-specialized-workloads-recommendationlist.html @@ -1,5 +1,5 @@ @@ -15,8 +15,9 @@

Summary

Impact Category Automation Available + In Azure Advisor - + {{ range sort .recommendations "recommendation" "asc" }} {{ if and (eq .recommendationMetadataState "Active") (or (not $.Site.Params.aprlDevMode) true) }} @@ -26,6 +27,7 @@

Summary

{{ .recommendationImpact }} {{ .recommendationControl }} {{ if eq .automationAvailable true }}Yes{{ else }}No{{ end }} + {{ if .recommendationTypeId }}Yes{{ else }}No{{ end }} {{ end }} {{ end }} diff --git a/tools/data/recommendations.json b/tools/data/recommendations.json index c13ee7213..b2bd99c2e 100644 --- a/tools/data/recommendations.json +++ b/tools/data/recommendations.json @@ -1,10551 +1,8226 @@ [ { "publishedToAdvisor": null, - "aprlGuid": "bb6deb9d-24fa-4ee8-bc23-ac3ebc7fdf8e", + "aprlGuid": "ca87914f-aac4-4783-ab67-82a6f936f194", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/entra/identity/domain-services/tutorial-create-replica-set", - "name": "Create and use replica sets for resiliency or geolocation in Microsoft Entra Domain Services" + "url": "https://learn.microsoft.com/azure/postgresql/flexible-server/concepts-high-availability", + "name": "Overview of high availability with Azure Database for PostgreSQL" } ], "recommendationControl": "High Availability", - "longDescription": "You need to use a minimum of Enterprise SKU for your managed domain to support replica sets.\n", - "pgVerified": false, - "description": "Use at least the Enterprise SKU", - "potentialBenefits": "The Enterprise SKU enables creation of replica sets.", - "publishedToLearn": false, + "longDescription": "Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery.\n", + "pgVerified": true, + "description": "Enable HA with zone redundancy", + "potentialBenefits": "Enhanced uptime and data protection", "tags": null, - "recommendationResourceType": "Microsoft.AAD/domainServices", + "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph 
Query\r\n// Gets Entry Domain Services that are using the Standard SKU\r\nresources\r\n| where type == \"microsoft.aad/domainservices\"\r\n| extend sku = properties.sku\r\n| where sku =~ 'Standard'\r\n| project recommendationId='bb6deb9d-24fa-4ee8-bc23-ac3ebc7fdf8e', name=name, id=id, tags=tags, param1=strcat('SKU:', sku)\r\n" + "query": "// Azure Resource Graph Query\n// Find Database for PostgreSQL instances that are not zone redundant\nresources\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\n| where properties.highAvailability.mode != \"ZoneRedundant\"\n| project recommendationId = \"ca87914f-aac4-4783-ab67-82a6f936f194\", name, id, tags, param1 = \"ZoneRedundant: False\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a3058909-fcf8-4450-88b5-499f57449178", + "aprlGuid": "b2bad57d-7e03-4c0f-9024-597c9eb295bb", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/entra/identity/domain-services/tutorial-create-replica-set", - "name": "Create and use replica sets for resiliency or geolocation in Microsoft Entra Domain Services" + "url": "https://learn.microsoft.com/azure/postgresql/flexible-server/concepts-maintenance", + "name": "Scheduled maintenance in Azure Database for PostgreSQL - Flexible Server" } ], - "recommendationControl": "High Availability", - "longDescription": "To improve the resiliency of a Microsoft Entra Domain Services managed domain, or deploy to additional geographic locations close to your applications, you can use replica sets.\nYou can add a replica set to any peered virtual network in any Azure region that supports Domain Services.\n", - "pgVerified": false, - "description": "Use replica sets for resiliency or geolocation in Microsoft Entra Domain Services", - "potentialBenefits": "The replica sets provide geographical resiliency.", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Use custom 
maintenance schedule on flexible server instances to select a preferred time for service updates to be applied.\n", + "pgVerified": true, + "description": "Enable custom maintenance schedule", + "potentialBenefits": "Control update timings", "tags": null, - "recommendationResourceType": "Microsoft.AAD/domainServices", + "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Gets Entry Domain Services that are using only one replicaSet\r\nresources\r\n| where type == \"microsoft.aad/domainservices\"\r\n| extend replicaSets = properties.replicaSets\r\n| where array_length(replicaSets) < 2\r\n| project recommendationId='a3058909-fcf8-4450-88b5-499f57449178', name=name, id=id, tags=tags, param1=strcat('replicaSetLocation:', replicaSets[0].location)\r\n" + "query": "// Azure Resource Graph Query\n// Find Database for PostgreSQL instances that do not have a custom maintenance window\nresources\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\n| where properties.maintenanceWindow.customWindow != \"Enabled\"\n| project recommendationId = \"b2bad57d-7e03-4c0f-9024-597c9eb295bb\", name, id, tags, param1 = strcat(\"customWindow:\", properties['maintenanceWindow']['customWindow'])\n" }, { "publishedToAdvisor": null, - "aprlGuid": "baf3bfc0-32a2-4c0c-926d-c9bf0b49808e", + "aprlGuid": "31f4ac4b-29cb-4588-8de2-d8fe6f13ceb3", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/api-management/upgrade-and-scale#change-your-api-management-service-tier", - "name": "Change your API Management service tier" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-api-mgt", - "name": "Migrate Azure API Management to availability zone support" + "url": 
"https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-backup-restore", + "name": "Backup and restore in Azure Database for PostgreSQL - Flexible Server" } ], - "recommendationControl": "High Availability", - "longDescription": "Upgrading the API Management instance to the Premium SKU adds support for Availability Zones, enhancing availability and resilience by distributing services across physically separate locations within Azure regions.\n", - "pgVerified": false, - "description": "Migrate API Management services to Premium SKU to support Availability Zones", - "potentialBenefits": "Enhanced availability and resilience", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", + "pgVerified": true, + "description": "Configure geo redundant backup storage", + "potentialBenefits": "Recover from regional failure and/or disaster", "tags": null, - "recommendationResourceType": "Microsoft.ApiManagement/service", + "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all API Management instances that aren't Premium\r\nresources\r\n| where type =~ 'Microsoft.ApiManagement/service'\r\n| extend skuName = sku.name\r\n| where tolower(skuName) != tolower('premium')\r\n| project recommendationId = \"baf3bfc0-32a2-4c0c-926d-c9bf0b49808e\", name, id, tags, param1=strcat(\"SKU: \", skuName)\r\n" + "query": "// Azure Resource Graph Query\n// Find Database for PostgreSQL instances that do not have geo redundant backup storage configured\nresources\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\n| where properties.backup.geoRedundantBackup != \"Enabled\"\n| project recommendationId = \"31f4ac4b-29cb-4588-8de2-d8fe6f13ceb3\", name, id, 
tags, param1 = strcat(\"geoRedundantBackup:\", properties['backup']['geoRedundantBackup'])\n" }, { "publishedToAdvisor": null, - "aprlGuid": "740f2c1c-8857-4648-80eb-47d2c56d5a50", + "aprlGuid": "2ab85a67-26be-4ed2-a0bb-101b2513ec63", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/api-management/high-availability#availability-zones", - "name": "Ensure API Management availability and reliability" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-api-mgt", - "name": "Migrate Azure API Management to availability zone support" + "url": "https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-read-replicas", + "name": "Read replicas in Azure Database for PostgreSQL - Flexible Server" } ], - "recommendationControl": "High Availability", - "longDescription": "Zone redundancy for APIM instances ensures the gateway and control plane (Management API, developer portal, Git configuration) are replicated across datacenters in physically separated zones, boosting resilience to zone failures.\n", - "pgVerified": false, - "description": "Enable Availability Zones on Premium API Management instances", - "potentialBenefits": "Improved resilience to zone failures", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", + "pgVerified": true, + "description": "Configure one or more read replicas", + "potentialBenefits": "Recover from regional failure and/or disaster", "tags": null, - "recommendationResourceType": "Microsoft.ApiManagement/service", + "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Premium 
API Management instances that aren't zone redundant\r\nresources\r\n| where type =~ 'Microsoft.ApiManagement/service'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| extend skuName = sku.name\r\n| where tolower(skuName) == tolower('premium')\r\n| where isnull(zones) or array_length(zones) < 2\r\n| extend zoneValue = iff((isnull(zones)), \"null\", zones)\r\n| project recommendationId = \"740f2c1c-8857-4648-80eb-47d2c56d5a50\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Zones value: \", zoneValue )\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find Database for PostgreSQL instances that are read replicas\nresources\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\n| where properties.replicationRole == \"AsyncReplica\"\n| project recommendationId = \"2ab85a67-26be-4ed2-a0bb-101b2513ec63\", name, id, tags, param1 = strcat(\"replicationRole:\", properties['replicationRole'])\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e35cf148-8eee-49d1-a1c9-956160f99e0b", - "recommendationTypeId": "e5f60ef8-3fcc-4fb5-bee7-7aaeb44c1509", + "aprlGuid": "6293a3cc-6b4a-4c0f-9ea7-b8ae8d7dd3d5", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/api-management/breaking-changes/stv1-platform-retirement-august-2024", - "name": "Azure API Management - stv1 platform retirement (August 2024)" 
- }, - { - "url": "https://learn.microsoft.com/en-us/azure/api-management/compute-infrastructure", - "name": "Azure API Management compute platform" + "url": "https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/how-to-auto-grow-storage-portal", + "name": "Storage autogrow using Azure portal in Azure Database for PostgreSQL - Flexible Server" } ], - "recommendationControl": "High Availability", - "longDescription": "Upgrading to API Management stv2 is required as stv1 retires on 31 Aug 2024, offering enhanced capabilities with the new platform version.\n", + "recommendationControl": "Scalability", + "longDescription": "Configure storage auto-grow to prevent the server from running out of storage and becoming read-only.\n", "pgVerified": false, - "description": "Azure API Management platform version should be stv2", - "potentialBenefits": "Ensures service continuity", - "publishedToLearn": false, + "description": "Configure storage auto-grow", + "potentialBenefits": "Scale storage automatically to meet increasing demand", "tags": null, - "recommendationResourceType": "Microsoft.ApiManagement/service", + "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all API Management instances that aren't upgraded to platform version stv2\r\nresources\r\n| where type =~ 'Microsoft.ApiManagement/service'\r\n| extend plat_version = properties.platformVersion\r\n| extend skuName = sku.name\r\n| where tolower(plat_version) != tolower('stv2')\r\n| project recommendationId = \"e35cf148-8eee-49d1-a1c9-956160f99e0b\", name, id, tags, param1=strcat(\"Platform Version: \", plat_version) , param2=strcat(\"SKU: \", skuName)\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c79680ea-de85-44fa-a596-f31fa17a952f", + "aprlGuid": 
"e93bb813-b356-48f3-9bdf-a06a0a6ba039", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/api-management/api-management-howto-autoscale", - "name": "Setting up auto-scale for Azure API Management" + "url": "https://learn.microsoft.com/en-us/azure/site-recovery/azure-to-azure-network-mapping#set-up-ip-addressing-for-target-vms", + "name": "Setup network mapping for site recovery" } ], - "recommendationControl": "High Availability", - "longDescription": "Use API Management with auto-scale for high availability in workloads that experience variable traffic patterns. There are several limitations with auto-scale, so review the documentation to ensure it meets your requirements.\n", - "pgVerified": false, - "description": "Enable auto-scale for production workloads on API Management services", - "potentialBenefits": "Enhanced availability and resilience", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Ensure VM failover settings' static IP addresses are available in the failover subnet to maintain consistent IP assignment during failover, with the target VM receiving the same static IP if it's available or the next available IP otherwise. 
IP adjustments can be made in VM Network settings.\n", + "pgVerified": true, + "description": "Ensure static IP addresses in Site Recovery VM failover settings are available in failover subnet", + "potentialBenefits": "Smooth failover IP management", "tags": null, - "recommendationResourceType": "Microsoft.ApiManagement/service", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.RecoveryServices/vaults", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8dbcd94b-0948-4df3-b608-1946726c3abf", + "aprlGuid": "17e877f7-3a89-4205-8a24-0670de54ddcd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/container-apps/health-probes?tabs=arm-template", - "name": "Health probes for Azure Container Apps" + "url": "https://learn.microsoft.com/en-us/azure/site-recovery/azure-to-azure-tutorial-dr-drill#run-a-test-failover", + "name": "Run a test failover" } ], - "recommendationControl": "High Availability", - "longDescription": "Enable container health probes to monitor the health of your container apps and ensure that unhealthy containers are restarted automatically.\n", - "pgVerified": false, - "description": "Enable container health probes", - "potentialBenefits": "Enhanced availability and resilience", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Perform a test failover to validate your BCDR strategy and ensure that your applications are functioning correctly in the target region without impacting your production environment. 
Test your Disaster Recovery plan periodically without any data loss or downtime, using test failovers.\n", + "pgVerified": true, + "description": "Validate VM functionality with a Site Recovery test failover to check performance at target", + "potentialBenefits": "Ensures BCDR plan accuracy and VM performance", "tags": null, - "recommendationResourceType": "Microsoft.App/containerApps", + "recommendationResourceType": "Microsoft.RecoveryServices/vaults", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs where replication has been enabled but Test Failover was never performed\nrecoveryservicesresources\n| where type == \"microsoft.recoveryservices/vaults/replicationfabrics/replicationprotectioncontainers/replicationprotecteditems\"\n| where properties.providerSpecificDetails.dataSourceInfo.datasourceType == 'AzureVm' and isnull(properties.lastSuccessfulTestFailoverTime)\n| project recommendationId=\"17e877f7-3a89-4205-8a24-0670de54ddcd\" , name = properties.providerSpecificDetails.recoveryAzureVMName, id=properties.providerSpecificDetails.dataSourceInfo.resourceId\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f4201965-a88d-449d-b3b4-021394719eb2", - "recommendationTypeId": null, + "aprlGuid": "2912472d-0198-4bdc-aa90-37f145790edc", + "recommendationTypeId": "06578866-1877-41e6-9d22-3ea5122e8048", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/reliability/reliability-azure-container-apps", - "name": "Reliability in Azure Container Apps" + "url": "https://learn.microsoft.com/azure/backup/move-to-azure-monitor-alerts", + "name": "Move to Azure monitor Alerts" + }, + { + "url": "https://azure.microsoft.com/updates/transition-to-builtin-azure-monitor-alerts-for-recovery-services-vaults-in-azure-backup-by-31-march-2026/", + "name": "Classic alerts 
retirement announcement" } ], - "recommendationControl": "High Availability", - "longDescription": "To take advantage of availability zones, you must enable zone redundancy when you create a Container Apps environment. The environment must include a virtual network with an available subnet. To ensure proper distribution of replicas, set your app's minimum replica count to three.\n", - "pgVerified": false, - "description": "Deploy zone redundant Container app environments", - "potentialBenefits": "Enhances app resiliency and reliability", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Classic alerts for Recovery Services vaults in Azure Backup will be retired on 31 March 2026.\n", + "pgVerified": true, + "description": "Migrate from classic alerts to built-in Azure Monitor alerts for Azure Recovery Services Vaults", + "potentialBenefits": "Enhanced, scalable, and consistent alerting.", "tags": null, - "recommendationResourceType": "Microsoft.App/managedenvironments", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.RecoveryServices/vaults", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// The query filters the qualified Container app environments that do not have Zone Redundancy enabled.\r\nresources\r\n| where type =~ \"microsoft.app/managedenvironments\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", 
\"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where tobool(properties.zoneRedundant) == false\r\n| project recommendationId = \"f4201965-a88d-449d-b3b4-021394719eb2\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\r\n| order by id asc\r\n" + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Recovery services vault with Classic alerts enabled.\nresources\n| where type in~ ('microsoft.recoveryservices/vaults')\n| extend monitoringSettings = parse_json(properties).monitoringSettings\n| extend isUsingClassicAlerts = case(isnull(monitoringSettings),'Enabled',monitoringSettings.classicAlertSettings.alertsForCriticalOperations)\n| extend isUsingJobsAlerts = case(isnull(monitoringSettings), 'Enabled', monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures)\n| where isUsingClassicAlerts == 'Enabled'\n| project recommendationId = \"2912472d-0198-4bdc-aa90-37f145790edc\", name, id, tags, param1=strcat(\"isUsingClassicAlerts: \", isUsingClassicAlerts), param2=strcat(\"isUsingJobsAlerts: \", isUsingJobsAlerts)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "bb4c8db4-f821-475b-b1ea-16e95358665e", - "recommendationTypeId": null, + "aprlGuid": "1549b91f-2ea0-4d4f-ba2a-4596becbe3de", + "recommendationTypeId": "9b1308f1-4c25-4347-a061-7cc5cd6a44ab", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-app-configuration/concept-soft-delete#purge-protection", - "name": "Purge protection" + "url": "https://learn.microsoft.com/azure/backup/backup-create-recovery-services-vault#set-cross-region-restore", + "name": "Set Cross Region Restore" + }, + { + "url": "https://learn.microsoft.com/azure/backup/guidance-best-practices", + "name": "Azure Backup Best Practices" + }, + { + "url": "https://learn.microsoft.com/azure/backup/backup-rbac-rs-vault#minimum-role-requirements-for-azure-vm-backup", + "name": "Minimum Role Requirements for Cross Region 
Restore" + }, + { + "url": "https://learn.microsoft.com/azure/backup/backup-azure-arm-vms-prepare", + "name": "Recovery Services Vault" } ], - "recommendationControl": "Governance", - "longDescription": "With Purge protection enabled, soft deleted stores can't be purged in the retention period. If disabled, the soft deleted store can be purged before the retention period expires.\n", - "pgVerified": false, - "description": "Enable Purge protection for Azure App Configuration", - "potentialBenefits": "Prevent accidental deletion of configuration stores.", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Cross Region Restore enables the restoration of Azure VMs in a secondary, Azure paired region, facilitating drills for audit or compliance and allowing recovery of VMs or disks in the event of a primary region disaster. It is an opt-in feature available exclusively for GRS vaults.\n", + "pgVerified": true, + "description": "Enable Cross Region Restore for your GRS Recovery Services Vault", + "potentialBenefits": "Enhances disaster recovery capabilities", "tags": null, - "recommendationResourceType": "Microsoft.AppConfiguration/configurationStores", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.RecoveryServices/vaults", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Purge protection should be enabled for App Configuration stores to prevent accidental deletion of configuration data.\r\nresources\r\n| where type =~ \"Microsoft.AppConfiguration/configurationStores\"\r\n| where sku.name <> \"free\"\r\n| where (properties.enablePurgeProtection <> true) or isnull(properties.enablePurgeProtection )\r\n| project recommendationId = \"bb4c8db4-f821-475b-b1ea-16e95358665e\", name, id, tags, param1 = \"Enable purge protection\"\r\n" + "query": "// Azure Resource Graph Query\n// Displays all recovery services vaults that do not have 
cross region restore enabled\nresources\n| where type =~ \"Microsoft.RecoveryServices/vaults\" and\n properties.redundancySettings.standardTierStorageRedundancy =~ \"GeoRedundant\" and\n properties.redundancySettings.crossRegionRestore !~ \"Enabled\"\n| extend\n param1 = strcat(\"CrossRegionRestore: \", properties.redundancySettings.crossRegionRestore),\n param2 = strcat(\"StorageReplicationType: \", properties.redundancySettings.standardTierStorageRedundancy)\n| project recommendationId = \"1549b91f-2ea0-4d4f-ba2a-4596becbe3de\", name, id, tags, param1, param2\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2102a57a-a056-4d5e-afe5-9df9f92177ca", + "aprlGuid": "9e39919b-78af-4a0b-b70f-c548dae97c25", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-app-configuration/faq#which-app-configuration-tier-should-i-use", - "name": "Choose App Configuration tier" + "url": "https://learn.microsoft.com/azure/backup/backup-azure-security-feature-cloud?tabs=azure-portal", + "name": "Soft Delete for Azure Backup" } ], - "recommendationControl": "High Availability", - "longDescription": "SLA is not available for Free tier. Upgrade to the Standard tier to get an SLA of 99.9%\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "With soft delete, if backup data is deleted, the backup data is retained for 14 additional days, allowing the recovery of that backup item with no data loss with no cost to you. Soft delete is enabled by default. 
Disabling this feature isn't recommended.\n", "pgVerified": false, - "description": "Upgrade to App Configuration Standard tier", - "potentialBenefits": "High availability, more storage, higher request quota.", - "publishedToLearn": false, + "description": "Enable Soft Delete for Recovery Services Vaults in Azure Backup", + "potentialBenefits": "Enhances disaster recovery capabilities", "tags": null, - "recommendationResourceType": "Microsoft.AppConfiguration/configurationStores", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.RecoveryServices/vaults", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Upgrade to App Configuration Standard tier\r\nresources\r\n| where type =~ \"Microsoft.AppConfiguration/configurationStores\"\r\n| where sku.name == \"free\"\r\n| project recommendationId = \"2102a57a-a056-4d5e-afe5-9df9f92177ca\", name, id, tags, param1 = \"Upgrade to Standard SKU\"\r\n" + "query": "// Azure Resource Graph Query\n// Find all Azure Recovery Services vaults that do not have soft delete enabled\nresources\n| where type == \"microsoft.recoveryservices/vaults\"\n| mv-expand issoftDelete=properties.securitySettings.softDeleteSettings.softDeleteState\n| where issoftDelete == 'Disabled'\n| project recommendationId = \"9e39919b-78af-4a0b-b70f-c548dae97c25\", name, id, tags, param1=strcat(\"Soft Delete: \",issoftDelete)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "67205887-0733-466e-b50e-b1cd7316c514", + "aprlGuid": "eb005943-40a8-194b-9db2-474d430046b7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/automation/automation-disaster-recovery?tabs=win-hrw%2Cps-script%2Coption-one", - "name": "Disaster recovery for Automation accounts" - }, - { - "url": 
"https://learn.microsoft.com/en-us/azure/automation/automation-disaster-recovery?tabs=win-hrw%2Cps-script%2Coption-one#scenarios-for-cloud-and-hybrid-jobs", - "name": "Disaster recovery scenarios for cloud and hybrid jobs" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices", + "name": "Container Registry Best Practices" } ], - "recommendationControl": "High Availability", - "longDescription": "Set up disaster recovery for Automation accounts and resources like Modules, Connections, Credentials, Certificates, Variables, and Schedules to deal with region or zone failures. A replica Automation account should be ready in a secondary region for failover.\n", + "recommendationControl": "Scalability", + "longDescription": "Choose a service tier of Azure Container Registry to meet your performance needs. Premium offers the most bandwidth and highest rate of read and write operations for high-volume deployments. Use Basic to start, Standard for production, and Premium for hyper-scale performance and geo-replication.\n", "pgVerified": false, - "description": "Set up disaster recovery of Automation accounts and its dependent resources", - "potentialBenefits": "Ensures continuity during outages", - "publishedToLearn": false, + "description": "Use Premium tier for critical production workloads", + "potentialBenefits": "High-volume support and geo-replication", "tags": null, - "recommendationResourceType": "Microsoft.Automation/automationAccounts", + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Container Registries that are not using the Premium tier\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| where sku.name != \"Premium\"\n| project recommendationId = 
\"eb005943-40a8-194b-9db2-474d430046b7\", name, id, tags, param1=strcat(\"SkuName: \", tostring(sku.name))\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "74fcb9f2-9a25-49a6-8c42-d32851c4afb7", + "aprlGuid": "63491f70-22e4-3b4a-8b0c-845450e46fac", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/scenarios/azure-vmware/eslz-management-and-monitoring#design-recommendations", - "name": "Configure Azure Service Health alerts" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/zone-redundancy?toc=%2Fazure%2Freliability%2Ftoc.json&bc=%2Fazure%2Freliability%2Fbreadcrumb%2Ftoc.json&branch=main", + "name": "Registry best practices - Enable zone redundancy" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Ensure Azure Service Health notifications are set for Azure VMware Solution across all used regions and subscriptions. 
This communicates service/security issues and maintenance activities like host replacements and upgrades, reducing service request submissions.\n", - "pgVerified": true, - "description": "Configure Azure Service Health notifications and alerts for Azure VMware Solution", - "potentialBenefits": "Prompt mitigation of issues.", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", + "recommendationControl": "High Availability", + "longDescription": "Azure Container Registry's optional zone redundancy enhances resiliency and high availability for registries or replication resources in a specific region by distributing resources across multiple zones.\n", + "pgVerified": false, + "description": "Enable zone redundancy", + "potentialBenefits": "Enhances resiliency and high availability", + "tags": null, + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VMware Solution resources that don't have one or more service health alerts covering AVS private clouds in the deployed subscription and region pairs.\r\n//full list of private clouds\r\n(resources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend locale = tolower(location)\r\n| extend subscriptionId = tolower(subscriptionId)\r\n| project id, name, tags, subscriptionId, locale)\r\n| join kind=leftouter\r\n//Alert ID's that include all incident types filtered by AVS Service Health alerts\r\n((resources\r\n| where type == \"microsoft.insights/activitylogalerts\"\r\n| extend alertproperties = todynamic(properties)\r\n| where alertproperties.condition.allOf[0].field == \"category\" and alertproperties.condition.allOf[0].equals == \"ServiceHealth\"\r\n| where alertproperties.condition.allOf[1].field == \"properties.impactedServices[*].ServiceName\" and 
set_has_element(alertproperties.condition.allOf[1].containsAny, \"Azure VMware Solution\")\r\n| extend locale = strcat_array(split(tolower(alertproperties.condition.allOf[2].containsAny),' '), '')\r\n| mv-expand todynamic(locale)\r\n| where locale != \"global\"\r\n| project subscriptionId, tostring(locale) )\r\n| union\r\n//Alert ID's that include only some of the incident types after filtering by service health alerts covering AVS private clouds.\r\n(resources\r\n| where type == \"microsoft.insights/activitylogalerts\"\r\n| extend subscriptionId = tolower(subscriptionId)\r\n| extend alertproperties = todynamic(properties)\r\n| where alertproperties.condition.allOf[0].field == \"category\" and alertproperties.condition.allOf[0].equals == \"ServiceHealth\"\r\n| where alertproperties.condition.allOf[2].field == \"properties.impactedServices[*].ServiceName\" and set_has_element(alertproperties.condition.allOf[2].containsAny, \"Azure VMware Solution\")\r\n| extend locale = strcat_array(split(tolower(alertproperties.condition.allOf[3].containsAny),' '), '')\r\n| mv-expand todynamic(locale)\r\n| mv-expand alertproperties.condition.allOf[1].anyOf\r\n| extend incidentType = alertproperties_condition_allOf_1_anyOf.equals\r\n| where locale != \"global\"\r\n| project id, subscriptionId, locale, incidentType\r\n| distinct subscriptionId, tostring(locale), tostring(incidentType)\r\n| summarize incidentTypes=count() by subscriptionId, locale\r\n| where incidentTypes == 5 //only include this subscription, region pair if it includes all the incident types.\r\n| project subscriptionId, locale)) on subscriptionId, locale\r\n| where subscriptionId1 == \"\" or locale1 == \"\" or isnull(subscriptionId1) or isnull(locale1)\r\n| project recommendationId = \"74fcb9f2-9a25-49a6-8c42-d32851c4afb7\", name, id, tags, param1 = \"avsServiceHealthAlertsAllIncidentTypesConfigured: False\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Container Registries that do not have zone 
redundancy enabled\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| where properties.zoneRedundancy != \"Enabled\"\n| project recommendationId = \"63491f70-22e4-3b4a-8b0c-845450e46fac\", name, id, tags, param1=strcat(\"zoneRedundancy: \", tostring(properties.zoneRedundancy))\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "29d7a115-dfb6-4df1-9205-04824109548f", + "aprlGuid": "36ea6c09-ef6e-d743-9cfb-bd0c928a430b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", - "name": "Configure and streamline alerts" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#geo-replicate-multi-region-deployments", + "name": "Registry best practices - Enable geo-replication" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-geo-replication", + "name": "Geo-Replicate Container Registry" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set an alert for when the node count in Azure VMware Solution Private Cloud hits or exceeds 90 hosts, enabling timely planning for a new private cloud.\n", - "pgVerified": true, - "description": "Monitor when Azure VMware Solution Private Cloud is reaching the capacity limit", - "potentialBenefits": "Proactive capacity planning", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Use Azure Container Registry's geo-replication for multi-region deployments to simplify registry management and minimize latency. It enables serving global customers from local data centers and supports distributed development teams. 
Regional webhooks can notify of events in replicas.\n", + "pgVerified": false, + "description": "Enable geo-replication", + "potentialBenefits": "Simplifies management, reduces latency", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Container Registries that do not have geo-replication enabled\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| project registryName = name, registryId = id, tags, primaryRegion = location\n| join kind=leftouter (\n Resources\n | where type =~ \"microsoft.containerregistry/registries/replications\"\n | project replicationRegion=name, replicationId = id\n | extend registryId=strcat_array(array_slice(split(replicationId, '/'), 0, -3), '/')\n ) on registryId\n| project-away registryId1, replicationId\n| where isempty(replicationRegion)\n| project recommendationId = \"36ea6c09-ef6e-d743-9cfb-bd0c928a430b\", name=registryName, id=registryId, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f86355e3-de7c-4dad-8080-1b0b411e66c8", + "aprlGuid": "a5a0101a-a240-8742-90ba-81dbde9a0c0c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", - "name": "Configure and streamline alerts" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#repository-namespaces", + "name": "Registry best practices - use repository namespaces" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Alert when the cluster size reaches 14 hosts. 
Set up periodic alerts for planning new clusters or datastores due to growth, especially from storage needs. Beyond 14 hosts, trigger alerts for each new host addition for proactive resource monitoring.\n", - "pgVerified": true, - "description": "Monitor when Azure VMware Solution Cluster Size is approaching the host limit", - "potentialBenefits": "Proactive resource management", - "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "Using repository namespaces allows a single registry to be shared across multiple groups and deployments within an organization, supporting nested namespaces for group isolation. However, repositories are managed independently, not hierarchically.\n", + "pgVerified": false, + "description": "Use Repository namespaces", + "potentialBenefits": "Enables sharing and group isolation", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9ec5b4c8-3dd8-473a-86ee-3273290331b9", + "aprlGuid": "8e389532-5db5-7e4c-9d4d-443b3e55ae82", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/infrastructure#implement-high-availability", - "name": "Implement high availability" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/deploy-vsan-stretched-clusters", - "name": "Stretched Clusters" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#dedicated-resource-group", + "name": "Registry best practices - Use dedicated resource group" } ], - "recommendationControl": "High Availability", - 
"longDescription": "For Azure VMware Solution, enabling Stretched Clusters offers 99.99% SLA, synchronous storage replication (RPO=0), and spreads vSAN datastore across two AZs. Must be done at initial setup, needing double quota due to extension across AZs.\n", - "pgVerified": true, - "description": "Enable Stretched Clusters for Multi-AZ Availability of the vSAN Datastore", - "potentialBenefits": "99.99% SLA, 0 RPO, Multi-AZ", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "Container registries, used across multiple hosts, should be in their own resource group to prevent accidental deletion of images when container instances are deleted, preserving the image collection while experimenting with hosts.\n", + "pgVerified": false, + "description": "Move Container Registry to a dedicated resource group", + "potentialBenefits": "Safeguards image collection", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VMware Solution resources that aren't configured as stretched clusters and in supported regions.\r\nresources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend avsproperties = todynamic(properties)\r\n| where avsproperties.availability.strategy != \"DualZone\"\r\n| where location in (\"uksouth\", \"westeurope\", \"germanywestcentral\", \"australiaeast\")\r\n| project recommendationId = \"9ec5b4c8-3dd8-473a-86ee-3273290331b9\", name, id, tags, param1 = \"stretchClusters: Disabled\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// List container registries that contain additional resources within the same resource group.\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| project registryName=name, registryId=id, registryTags=tags, 
resourceGroupId=strcat('/subscriptions/', subscriptionId, '/resourceGroups/', resourceGroup), resourceGroup, subscriptionId\n| join kind=inner (\n resources\n | where not(type =~ \"microsoft.containerregistry/registries\")\n | summarize recourceCount=count() by subscriptionId, resourceGroup\n | where recourceCount != 0\n) on resourceGroup, subscriptionId\n| project recommendationId = \"8e389532-5db5-7e4c-9d4d-443b3e55ae82\", name=registryName, id=registryId, tags=registryTags, param1=strcat('resourceGroupName:',resourceGroup), param2=strcat('resourceGroupId:',resourceGroupId)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4232eb32-3241-4049-9e14-9b8005817b56", + "aprlGuid": "3ef86f16-f65b-c645-9901-7830d6dc3a1b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-alerts-for-azure-vmware-solution#supported-metrics-and-activities", - "name": "Supported metrics and activities" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#manage-registry-size", + "name": "Registry best practices - Manage registry size" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-retention-policy#about-the-retention-policy", + "name": "Retention Policy" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Ensure VMware vSAN datastore slack space is maintained for SLA by monitoring storage utilization and setting alerts at 70% and 75% utilization to allow for capacity planning. 
To expand, add hosts or external storage like Azure Elastic SAN, Azure NetApp Files, if CPU and RAM requirements are met.\n", - "pgVerified": true, - "description": "Configure Azure Monitor Alert warning thresholds for vSAN datastore utilization", - "potentialBenefits": "Optimized capacity planning for vSAN", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "The storage constraints of Azure Container Registry's service tiers align with usage scenarios: Basic for starters, Standard for production, and Premium for high-scale performance and geo-replication.\n", + "pgVerified": false, + "description": "Manage registry size", + "potentialBenefits": "Reduce costs, optimize storage", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VMware Solution resources that don't have a vSAN capacity critical alert with a threshold of 75% or a warning capacity of 70%.\r\n(\r\nresources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend scopeId = tolower(tostring(id))\r\n| project ['scopeId'], name, id, tags\r\n| join kind=leftouter (\r\nresources\r\n| where type == \"microsoft.insights/metricalerts\"\r\n| extend alertProperties = todynamic(properties)\r\n| mv-expand alertProperties.scopes\r\n| mv-expand alertProperties.criteria.allOf\r\n| extend scopeId = tolower(tostring(alertProperties_scopes))\r\n| extend metric = alertProperties_criteria_allOf.metricName\r\n| extend threshold = alertProperties_criteria_allOf.threshold\r\n| project scopeId, tostring(metric), toint(['threshold'])\r\n| where metric == \"DiskUsedPercentage\"\r\n| where threshold == 75\r\n) on scopeId\r\n| where isnull(['threshold'])\r\n| project recommendationId = 
\"4232eb32-3241-4049-9e14-9b8005817b56\", name, id, tags, param1 = \"vsanCapacityCriticalAlert: isNull or threshold != 75\"\r\n)\r\n| union (\r\nresources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend scopeId = tolower(tostring(id))\r\n| project ['scopeId'], name, id, tags\r\n| join kind=leftouter (\r\nresources\r\n| where type == \"microsoft.insights/metricalerts\"\r\n| extend alertProperties = todynamic(properties)\r\n| mv-expand alertProperties.scopes\r\n| mv-expand alertProperties.criteria.allOf\r\n| extend scopeId = tolower(tostring(alertProperties_scopes))\r\n| extend metric = alertProperties_criteria_allOf.metricName\r\n| extend threshold = alertProperties_criteria_allOf.threshold\r\n| project scopeId, tostring(metric), toint(['threshold'])\r\n| where metric == \"DiskUsedPercentage\"\r\n| where threshold == 70\r\n) on scopeId\r\n| where isnull(['threshold'])\r\n| project recommendationId = \"4232eb32-3241-4049-9e14-9b8005817b56\", name, id, tags, param1 = \"vsanCapacityWarningAlert: isNull or threshold != 70\"\r\n)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Container Registries that have their retention policy disabled\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| where properties.policies.retentionPolicy.status == \"disabled\"\n| project recommendationId = \"3ef86f16-f65b-c645-9901-7830d6dc3a1b\", name, id, tags, param1='retentionPolicy:disabled'\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fa4ab927-bced-429a-971a-53350de7f14b", + "aprlGuid": "03f4a7d8-c5b4-7842-8e6e-14997a34842b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#manage-logs-and-archives", - "name": "Manage logs and archives" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/anonymous-pull-access#about-anonymous-pull-access", + "name": "Enable 
anonymous pull access" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Ensure Diagnostic Settings are configured for each private cloud to send syslogs to external sources for analysis and/or archiving. Azure VMware Solution Syslogs contain data for troubleshooting and performance, aiding quicker issue resolution and early detection of issues.\n", - "pgVerified": true, - "description": "Configure Syslog in Diagnostic Settings for Azure VMware Solution", - "potentialBenefits": "Faster issue resolution, early detection", - "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "By default, Azure container registry requires authentication for pull/push actions. Enabling anonymous pull access exposes all content for public read actions. This applies to all repositories, potentially allowing unrestricted access if repository-scoped tokens are used.\n", + "pgVerified": false, + "description": "Disable anonymous pull access", + "potentialBenefits": "Enhanced security and controlled access", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Container Registries that have anonymous pull access enabled\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| where properties.anonymousPullEnabled == \"true\"\n| project recommendationId = \"03f4a7d8-c5b4-7842-8e6e-14997a34842b\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4ee5d535-c47b-470a-9557-4a3dd297d62f", + "aprlGuid": "44107155-7a32-9348-89f3-d5aa7e7c5a1d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", - "name": "Configure and streamline alerts" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service-reference#resource-logs", + "name": "Monitoring Azure Container Registry data reference - Resource Logs" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service#collection-and-routing", + "name": "Monitor Azure Container Registry - Enable diagnostic logs" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Ensure sufficient compute resources to avoid host resource exhaustion in Azure VMware Solution, which utilizes vSphere DRS and HA for dynamic workload resource management. However, sustained CPU utilization over 95% may increase CPU Ready times, impacting workloads.\n", - "pgVerified": true, - "description": "Monitor CPU Utilization to ensure sufficient resources for workloads", - "potentialBenefits": "Avoids resource exhaustion, optimizes performance", - "publishedToLearn": false, + "longDescription": "Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations.\n", + "pgVerified": false, + "description": "Configure Diagnostic Settings for all Azure Container Registries", + "potentialBenefits": "Enhanced tracking and debugging", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VMware Solution resources that don't have a Cluster CPU capacity critical alert with a threshold of 95%.\r\nresources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend scopeId = tolower(tostring(id))\r\n| project ['scopeId'], name, id, tags\r\n| join kind=leftouter (\r\nresources\r\n| where type == \"microsoft.insights/metricalerts\"\r\n| extend 
alertProperties = todynamic(properties)\r\n| mv-expand alertProperties.scopes\r\n| mv-expand alertProperties.criteria.allOf\r\n| extend scopeId = tolower(tostring(alertProperties_scopes))\r\n| extend metric = alertProperties_criteria_allOf.metricName\r\n| extend threshold = alertProperties_criteria_allOf.threshold\r\n| project scopeId, tostring(metric), toint(['threshold'])\r\n| where metric == \"EffectiveCpuAverage\"\r\n| where threshold == 95\r\n) on scopeId\r\n| where isnull(['threshold'])\r\n| project recommendationId = \"4ee5d535-c47b-470a-9557-4a3dd297d62f\", name, id, tags, param1 = \"hostCpuCriticalAlert: isNull or threshold != 95\"\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "029208c8-5186-4a76-8ee8-6e3445fef4dd", + "aprlGuid": "d594cde6-4116-d143-a64a-25f63289a2f8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", - "name": "Configure and streamline alerts" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service-reference#metrics", + "name": "Monitoring Azure Container Registry data reference" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service", + "name": "Monitor Azure Container Registry" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Ensure sufficient memory resources to prevent host resource exhaustion in Azure VMware Solution. It uses vSphere DRS and vSphere HA for dynamic workload management. 
Yet, continuous memory use over 95% leads to disk swapping, affecting workloads.\n", - "pgVerified": true, - "description": "Monitor Memory Utilization to ensure sufficient resources for workloads", - "potentialBenefits": "Avoids host exhaustion and swapping", - "publishedToLearn": false, + "longDescription": "Monitoring Azure resources using Azure Monitor enhances their availability, performance, and operation. Azure Container Registry, a full-stack monitoring service, provides features for Azure and other cloud and on-premises resources.\n", + "pgVerified": false, + "description": "Monitor Azure Container Registry with Azure Monitor", + "potentialBenefits": "Enhanced monitoring and operation", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VMware Solution resources that don't have a cluster host memory critical alert with a threshold of 95%.\r\nresources\r\n| where ['type'] == \"microsoft.avs/privateclouds\"\r\n| extend scopeId = tolower(tostring(id))\r\n| project ['scopeId'], name, id, tags\r\n| join kind=leftouter (\r\nresources\r\n| where type == \"microsoft.insights/metricalerts\"\r\n| extend alertProperties = todynamic(properties)\r\n| mv-expand alertProperties.scopes\r\n| mv-expand alertProperties.criteria.allOf\r\n| extend scopeId = tolower(tostring(alertProperties_scopes))\r\n| extend metric = alertProperties_criteria_allOf.metricName\r\n| extend threshold = alertProperties_criteria_allOf.threshold\r\n| project scopeId, tostring(metric), toint(['threshold'])\r\n| where metric == \"UsageAverage\"\r\n| where threshold == 95\r\n) on scopeId\r\n| where isnull(['threshold'])\r\n| project recommendationId = \"029208c8-5186-4a76-8ee8-6e3445fef4dd\", name, id, tags, param1 = \"hostMemoryCriticalAlert: isNull or threshold != 95\"\r\n\r\n" + "recommendationResourceType": 
"Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a5ef7c05-c611-4842-9af5-11efdc99123a", + "aprlGuid": "e7f0fd54-fba0-054e-9ab8-e676f2851f88", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources", - "name": "Lock your resources to protect your infrastructure" + "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-soft-delete-policy", + "name": "Enable soft delete policy" } ], - "recommendationControl": "Governance", - "longDescription": "Applying a resource delete lock to the Azure VMware Solution Private Cloud resource group prevents unauthorized or accidental deletion by anyone with contributor access, ensuring the protection and reliability of the Azure VMware Solution Private Cloud.\n", - "pgVerified": true, - "description": "Apply Resource delete lock on the resource group hosting the private cloud", - "potentialBenefits": "Prevents accidental deletion", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Enabling soft delete in Azure Container Registry (ACR) allows for the management of deleted artifacts with a specified retention period. 
Users can list, filter, and restore these artifacts until automatically purged post-retention.\n", + "pgVerified": false, + "description": "Enable soft delete policy", + "potentialBenefits": "Recovery of deleted artifacts", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.ContainerRegistry/registries", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure Container Registry resources that do not have soft delete enabled\nresources\n| where type =~ \"microsoft.containerregistry/registries\"\n| where properties.policies.softDeletePolicy.status == \"disabled\"\n| project recommendationId = \"e7f0fd54-fba0-054e-9ab8-e676f2851f88\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e0ac2f57-c8c0-4b8c-a7c8-19e5797828b5", + "aprlGuid": "1ceea4b5-1d8b-4be0-9bbe-9594557be51a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-customer-managed-keys?tabs=azure-portal", - "name": "Configure Customer Managed Keys" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/traffic-collector", + "name": "Azure ExpressRoute Traffic Collector" } ], - "recommendationControl": "Security", - "longDescription": "When using customer-managed keys for encrypting vSAN datastores, leveraging Azure Key Vault for central management and accessing them via a managed identity linked to the private cloud is advised. 
The expiration of these keys can render the vSAN datastore and its associated workloads inaccessible.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "ExpressRoute Traffic Collector samples network flows over ExpressRoute Direct or Service-Provider based circuits, sending flow logs to a Log Analytics workspace for analysis or export to visualization tools/SIEM.\n", "pgVerified": true, - "description": "Use key autorotation for vSAN datastore customer-managed keys", - "potentialBenefits": "Avoid outages with key auto-rotation", - "publishedToLearn": false, + "description": "Ensure ExpressRoute Traffic Collector is enabled and configured for Direct or Provider circuits", + "potentialBenefits": "Enhanced network flow analysis and DR readiness", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.NetworkFunction/azureTrafficCollectors", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fcc2e257-23af-4c68-aac8-9cc03033c939", + "aprlGuid": "baf3bfc0-32a2-4c0c-926d-c9bf0b49808e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-dns-azure-vmware-solution#configure-dns-forwarder", - "name": "Configure DNS forwarder" + "url": "https://learn.microsoft.com/en-us/azure/api-management/upgrade-and-scale#change-your-api-management-service-tier", + "name": "Change your API Management service tier" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-api-mgt", + "name": "Migrate Azure API Management to availability zone support" } ], "recommendationControl": "High Availability", - "longDescription": "Azure VMware Solution private clouds support up to three DNS 
servers for a single FQDN, preventing a single DNS server from becoming a point of failure. It's crucial to use multiple DNS servers for on-premises FQDN resolution from each private cloud.\n", - "pgVerified": true, - "description": "Use multiple DNS servers per private FQDN zone", - "potentialBenefits": "Enhances reliability and avoids failure", - "publishedToLearn": false, + "longDescription": "Upgrading the API Management instance to the Premium SKU adds support for Availability Zones, enhancing availability and resilience by distributing services across physically separate locations within Azure regions.\n", + "pgVerified": false, + "description": "Migrate API Management services to Premium SKU to support Availability Zones", + "potentialBenefits": "Enhanced availability and resilience", "tags": null, - "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationResourceType": "Microsoft.ApiManagement/service", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all API Management instances that aren't Premium\nresources\n| where type =~ 'Microsoft.ApiManagement/service'\n| extend skuName = sku.name\n| where tolower(skuName) != tolower('premium')\n| project recommendationId = \"baf3bfc0-32a2-4c0c-926d-c9bf0b49808e\", name, id, tags, param1=strcat(\"SKU: \", skuName)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3464854d-6f75-4922-95e4-a2a308b53ce6", + "aprlGuid": "740f2c1c-8857-4648-80eb-47d2c56d5a50", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/reliability/reliability-batch#cross-region-disaster-recovery-and-business-continuity", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/api-management/high-availability#availability-zones", + "name": "Ensure API Management availability and 
reliability" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-api-mgt", + "name": "Migrate Azure API Management to availability zone support" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "To ensure cross-region disaster recovery and business continuity, set the right quotas for all Batch accounts to allocate necessary core numbers upfront, preventing execution interruptions from reaching quota limits.\n", + "recommendationControl": "High Availability", + "longDescription": "Zone redundancy for APIM instances ensures the gateway and control plane (Management API, developer portal, Git configuration) are replicated across datacenters in physically separated zones, boosting resilience to zone failures.\n", "pgVerified": false, - "description": "Monitor Batch Account quota", - "potentialBenefits": "Ensures business continuity", - "publishedToLearn": false, + "description": "Enable Availability Zones on Premium API Management instances", + "potentialBenefits": "Improved resilience to zone failures", "tags": null, - "recommendationResourceType": "Microsoft.Batch/batchAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ApiManagement/service", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Premium API Management instances that aren't zone redundant\nresources\n| where type =~ 'Microsoft.ApiManagement/service'\n| extend skuName = sku.name\n| where tolower(skuName) == tolower('premium')\n| where isnull(zones) or array_length(zones) < 2\n| extend zoneValue = iff((isnull(zones)), \"null\", zones)\n| project recommendationId = \"740f2c1c-8857-4648-80eb-47d2c56d5a50\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Zones value: \", zoneValue )\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"71cfab8f-d588-4742-b175-b6e07ae48dbd", - "recommendationTypeId": null, + "aprlGuid": "e35cf148-8eee-49d1-a1c9-956160f99e0b", + "recommendationTypeId": "e5f60ef8-3fcc-4fb5-bee7-7aaeb44c1509", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/batch/create-pool-availability-zones", - "name": "Learn More" - } - ], - "recommendationControl": "High Availability", - "longDescription": "When using Virtual Machine Configuration for Azure Batch pools, opting to distribute your pool across Availability Zones bolsters your compute nodes against Azure datacenter failures.\n", - "pgVerified": false, - "description": "Create an Azure Batch pool across Availability Zones", - "potentialBenefits": "Enhanced reliability and failure protection", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Batch/batchAccounts", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "5a44bd30-ae6a-4b81-9b68-dc3a8ffca4d8", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ + "url": "https://learn.microsoft.com/en-us/azure/api-management/breaking-changes/stv1-platform-retirement-august-2024", + "name": "Azure API Management - stv1 platform retirement (August 2024)" + }, { - "url": "https://learn.microsoft.com/azure/azure-cache-for-redis/cache-how-to-zone-redundancy", - "name": "Enable zone redundancy for Azure Cache for Redis" + "url": "https://learn.microsoft.com/en-us/azure/api-management/compute-infrastructure", + "name": "Azure API Management compute platform" } ], "recommendationControl": "High Availability", - "longDescription": "Azure Cache for Redis offers zone redundancy in Premium and Enterprise tiers, using VMs across multiple Availability Zones to ensure greater resilience and availability.\n", + "longDescription": "Upgrading to API Management 
stv2 is required as stv1 retires on 31 Aug 2024, offering enhanced capabilities with the new platform version.\n", "pgVerified": false, - "description": "Enable zone redundancy for Azure Cache for Redis", - "potentialBenefits": "Higher resilience and availability", - "publishedToLearn": false, + "description": "Azure API Management platform version should be stv2", + "potentialBenefits": "Ensures service continuity", "tags": null, - "recommendationResourceType": "Microsoft.Cache/Redis", + "recommendationResourceType": "Microsoft.ApiManagement/service", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Cache for Redis instances with one or no Zones selected\r\nresources\r\n| where type =~ \"microsoft.cache/redis\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where array_length(zones) <= 1 or isnull(zones)\r\n| project recommendationId = \"5a44bd30-ae6a-4b81-9b68-dc3a8ffca4d8\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\r\n| order by id asc\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all API Management instances that aren't upgraded to platform version stv2\nresources\n| where type =~ 'Microsoft.ApiManagement/service'\n| extend plat_version = properties.platformVersion\n| extend skuName = sku.name\n| where tolower(plat_version) != tolower('stv2')\n| project recommendationId = 
\"e35cf148-8eee-49d1-a1c9-956160f99e0b\", name, id, tags, param1=strcat(\"Platform Version: \", plat_version) , param2=strcat(\"SKU: \", skuName)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "cabc1f98-c8a7-44f7-ab24-977982ef3f70", + "aprlGuid": "c79680ea-de85-44fa-a596-f31fa17a952f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-administration#update-channel-and-schedule-updates", - "name": "Schedule Redis Updates" + "url": "https://learn.microsoft.com/azure/api-management/api-management-howto-autoscale", + "name": "Setting up auto-scale for Azure API Management" } ], "recommendationControl": "High Availability", - "longDescription": "Azure Cache for Redis allows for specifying maintenance windows. A maintenance window allows you to control the days and times of a week during which the VMs hosting your cache can be updated.\n", + "longDescription": "Use API Management with auto-scale for high availability in workloads that experience variable traffic patterns. 
There are several limitations with auto-scale, so review the documentation to ensure it meets your requirements.\n", "pgVerified": false, - "description": "Schedule updates by setting a maintenance window", - "potentialBenefits": "Higher resilience and availability", - "publishedToLearn": false, + "description": "Enable auto-scale for production workloads on API Management services", + "potentialBenefits": "Enhanced availability and resilience", "tags": null, - "recommendationResourceType": "Microsoft.Cache/redis", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ApiManagement/service", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c474fc96-4e6a-4fb0-95d0-a26b3f35933c", + "aprlGuid": "bb4c8db4-f821-475b-b1ea-16e95358665e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-cache-for-redis/cache-network-isolation", - "name": "Configure private endpoints for Azure Redis Cache" + "url": "https://learn.microsoft.com/en-us/azure/azure-app-configuration/concept-soft-delete#purge-protection", + "name": "Purge protection" } ], - "recommendationControl": "Security", - "longDescription": "Use private endpoints for secure connection to cache via a private link, avoiding the public internet.\n", + "recommendationControl": "Governance", + "longDescription": "With Purge protection enabled, soft deleted stores can't be purged in the retention period. 
If disabled, the soft deleted store can be purged before the retention period expires.\n", "pgVerified": false, - "description": "Configure Private Endpoints", - "potentialBenefits": "Secure, private VNet ingress, efficient data transfer", - "publishedToLearn": false, + "description": "Enable Purge protection for Azure App Configuration", + "potentialBenefits": "Prevent accidental deletion of configuration stores.", "tags": null, - "recommendationResourceType": "Microsoft.Cache/redis", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.AppConfiguration/configurationStores", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure Redis cache services not protected by private endpoints.\r\nResources\r\n| where type =~ \"microsoft.cache/redis\"\r\n| where properties['publicNetworkAccess'] == \"Enabled\"\r\n| project recommendationId = \"c474fc96-4e6a-4fb0-95d0-a26b3f35933c\", name, id, tags\r\n| order by id asc\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Purge protection should be enabled for App Configuration stores to prevent accidental deletion of configuration data.\nresources\n| where type =~ \"Microsoft.AppConfiguration/configurationStores\"\n| where sku.name <> \"free\"\n| where (properties.enablePurgeProtection <> true) or isnull(properties.enablePurgeProtection )\n| project recommendationId = \"bb4c8db4-f821-475b-b1ea-16e95358665e\", name, id, tags, param1 = \"Enable purge protection\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9437634c-d69e-2747-b13e-631c13182150", + "aprlGuid": "2102a57a-a056-4d5e-afe5-9df9f92177ca", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/guide/technology-choices/load-balancing-overview", - "name": "Azure Load Balancing Options" - }, - { - "url": 
"https://learn.microsoft.com/azure/traffic-manager/traffic-manager-overview", - "name": "Azure Traffic Manager" - }, - { - "url": "https://learn.microsoft.com/azure/frontdoor/front-door-overview", - "name": "Azure Front Door" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/architecture/guide/networking/global-web-applications/mission-critical-content-delivery", - "name": "Mission-critical global content delivery" + "url": "https://learn.microsoft.com/en-us/azure/azure-app-configuration/faq#which-app-configuration-tier-should-i-use", + "name": "Choose App Configuration tier" } ], - "recommendationControl": "Business Continuity", - "longDescription": "For most solutions, choose either Azure Front Door for content caching, CDN, TLS termination, and WAF, or Traffic Manager for simple global load balancing.\n", - "pgVerified": true, - "description": "Avoid combining Traffic Manager and Front Door", - "potentialBenefits": "Optimized network routing and security", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "SLA is not available for Free tier. 
Upgrade to the Standard tier to get an SLA of 99.9%\n", + "pgVerified": false, + "description": "Upgrade to App Configuration Standard tier", + "potentialBenefits": "High availability, more storage, higher request quota.", "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationResourceType": "Microsoft.AppConfiguration/configurationStores", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Avoid combining Traffic Manager and Front Door\r\nresources\r\n| where type == \"microsoft.network/trafficmanagerprofiles\"\r\n| mvexpand(properties.endpoints)\r\n| extend endpoint=tostring(properties_endpoints.properties.target)\r\n| project name, trafficmanager=id, matchname=endpoint, tags\r\n| join (\r\n resources\r\n | where type =~ \"microsoft.cdn/profiles/afdendpoints\"\r\n | extend matchname= tostring(properties.hostName)\r\n | extend splitid=split(id, \"/\")\r\n | extend frontdoorid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\r\n | project name, id, matchname, frontdoorid, type\r\n | union\r\n (cdnresources\r\n | where type =~ \"Microsoft.Cdn/Profiles/CustomDomains\"\r\n | extend matchname= tostring(properties.hostName)\r\n | extend splitid=split(id, \"/\")\r\n | extend frontdoorid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\r\n | project name, id, matchname, frontdoorid, type)\r\n )\r\n on matchname\r\n| project\r\n recommendationId = \"9437634c-d69e-2747-b13e-631c13182150\",\r\n name=split(trafficmanager, \"/\")[-1],\r\n id=trafficmanager,\r\n tags,\r\n param1=strcat(\"hostname:\", matchname),\r\n param2=strcat(\"frontdoorid:\", frontdoorid)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "6c40b7ae-2bea-5748-be1a-9e9e3b834649", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": 
"https://learn.microsoft.com/azure/frontdoor/origin-security?tabs=app-service-functions&pivots=front-door-standard-premium", - "name": "Secure traffic to Azure Front Door origins" - } - ], - "recommendationControl": "Security", - "longDescription": "Front Door's features perform optimally when traffic exclusively comes through Front Door. It's advised to set up your origin to deny access to traffic that bypasses Front Door.\n", - "pgVerified": true, - "description": "Restrict traffic to your origins", - "potentialBenefits": "Enhances security and performance", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "52bc9a7b-23c8-bc4c-9d2a-7bc43b50104a", - "recommendationTypeId": "e607041e-3194-42ad-9994-b6ea5ec12f5e", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/rest/api/frontdoor/", - "name": "REST API Reference" - }, - { - "url": "https://learn.microsoft.com/java/api/overview/azure/resourcemanager-frontdoor-readme?view=azure-java-preview", - "name": "Client library for Java" - }, - { - "url": "https://learn.microsoft.com/python/api/overview/azure/front-door?view=azure-python", - "name": "SDK for Python" - } - ], - "recommendationControl": "Scalability", - "longDescription": "When working with Azure Front Door through APIs, ARM templates, Bicep, or SDKs, using the latest API or SDK version is crucial. 
Updates bring new functions, important security patches, and bug fixes.\n", - "pgVerified": true, - "description": "Use the latest API version and SDK version", - "potentialBenefits": "Enhanced security and features", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1ad74c3c-e3d7-0046-b83f-a2199974ef15", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/front-door-diagnostics?pivots=front-door-standard-premium", - "name": "Monitor metrics and logs in Azure Front Door" - }, - { - "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-monitor?pivots=front-door-standard-premium#waf-logs", - "name": "WAF logs" - }, - { - "url": "https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-logs", - "name": "Configure Azure Front Door logs" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Front Door logs offer comprehensive telemetry on each request, crucial for understanding your solution's performance and responses, especially when caching is enabled, as origin servers might not receive every request.\n", - "pgVerified": true, - "description": "Configure logs", - "potentialBenefits": "Enhanced insights and solution monitoring", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Upgrade to App Configuration Standard tier\nresources\n| where type =~ \"Microsoft.AppConfiguration/configurationStores\"\n| where sku.name == \"free\"\n| project recommendationId = 
\"2102a57a-a056-4d5e-afe5-9df9f92177ca\", name, id, tags, param1 = \"Upgrade to Standard SKU\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d9bd6780-0d6f-cd4c-bc66-8ddcab12f3d1", - "recommendationTypeId": null, + "aprlGuid": "1cca00d2-d9ab-8e42-a788-5d40f49405cb", + "recommendationTypeId": "78211c00-15a9-336e-17c4-0b48613dadf4", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/frontdoor/end-to-end-tls?pivots=front-door-standard-premium", - "name": "End-to-end TLS with Azure Front Door" + "url": "https://learn.microsoft.com/azure/key-vault/general/soft-delete-overview", + "name": "Azure Key Vault soft-delete overview" } ], - "recommendationControl": "Security", - "longDescription": "Front Door terminates TCP and TLS connections from clients and establishes new connections from each PoP to the origin. Securing these connections with TLS, even for Azure-hosted origins, ensures data is always encrypted during transit.\n", - "pgVerified": true, - "description": "Use end-to-end TLS", - "potentialBenefits": "Ensures data encryption in transit", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Key Vault's soft-delete feature enables recovery of deleted vaults and objects like keys, secrets, and certificates. 
When enabled, marked resources are retained for 90 days, allowing for their recovery, essentially undoing deletion.\n", + "pgVerified": false, + "description": "Key vaults should have soft delete enabled", + "potentialBenefits": "Enables recovery of deleted items", "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationResourceType": "Microsoft.KeyVault/vaults", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Use end-to-end TLS\r\ncdnresources\r\n| where type == \"microsoft.cdn/profiles/afdendpoints/routes\"\r\n| extend forwardingProtocol=tostring(properties.forwardingProtocol),supportedProtocols=properties.supportedProtocols\r\n| project id,name,forwardingProtocol,supportedProtocols,tags\r\n| where forwardingProtocol !~ \"httpsonly\" or supportedProtocols has \"http\"\r\n| project recommendationId= \"d9bd6780-0d6f-cd4c-bc66-8ddcab12f3d1\", name,id,tags,param1=strcat(\"forwardingProtocol:\",forwardingProtocol),param2=strcat(\"supportedProtocols:\",supportedProtocols)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Key Vaults that do not have soft delete enabled.\nresources\n| where type == \"microsoft.keyvault/vaults\"\n| where isnull(properties.enableSoftDelete) or properties.enableSoftDelete != \"true\"\n| project recommendationId = \"1cca00d2-d9ab-8e42-a788-5d40f49405cb\", name, id, tags, param1 = \"EnableSoftDelete: Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "24ab9f11-a3e4-3043-a985-22cf94c4933a", - "recommendationTypeId": null, + "aprlGuid": "70fcfe6d-00e9-5544-a63a-fff42b9f2edb", + "recommendationTypeId": "4ed62ae4-5072-f9e7-8d94-51c76c48159a", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/frontdoor/front-door-how-to-redirect-https#create-http-to-https-redirect-rule", - "name": "Create HTTP to HTTPS redirect rule" + "url": 
"https://learn.microsoft.com/azure/key-vault/general/soft-delete-overview#purge-protection", + "name": "Azure Key Vault purge-protection overview" } ], - "recommendationControl": "Security", - "longDescription": "Using HTTPS is ideal for secure connections. However, for compatibility with older clients, HTTP requests may be necessary. Azure Front Door enables auto redirection of HTTP to HTTPS, enhancing security without sacrificing accessibility.\n", - "pgVerified": true, - "description": "Use HTTP to HTTPS redirection", - "potentialBenefits": "Enhances security and compliance", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Purge protection secures against malicious deletions by enforcing a retention period for soft deleted key vaults, ensuring no one, not even insiders or Microsoft, can purge your key vaults during this period, preventing permanent data loss.\n", + "pgVerified": false, + "description": "Key vaults should have purge protection enabled", + "potentialBenefits": "Protects from insider attacks, avoids data loss", "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.KeyVault/vaults", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Use HTTP to HTTPS redirection\r\ncdnresources\r\n| where type == \"microsoft.cdn/profiles/afdendpoints/routes\"\r\n| extend httpsRedirect=tostring(properties.httpsRedirect)\r\n| project id,name,httpsRedirect,tags\r\n| where httpsRedirect !~ \"enabled\"\r\n| project recommendationId= \"24ab9f11-a3e4-3043-a985-22cf94c4933a\", name,id,tags,param1=strcat(\"httpsRedirect:\",httpsRedirect)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// This resource graph query will return all Key Vaults that do not have Purge Protection enabled.\nresources\n| where type == \"microsoft.keyvault/vaults\"\n| where 
isnull(properties.enablePurgeProtection) or properties.enablePurgeProtection != \"true\"\n| project recommendationId = \"70fcfe6d-00e9-5544-a63a-fff42b9f2edb\", name, id, tags, param1 = \"EnablePurgeProtection: Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "29d65c41-2fad-d142-95eb-9eab95f6c0a5", - "recommendationTypeId": null, + "aprlGuid": "00c3d2b0-ea6e-4c4b-89be-b78a35caeb51", + "recommendationTypeId": "2e96bc2f-1972-e471-9e70-ae58d41e9d2a", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-configure-https-custom-domain?tabs=powershell", - "name": "Configure HTTPS on an Azure Front Door custom domain using the Azure portal" + "url": "https://learn.microsoft.com/azure/key-vault/general/security-features#network-security", + "name": "Azure Key Vault Private Link Service overview" } ], "recommendationControl": "Security", - "longDescription": "When Front Door manages your TLS certificates, it reduces your operational costs and helps you to avoid costly outages caused by forgetting to renew a certificate. 
Front Door automatically issues and rotates the managed TLS certificates.\n", - "pgVerified": true, - "description": "Use managed TLS certificates", - "potentialBenefits": "Lowers costs, avoids outages", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "4638c2c0-03de-6d42-9e09-82ee4478cbf3", - "recommendationTypeId": "2c057605-4707-4d3e-bbb0-a7fe9b6a626b", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-configure-https-custom-domain?tabs=powershell#select-the-certificate-for-azure-front-door-to-deploy", - "name": "Select the certificate for Azure Front Door to deploy" - } - ], - "recommendationControl": "High Availability", - "longDescription": "If you use your own TLS certificates, set the Key Vault certificate version to 'Latest' to avoid reconfiguring Azure Front Door for new certificate versions and waiting for deployment across Front Door's environments.\n", - "pgVerified": true, - "description": "Use latest version for customer-managed certificates", - "potentialBenefits": "Saves time and automates TLS updates", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "cd6a32af-747a-e649-82a7-a98f528ca842", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/architecture/best-practices/host-name-preservation", - "name": "Preserve the original HTTP host name between a reverse proxy and its back-end web application" - } - ], - "recommendationControl": 
"Governance", - "longDescription": "Front Door can rewrite Host headers for custom domain names routing to a single origin, useful for avoiding custom domain configuration at both Front Door and the origin.\n", - "pgVerified": true, - "description": "Use the same domain name on Front Door and your origin", - "potentialBenefits": "Improves session/auth handling", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1bd2b7e8-400f-e64a-99a2-c572f7b08a62", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/web-application-firewall", - "name": "Web Application Firewall on Azure Front Door" - } - ], - "recommendationControl": "Security", - "longDescription": "For internet-facing applications, enabling the Front Door web application firewall (WAF) and configuring it to use managed rules is recommended for protection against a wide range of attacks using Microsoft-managed rules.\n", - "pgVerified": true, - "description": "Enable the WAF", - "potentialBenefits": "Enhances web app security", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Enable the WAF\r\n\r\nresources\r\n| where type =~ \"microsoft.cdn/profiles\" and sku has \"AzureFrontDoor\"\r\n| project name, cdnprofileid=tolower(id), tostring(tags), resourceGroup, subscriptionId,skuname=tostring(sku.name)\r\n| join kind= fullouter (\r\n cdnresources\r\n | where type == \"microsoft.cdn/profiles/securitypolicies\"\r\n | extend wafpolicyid=tostring(properties['parameters']['wafPolicy']['id'])\r\n | extend splitid=split(id, \"/\")\r\n | extend 
cdnprofileid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\r\n | project secpolname=name, cdnprofileid, wafpolicyid\r\n )\r\n on cdnprofileid\r\n| project name, cdnprofileid, secpolname, wafpolicyid,skuname\r\n| join kind = fullouter (\r\n resources\r\n | where type == \"microsoft.network/frontdoorwebapplicationfirewallpolicies\"\r\n | extend\r\n managedrulesenabled=iff(tostring(properties.managedRules.managedRuleSets) != \"[]\", true, false),\r\n enabledState = tostring(properties.policySettings.enabledState)\r\n | project afdwafname=name, managedrulesenabled, wafpolicyid=id, enabledState, tostring(tags)\r\n )\r\n on wafpolicyid\r\n| where name != \"\"\r\n| summarize\r\n associatedsecuritypolicies=countif(secpolname != \"\"),\r\n wafswithmanagedrules=countif(managedrulesenabled == 1)\r\n by name, id=cdnprofileid, tags,skuname\r\n| where associatedsecuritypolicies == 0 or wafswithmanagedrules == 0\r\n| project\r\n recommendationId = \"1bd2b7e8-400f-e64a-99a2-c572f7b08a62\",\r\n name,\r\n id,\r\n todynamic(tags),\r\n param1 = strcat(\"associatedsecuritypolicies:\", associatedsecuritypolicies),\r\n param2 = strcat(\"wafswithmanagedrules:\", wafswithmanagedrules),\r\n param3 = strcat(\"skuname:\",skuname)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "38f3d542-6de6-a44b-86c6-97e3be690281", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/health-probes", - "name": "Health probes" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Front Door health probes help detect unavailable or unhealthy origins, directing traffic to alternate origins if needed.\n", - "pgVerified": true, - "description": "Disable health probes when there is only one origin in an origin group", - "potentialBenefits": "Reduces unnecessary origin traffic", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": 
"Microsoft.Cdn/profiles", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Disable health probes when there is only one origin in an origin group\r\ncdnresources\r\n| where type =~ \"microsoft.cdn/profiles/origingroups\"\r\n| extend healthprobe=tostring(properties.healthProbeSettings)\r\n| project origingroupname=name, id, tags, resourceGroup, subscriptionId, healthprobe\r\n| join (\r\n cdnresources\r\n | where type =~ \"microsoft.cdn/profiles/origingroups/Origins\"\r\n | extend origingroupname = tostring(properties.originGroupName)\r\n )\r\n on origingroupname\r\n| summarize origincount=count(), enabledhealthprobecount=countif(healthprobe != \"\") by origingroupname, id, tostring(tags), resourceGroup, subscriptionId\r\n| where origincount == 1 and enabledhealthprobecount != 0\r\n| project\r\n recommendationId = \"38f3d542-6de6-a44b-86c6-97e3be690281\",\r\n name=origingroupname,\r\n id,\r\n todynamic(tags),\r\n param1 = strcat(\"origincount:\", origincount),\r\n param2 = strcat(\"enabledhealthprobecount:\", enabledhealthprobecount)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "5225bba3-28ec-1e43-8986-7eedfd466d65", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/architecture/patterns/health-endpoint-monitoring", - "name": "Health Endpoint Monitoring pattern" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Consider selecting a webpage or location specifically designed for health monitoring as the endpoint for Azure Front Door's health probes. 
This should encompass the status of critical components like application servers, databases, and caches to serve production traffic efficiently.\n", - "pgVerified": true, - "description": "Select good health probe endpoints", - "potentialBenefits": "Improves traffic routing and uptime", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "5783defe-b49e-d947-84f7-d8677593f324", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/health-probes#supported-http-methods-for-health-probes", - "name": "Supported HTTP methods for health probes" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Health probes in Azure Front Door can use GET or HEAD HTTP methods. Using the HEAD method for health probes is a recommended practice because it reduces the traffic load on your origins, being less resource-intensive.\n", - "pgVerified": true, - "description": "Use HEAD health probes", - "potentialBenefits": "Reduces traffic load on origins", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b515690d-3bf9-3a49-8d38-188e0fd45896", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-geo-filtering", - "name": "Geo filter WAF policy - GeoMatch" - } - ], - "recommendationControl": "Security", - "longDescription": "Azure Front Door's geo-filtering through WAF enables defining custom access rules by 
country/region to restrict or allow web app access.\n", - "pgVerified": true, - "description": "Use geo-filtering in Azure Front Door", - "potentialBenefits": "Enhanced regional access control", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1cfe7834-56ec-ff41-b11d-993734705dba", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/frontdoor/private-link", - "name": "Private link for Azure Front Door" - } - ], - "recommendationControl": "Security", - "longDescription": "Azure Private Link enables secure access to Azure PaaS and services over a private endpoint in your virtual network, ensuring traffic goes over the Microsoft backbone network, not the public internet.\n", - "pgVerified": true, - "description": "Secure your Origin with Private Link in Azure Front Door", - "potentialBenefits": "Enhanced security and private connectivity", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "82fa3cff-74bd-4063-b726-834f160592fa", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/frontdoor/understanding-pricing", - "name": "Compare pricing between Azure Front Door tiers" - } - ], - "recommendationControl": "Service Upgrade and Retirement", - "longDescription": "Azure Front Door standard is ~45% cheaper then AFD classic and has many additional benefits. 
Classic is also scheduled to be retired on March 31, 2027.\n", - "pgVerified": false, - "description": "Avoid using Classic Azure Front Door", - "potentialBenefits": "Costs savings and additional supported features", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Cdn/profiles", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "//under-development\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "d6d9e18a-9ad2-491e-878d-86d621785453", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/ai-services/diagnostic-logging", - "name": "Enable diagnostic logging for Azure AI services" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "All Logs and Metrics should be configured. These logs provide rich, frequent data about the operation of a resource that are used for issue identification and debugging.\n", - "pgVerified": false, - "description": "Enable diagnostic logging for Azure AI services and send the data to Log Analytics", - "potentialBenefits": "Enhanced monitoring and troubleshooting capabilities", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.CognitiveServices/Accounts", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": null - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b49a39fd-f431-4b61-9062-f2157849d845", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", - "name": "Compute Gallery best practices" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Keeping a minimum of 3 replicas for production images in Azure's Compute Gallery ensures scalability and prevents throttling in multi-VM deployments by 
distributing VM deployments across different replicas. This reduces the risk of overloading a single replica.\n", - "pgVerified": true, - "description": "A minimum of three replicas should be kept for production image versions", - "potentialBenefits": "Enhances scalability and avoids throttling", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/galleries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to list all image versions,its associated image name and version replica configurations per region in a compute gallery whose version replicas is less than 3\r\nresources\r\n| where type =~ \"microsoft.compute/galleries/images/versions\"\r\n| extend GalleryName = tostring(split(tostring(id), \"/\")[8]), ImageName = tostring(split(tostring(id), \"/\")[10])\r\n| mv-expand VersionReplicas = properties.publishingProfile.targetRegions\r\n| project RecommendationId=\"b49a39fd-f431-4b61-9062-f2157849d845\",name,id,tags,param1=strcat(\"GalleryName: \",GalleryName),param2=strcat(\"ImageName: \",ImageName),param3=strcat(\"VersionReplicaRegionName: \",VersionReplicas.name),param4=strcat(\"VersionReplicationCount: \",VersionReplicas.regionalReplicaCount),rc=toint(VersionReplicas.regionalReplicaCount)\r\n| where rc < 3\r\n| project-away rc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "488dcc8b-f2e3-40ce-bf95-73deb2db095f", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", - "name": "Compute Gallery best practices" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/storage/common/storage-redundancy#zone-redundant-storage", - "name": "Zone-redundant storage" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Use ZRS for high availability when creating 
image/VM versions in Azure Compute Gallery, offering resilience against Availability Zone failures. ZRS accounts are advisable in regions with Availability Zones, with the choice of Standard_ZRS recommended over Standard_LRS for these regions.\n", - "pgVerified": true, - "description": "Zone redundant storage should be used for image versions", - "potentialBenefits": "Enhances image version availability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/galleries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to list all image versions and its associated image and gallery name whose Storage account type is not using ZRS\r\nresources\r\n| where type =~ \"microsoft.compute/galleries/images/versions\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| extend GalleryName = tostring(split(tostring(id), \"/\")[8]), ImageName = tostring(split(tostring(id), \"/\")[10])\r\n| extend StorageAccountType = tostring(properties.publishingProfile.storageAccountType)\r\n| where StorageAccountType !has \"ZRS\"\r\n| project RecommendationId=\"488dcc8b-f2e3-40ce-bf95-73deb2db095f\",name,id,tags,param1=strcat(\"GalleryName: \",GalleryName),param2=strcat(\"ImageName: \",ImageName),param3=strcat(\"StorageAccountType: \",StorageAccountType)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": 
"1c5e1e58-4e56-491c-8529-10f37af9d4ed", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", - "name": "Compute Gallery best practices" - }, - { - "url": "https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/should-i-create-a-generation-1-or-2-virtual-machine-in-hyper-v", - "name": "Generation 1 vs Generation 2 in Hyper-V" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/shared-image-galleries?tabs=azure-cli", - "name": "Images in Compute gallery" - } - ], - "recommendationControl": "High Availability", - "longDescription": "We recommend creating Trusted Launch Supported Images for benefits like Secure Boot, vTPM, trusted launch VMs, large boot volume. These are Gen 2 Images by default and you cannot change a VM's generation after creation, so review the considerations first.\n", - "pgVerified": true, - "description": "Consider creating TrustedLaunchSupported images where possible", - "potentialBenefits": "Enhances VM security and features", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/galleries", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to list all images whose Hyper-V generation is not V2\r\nresources\r\n| where type =~ \"microsoft.compute/galleries/images\"\r\n| extend VMGeneration = properties.hyperVGeneration\r\n| where VMGeneration <> 'V2'\r\n| project RecommendationId=\"1c5e1e58-4e56-491c-8529-10f37af9d4ed\",name,id,tags,param1=strcat(\"VMGeneration: \",VMGeneration)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b14ee8ed-7d27-447b-b6fb-6472cb5f4b75", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": 
"https://learn.microsoft.com/azure/virtual-machines/azure-compute-gallery#replication", - "name": "Compute Gallery Replication" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "On multi-region deployments, replicate Image Versions to a secondary region to ensure disaster recovery capability. This ensures that the Image Versions are available in the secondary region in case of a disaster in the primary region.\n", - "pgVerified": true, - "description": "Create Image Versions replicas in secondary region", - "potentialBenefits": "Enhances disaster recovery capability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/galleries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// under-development\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b3c3ba1d-7de6-442d-8c50-023330fbf765", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#scaling", - "name": "Compute Gallery Scaling" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "You can set a different replica count in each target region, based on the scale needs for the region. 
For every 20 VMs that you create concurrently, we recommend you keep one replica.\n", - "pgVerified": true, - "description": "Configure Image version replica count per region.", - "potentialBenefits": "Enhances disaster recovery capability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/galleries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// under-development\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "273f6b30-68e0-4241-85ea-acf15ffb60bf", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-orchestration-modes#what-has-changed-with-flexible-orchestration-mode", - "name": "What has changed with Flexible orchestration mode" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-attach-detach-vm?branch=main&tabs=portal-1%2Cportal-2%2Cportal-3", - "name": "Attach or detach a Virtual Machine to or from a Virtual Machine Scale Set" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Production VM workloads should be deployed on multiple VMs and grouped in a VMSS Flex instance to intelligently distribute across the platform, minimizing the impact of platform faults and updates.\n", - "pgVerified": true, - "description": "Run production workloads on two or more VMs using VMSS Flex", - "potentialBenefits": "Enhanced fault/update resilience", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that are not associated with a VMSS Flex instance\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where 
isnull(properties.virtualMachineScaleSet.id)\r\n| project recommendationId=\"273f6b30-68e0-4241-85ea-acf15ffb60bf\", name, id, tags\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "2bd0be95-a825-6f47-a8c6-3db1fb5eb387", - "recommendationTypeId": "066a047a-9ace-45f4-ac50-6325840a6b00", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/create-portal-availability-zone?tabs=standard", - "name": "Create virtual machines in an availability zone using the Azure portal" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure Availability Zones, within each Azure region, are tolerant to local failures, protecting applications and data against unlikely Datacenter failures by being physically separate.\n", - "pgVerified": true, - "description": "Deploy VMs across Availability Zones", - "potentialBenefits": "Enhanced VM resilience to failures", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that are not assigned to a Zone\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where isnull(zones)\r\n| project 
recommendationId=\"2bd0be95-a825-6f47-a8c6-3db1fb5eb387\", name, id, tags, param1=\"No Zone\"\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "a8d25876-7951-b646-b4e8-880c9031596b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-migration-resources", - "name": "Migrate deployments and resources to Virtual Machine Scale Sets in Flexible orchestration" - } - ], - "recommendationControl": "High Availability", - "longDescription": "While availability sets are not scheduled for immediate deprecation, they are planned to be deprecated in the future. Migrate workloads from VMs to VMSS Flex for deployment across zones or within the same zone across different fault domains (FDs) for better reliability.\n", - "pgVerified": true, - "description": "Migrate VMs using availability sets to VMSS Flex", - "potentialBenefits": "Enhances reliability and future-proofs VMs", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs using Availability Sets\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnotnull(properties.availabilitySet)\r\n| project recommendationId = \"a8d25876-7951-b646-b4e8-880c9031596b\", name, id, tags, param1=strcat(\"availabilitySet: \",properties.availabilitySet.id)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "cfe22a65-b1db-fd41-9e8e-d573922709ae", - "recommendationTypeId": "ed651749-cd37-4fd5-9897-01b416926745", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#virtual-machines", - "name": "Resiliency checklist for Virtual Machines" - }, - { - "url": 
"https://learn.microsoft.com/azure/site-recovery/site-recovery-test-failover-to-azure", - "name": "Run a test failover (disaster recovery drill) to Azure" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Replicating Azure VMs via Site Recovery entails continuous, asynchronous disk replication to a target region. Recovery points are generated every few minutes, ensuring a Recovery Point Objective (RPO) in minutes.\n", - "pgVerified": true, - "description": "Replicate VMs using Azure Site Recovery", - "potentialBenefits": "Minimize downtime in disasters", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that do NOT have replication with ASR enabled\r\nresources\r\n| where type =~ \"Microsoft.Compute/virtualMachines\"\r\n| extend securityType = iif(isnull(properties.securityProfile.securityType), \"Standard\", properties.securityProfile.securityType)\r\n| where securityType !in~ (\"TrustedLaunch\", \"ConfidentialVM\")\r\n| project id, vmIdForJoin = tolower(id), name, tags\r\n| join kind = leftouter (\r\n recoveryservicesresources\r\n | where type =~ \"Microsoft.RecoveryServices/vaults/replicationFabrics/replicationProtectionContainers/replicationProtectedItems\"\r\n and properties.providerSpecificDetails.dataSourceInfo.datasourceType =~ \"AzureVm\"\r\n | project vmResourceId = tolower(properties.providerSpecificDetails.dataSourceInfo.resourceId)\r\n )\r\n on $left.vmIdForJoin == $right.vmResourceId\r\n| where isempty(vmResourceId)\r\n| project recommendationId = \"cfe22a65-b1db-fd41-9e8e-d573922709ae\", name, id, tags\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "122d11d7-b91f-8747-a562-f56b79bcfbdc", - "recommendationTypeId": "57ecb3cd-f2b4-4cad-8b3a-232cca527a0b", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - 
"url": "https://learn.microsoft.com/azure/virtual-machines/unmanaged-disks-deprecation", - "name": "Migrate your Azure unmanaged disks by Sep 30, 2025" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/windows/convert-unmanaged-to-managed-disks", - "name": "Migrate Windows VM from unmanaged disks to managed disks" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/linux/convert-unmanaged-to-managed-disks", - "name": "Migrate Linux VM from unmanaged disks to managed disks" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure is retiring unmanaged disks on September 30, 2025. Users should plan the migration to avoid disruptions and maintain service reliability.\n", - "pgVerified": true, - "description": "Use Managed Disks for VM disks", - "potentialBenefits": "Avoid retirement disruption, enhance reliability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that are not using Managed Disks\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnull(properties.storageProfile.osDisk.managedDisk)\r\n| project recommendationId = \"122d11d7-b91f-8747-a562-f56b79bcfbdc\", name, id, tags\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "4ea2878f-0d69-8d4a-b715-afc10d1e538e", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/managed-disks-overview#data-disk", - "name": "Introduction to Azure managed disks - Data disks" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-types", - "name": "Azure managed disk types" - } - ], - "recommendationControl": "Scalability", - "longDescription": "A data disk is a managed disk attached to a virtual machine 
for storing database or other essential data. These disks are SCSI drives labeled as per choice.\n", - "pgVerified": true, - "description": "Host database data on a data disk", - "potentialBenefits": "Enhances performance, recovery, migration flexibility", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that only have OS Disk\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where array_length(properties.storageProfile.dataDisks) < 1\r\n| project recommendationId = \"4ea2878f-0d69-8d4a-b715-afc10d1e538e\", name, id, tags\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1981f704-97b9-b645-9c57-33f8ded9261a", - "recommendationTypeId": "651c7925-17a3-42e5-85cd-73bd095cf27f", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/backup/backup-overview", - "name": "What is the Azure Backup service?" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Enable backups for your virtual machines with Azure Backup to secure and quickly recover your data. 
This service offers simple, secure, and cost-effective solutions for backing up and recovering data from the Microsoft Azure cloud.\n", - "pgVerified": true, - "description": "Backup VMs with Azure Backup service", - "potentialBenefits": "Secure data recovery and backup", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that do NOT have Backup enabled\r\n// Run query to see results.\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| project name, id, tags\r\n| join kind=leftouter (\r\n recoveryservicesresources\r\n | where type =~ 'Microsoft.RecoveryServices/vaults/backupFabrics/protectionContainers/protectedItems'\r\n | where properties.dataSourceInfo.datasourceType =~ 'Microsoft.Compute/virtualMachines'\r\n | project idBackupEnabled=properties.sourceResourceId\r\n | extend name=strcat_array(array_slice(split(idBackupEnabled, '/'), 8, -1), '/')\r\n) on name\r\n| where isnull(idBackupEnabled)\r\n| project-away idBackupEnabled\r\n| project-away name1\r\n| project recommendationId = \"1981f704-97b9-b645-9c57-33f8ded9261a\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "98b334c0-8578-6046-9e43-b6e8fce6318e", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/states-billing?context=%2Ftroubleshoot%2Fazure%2Fvirtual-machines%2Fcontext%2Fcontext#power-states-and-billing", - "name": "States and billing status of Azure Virtual Machines" - } - ], - "recommendationControl": "Governance", - "longDescription": "Azure Virtual Machines (VM) instances have various states, like provisioning and power states. 
A non-running VM may indicate issues or it being unnecessary, suggesting removal could help cut costs.\n", - "pgVerified": true, - "description": "Review VMs in stopped state", - "potentialBenefits": "Reduce costs by removing unused VMs", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that are NOT running\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where properties.extended.instanceView.powerState.displayStatus != 'VM running'\r\n| project recommendationId = \"98b334c0-8578-6046-9e43-b6e8fce6318e\", name, id, tags\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "dfedbeb1-1519-fc47-86a5-52f96cf07105", - "recommendationTypeId": "3a3c1a2a-8597-4d3a-981a-0a24a0ee9de4", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-network/accelerated-networking-overview", - "name": "Accelerated Networking (AccelNet) overview" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Accelerated networking enables SR-IOV to a VM, greatly improving its networking performance by bypassing the host from the data path, which reduces latency, jitter, and CPU utilization for demanding network workloads on supported VM types.\n", - "pgVerified": true, - "description": "Enable Accelerated Networking (AccelNet)", - "potentialBenefits": "Reduces latency, jitter and CPU use", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VM NICs that do not have Accelerated Networking enabled\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| mv-expand nic = 
properties.networkProfile.networkInterfaces\r\n| project name, id, tags, lowerCaseNicId = tolower(nic.id), vmSize = tostring(properties.hardwareProfile.vmSize)\r\n| join kind = inner (\r\n resources\r\n | where type =~ 'Microsoft.Network/networkInterfaces'\r\n | where properties.enableAcceleratedNetworking == false\r\n | project nicName = split(id, \"/\")[8], lowerCaseNicId = tolower(id)\r\n )\r\n on lowerCaseNicId\r\n| summarize nicNames = make_set(nicName) by name, id, tostring(tags), vmSize\r\n| extend param1 = strcat(\"NicName: \", strcat_array(nicNames, \", \")), param2 = strcat(\"VMSize: \", vmSize)\r\n| project recommendationId = \"dfedbeb1-1519-fc47-86a5-52f96cf07105\", name, id, tags, param1, param2\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "73d1bb04-7d3e-0d47-bc0d-63afe773b5fe", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-network/accelerated-networking-overview", - "name": "Accelerated Networking (AccelNet) overview" - } - ], - "recommendationControl": "Governance", - "longDescription": "When Accelerated Networking is enabled, the default Azure VNet interface in GuestOS is swapped for a Mellanox, and its driver comes from a 3rd party. 
Marketplace images have the latest Mellanox drivers, but post-deployment, updating the driver is the user's responsibility.\n", - "pgVerified": true, - "description": "When AccelNet is enabled, you must manually update the GuestOS NIC driver", - "potentialBenefits": "Enhanced VM network efficiency", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1f629a30-c9d0-d241-82ee-6f2eb9d42cb4", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/load-balancer/load-balancer-outbound-connections", - "name": "Use Source Network Address Translation (SNAT) for outbound connections" - } - ], - "recommendationControl": "Security", - "longDescription": "For outbound internet connectivity of Virtual Machines, using NAT Gateway or Azure Firewall is recommended to enhance security and service resilience, thanks to their higher availability and SNAT ports.\n", - "pgVerified": true, - "description": "VMs should not have a Public IP directly associated", - "potentialBenefits": "Enhanced security and service resiliency", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs with PublicIPs directly associated with them\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnotnull(properties.networkProfile.networkInterfaces)\r\n| mv-expand nic=properties.networkProfile.networkInterfaces\r\n| project name, id, tags, nicId = nic.id\r\n| extend nicId = tostring(nicId)\r\n| join kind=inner (\r\n Resources\r\n | where type =~ 
'Microsoft.Network/networkInterfaces'\r\n | where isnotnull(properties.ipConfigurations)\r\n | mv-expand ipconfig=properties.ipConfigurations\r\n | extend publicIp = tostring(ipconfig.properties.publicIPAddress.id)\r\n | where publicIp != \"\"\r\n | project name, nicId = tostring(id), publicIp\r\n) on nicId\r\n| project recommendationId = \"1f629a30-c9d0-d241-82ee-6f2eb9d42cb4\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "82b3cf6b-9ae2-2e44-b193-10793213f676", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-network/network-security-group-how-it-works#intra-subnet-traffic", - "name": "How network security groups filter network traffic" - } - ], - "recommendationControl": "Security", - "longDescription": "Unless you have a specific reason, it's advised to associate a network security group to a subnet or a network interface, but not both, to avoid unexpected communication issues and troubleshooting due to potential rule conflicts between the two associations.\n", - "pgVerified": true, - "description": "VM network interfaces and associated subnets both have a Network Security Group associated", - "potentialBenefits": "Reduces communication problems", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of virtual machines and associated NICs that do have an NSG associated to them and also an NSG associated to the subnet.\r\nResources\r\n| where type =~ 'Microsoft.Network/networkInterfaces'\r\n| where isnotnull(properties.networkSecurityGroup)\r\n| mv-expand ipConfigurations = properties.ipConfigurations, nsg = properties.networkSecurityGroup\r\n| project nicId = tostring(id), subnetId = 
tostring(ipConfigurations.properties.subnet.id), nsgName=split(nsg.id, '/')[8]\r\n| parse kind=regex subnetId with '/virtualNetworks/' virtualNetwork '/subnets/' subnet\r\n | join kind=inner (\r\n Resources\r\n | where type =~ 'Microsoft.Network/NetworkSecurityGroups' and isnotnull(properties.subnets)\r\n | project name, resourceGroup, subnet=properties.subnets\r\n | mv-expand subnet\r\n | project subnetId=tostring(subnet.id)\r\n ) on subnetId\r\n | project nicId\r\n| join kind=leftouter (\r\n Resources\r\n | where type =~ 'Microsoft.Compute/virtualMachines'\r\n | where isnotnull(properties.networkProfile.networkInterfaces)\r\n | mv-expand nic=properties.networkProfile.networkInterfaces\r\n | project vmName = name, vmId = id, tags, nicId = nic.id, nicName=split(nic.id, '/')[8]\r\n | extend nicId = tostring(nicId)\r\n) on nicId\r\n| project recommendationId = \"82b3cf6b-9ae2-2e44-b193-10793213f676\", name=vmName, id = vmId, tags, param1 = strcat(\"nic-name=\", nicName)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "41a22a5e-5e08-9647-92d0-2ffe9ef1bdad", - "recommendationTypeId": "c3b51c94-588b-426b-a892-24696f9e54cc", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-network/virtual-network-network-interface?tabs=network-interface-portal#enable-or-disable-ip-forwarding", - "name": "Enable or disable IP forwarding" - } - ], - "recommendationControl": "Security", - "longDescription": "IP forwarding allows a virtual machine network interface to receive and send network traffic not destined for or originating from its assigned IP addresses.\n", - "pgVerified": true, - "description": "IP Forwarding should only be enabled for Network Virtual Appliances", - "potentialBenefits": "Enhances network appliance function", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": 
true, - "query": "// Azure Resource Graph Query\r\n// Find all VM NICs that have IPForwarding enabled. This feature is usually only required for Network Virtual Appliances\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnotnull(properties.networkProfile.networkInterfaces)\r\n| mv-expand nic=properties.networkProfile.networkInterfaces\r\n| project name, id, tags, nicId = nic.id\r\n| extend nicId = tostring(nicId)\r\n| join kind=inner (\r\n Resources\r\n | where type =~ 'Microsoft.Network/networkInterfaces'\r\n | where properties.enableIPForwarding == true\r\n | project nicId = tostring(id)\r\n) on nicId\r\n| project recommendationId = \"41a22a5e-5e08-9647-92d0-2ffe9ef1bdad\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "1cf8fe21-9593-1e4e-966b-779a294c0d30", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-network/virtual-networks-name-resolution-for-vms-and-role-instances", - "name": "Name resolution for resources in Azure virtual networks" - } - ], - "recommendationControl": "Other Best Practices", - "longDescription": "Configure the DNS Server at the Virtual Network level to prevent any inconsistency across the environment.\n", - "pgVerified": true, - "description": "Customer DNS Servers should be configured in the Virtual Network level", - "potentialBenefits": "Ensures DNS consistency", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VM NICs that have DNS Server settings configured in any of the NICs\r\nResources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnotnull(properties.networkProfile.networkInterfaces)\r\n| mv-expand 
nic=properties.networkProfile.networkInterfaces\r\n| project name, id, tags, nicId = nic.id\r\n| extend nicId = tostring(nicId)\r\n| join kind=inner (\r\n Resources\r\n | where type =~ 'Microsoft.Network/networkInterfaces'\r\n | project name, id, dnsServers = properties.dnsSettings.dnsServers\r\n | extend hasDns = array_length(dnsServers) >= 1\r\n | where hasDns != 0\r\n | project name, nicId = tostring(id)\r\n) on nicId\r\n| project recommendationId = \"1cf8fe21-9593-1e4e-966b-779a294c0d30\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "3263a64a-c256-de48-9818-afd3cbc55c2a", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-shared", - "name": "Azure Shared Disk Introduction" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-shared-enable?tabs=azure-portal", - "name": "Enable Shared Disks" - } - ], - "recommendationControl": "Other Best Practices", - "longDescription": "Azure shared disks let you attach a disk to multiple VMs at once for deploying or migrating clustered applications, suitable only when a disk is shared among VM cluster members.\n", - "pgVerified": true, - "description": "Shared disks should only be enabled in clustered servers", - "potentialBenefits": "Enhances clustered server performance", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Disks configured to be Shared. 
This is not an indication of an issue, but if a disk with this configuration is assigned to two or more VMs without a proper disk control mechanism (like a WSFC) it can lead to data loss\r\nresources\r\n| where type =~ 'Microsoft.Compute/disks'\r\n| where isnotnull(properties.maxShares) and properties.maxShares >= 2\r\n| project id, name, tags, lowerCaseDiskId = tolower(id), diskState = tostring(properties.diskState)\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ 'Microsoft.Compute/virtualMachines'\r\n | project osDiskVmName = name, lowerCaseOsDiskId = tolower(properties.storageProfile.osDisk.managedDisk.id)\r\n | join kind = fullouter (\r\n resources\r\n | where type =~ 'Microsoft.Compute/virtualMachines'\r\n | mv-expand dataDisks = properties.storageProfile.dataDisks\r\n | project dataDiskVmName = name, lowerCaseDataDiskId = tolower(dataDisks.managedDisk.id)\r\n )\r\n on $left.lowerCaseOsDiskId == $right.lowerCaseDataDiskId\r\n | project lowerCaseDiskId = coalesce(lowerCaseOsDiskId, lowerCaseDataDiskId), vmName = coalesce(osDiskVmName, dataDiskVmName)\r\n )\r\n on lowerCaseDiskId\r\n| summarize vmNames = make_set(vmName) by name, id, tostring(tags), diskState\r\n| extend param1 = strcat(\"DiskState: \", diskState), param2 = iif(isempty(vmNames[0]), \"VMName: n/a\", strcat(\"VMName: \", strcat_array(vmNames, \", \")))\r\n| project recommendationId = \"3263a64a-c256-de48-9818-afd3cbc55c2a\", name, id, tags, param1, param2\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "70b1d2be-e6c4-b54e-9959-b1b690f9e485", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-enable-private-links-for-import-export-portal", - "name": "Restrict import/export access for managed disks using Azure Private Link" - } - ], - "recommendationControl": "Security", - "longDescription": "Recommended changing to \"Disable public 
access and enable private access\" and creating a Private Endpoint to improve security by restricting direct public access and ensuring connections are made privately, enhancing data protection and minimizing potential external threats.\n", - "pgVerified": true, - "description": "Network access to the VM disk should be set to Disable public access and enable private access", - "potentialBenefits": "Enhances VM security and privacy", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Disks with \"Enable public access from all networks\" enabled\r\nresources\r\n| where type =~ 'Microsoft.Compute/disks'\r\n| where properties.publicNetworkAccess == \"Enabled\"\r\n| project id, name, tags, lowerCaseDiskId = tolower(id)\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ 'Microsoft.Compute/virtualMachines'\r\n | project osDiskVmName = name, lowerCaseOsDiskId = tolower(properties.storageProfile.osDisk.managedDisk.id)\r\n | join kind = fullouter (\r\n resources\r\n | where type =~ 'Microsoft.Compute/virtualMachines'\r\n | mv-expand dataDisks = properties.storageProfile.dataDisks\r\n | project dataDiskVmName = name, lowerCaseDataDiskId = tolower(dataDisks.managedDisk.id)\r\n )\r\n on $left.lowerCaseOsDiskId == $right.lowerCaseDataDiskId\r\n | project lowerCaseDiskId = coalesce(lowerCaseOsDiskId, lowerCaseDataDiskId), vmName = coalesce(osDiskVmName, dataDiskVmName)\r\n )\r\n on lowerCaseDiskId\r\n| summarize vmNames = make_set(vmName) by name, id, tostring(tags)\r\n| extend param1 = iif(isempty(vmNames[0]), \"VMName: n/a\", strcat(\"VMName: \", strcat_array(vmNames, \", \")))\r\n| project recommendationId = \"70b1d2be-e6c4-b54e-9959-b1b690f9e485\", name, id, tags, param1\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": 
"c42343ae-2712-2843-a285-3437eb0b28a1", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/ready/landing-zone/design-principles#policy-driven-governance", - "name": "Policy-driven governance" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/security-policy", - "name": "Azure Policy Regulatory Compliance controls for Azure Virtual Machines" - } - ], - "recommendationControl": "Governance", - "longDescription": "Keeping your virtual machine (VM) secure is crucial for the applications you run. This involves using various Azure services and features to ensure secure access to your VMs and the secure storage of your data, aiming for overall security of your VM and applications.\n", - "pgVerified": true, - "description": "Ensure that your VMs are compliant with Azure Policies", - "potentialBenefits": "Secure VMs and applications", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs in \"Non-compliant\" state with Azure Policies\r\npolicyresources\r\n| where type =~ \"Microsoft.PolicyInsights/policyStates\" and properties.resourceType =~ \"Microsoft.Compute/virtualMachines\" and properties.complianceState =~ \"NonCompliant\"\r\n| project\r\n policyDefinitionId = tolower(properties.policyDefinitionId),\r\n policyAssignmentId = tolower(properties.policyAssignmentId),\r\n targetResourceId = tolower(properties.resourceId)\r\n// Join the policy definition details\r\n| join kind = leftouter (\r\n policyresources\r\n | where type =~ \"Microsoft.Authorization/policyDefinitions\"\r\n | project policyDefinitionId = tolower(id), policyDefinitionDisplayName = properties.displayName\r\n )\r\n on policyDefinitionId\r\n| project policyDefinitionId, 
policyDefinitionDisplayName, policyAssignmentId, targetResourceId\r\n// Join the policy assignment details\r\n| join kind = leftouter (\r\n policyresources\r\n | where type =~ \"Microsoft.Authorization/policyAssignments\"\r\n | project policyAssignmentId = tolower(id), policyAssignmentDisplayName = properties.displayName\r\n )\r\n on policyAssignmentId\r\n| project policyDefinitionId, policyDefinitionDisplayName, policyAssignmentId, policyAssignmentDisplayName, targetResourceId\r\n// Join the target resource details\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ \"Microsoft.Compute/virtualMachines\"\r\n | project targetResourceId = tolower(id), targetResourceIdPreservedCase = id, targetResourceName = name, targetResourceTags = tags\r\n )\r\n on targetResourceId\r\n| project\r\n recommendationId = \"c42343ae-2712-2843-a285-3437eb0b28a1\",\r\n name = targetResourceName,\r\n id = targetResourceIdPreservedCase,\r\n tags = targetResourceTags,\r\n param1 = strcat(\"DefinitionName: \", policyDefinitionDisplayName),\r\n param2 = strcat(\"DefinitionID: \", policyDefinitionId),\r\n param3 = strcat(\"AssignmentName: \", policyAssignmentDisplayName),\r\n param4 = strcat(\"AssignmentID: \", policyAssignmentId)\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "f0a97179-133a-6e4f-8a49-8a44da73ffce", - "recommendationTypeId": "a40cc620-e72c-fdf4-c554-c6ca2cd705c0", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disk-encryption-overview", - "name": "Overview of managed disk encryption options" - } - ], - "recommendationControl": "Security", - "longDescription": "Consider enabling Azure Disk Encryption (ADE) for encrypting Azure VM disks using DM-Crypt (Linux) or BitLocker (Windows). 
Additionally, consider Encryption at host and Confidential disk encryption for enhanced data security.\n", - "pgVerified": true, - "description": "Virtual Machines should have Azure Disk Encryption or EncryptionAtHost enabled", - "potentialBenefits": "Enhances data security and integrity", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure VM disks without Azure Disk Encryption or encryption at host enabled\r\nresources\r\n| where type =~ \"microsoft.compute/disks\"\r\n| project diskId = id, diskName = name, vmId = tolower(managedBy), azureDiskEncryption = iff(properties.encryptionSettingsCollection.enabled == true, true, false)\r\n| join kind=leftouter (resources\r\n| where type =~ \"microsoft.compute/virtualmachines\"\r\n| project vmId = tolower(id), vmName = name, encryptionAtHost = iff(properties.securityProfile.encryptionAtHost == true, true, false)) on vmId\r\n| where not(encryptionAtHost) and not(azureDiskEncryption)\r\n| project recommendationId = 'f0a97179-133a-6e4f-8a49-8a44da73ffce', name = vmName, id =vmId, param1 = strcat('diskName:',diskName), param2 = strcat('azureDiskEncryption:',iff(azureDiskEncryption, \"Enabled\", \"Disabled\")), param3 = strcat('encryptionAtHost:',iff(encryptionAtHost, \"Enabled\", \"Disabled\"))\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b72214bb-e879-5f4b-b9cd-642db84f36f4", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/azure-monitor/vm/vminsights-overview", - "name": "Overview of VM insights" - }, - { - "url": "https://learn.microsoft.com/azure/azure-monitor/vm/vminsights-troubleshoot#did-the-extension-install-properly", - "name": "Did the extension install properly?" 
- } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "VM Insights monitors VM and scale set performance, health, running processes, and dependencies. It enhances the predictability of application performance and availability by pinpointing performance bottlenecks and network issues, and it clarifies if problems are related to other dependencies.\n", - "pgVerified": true, - "description": "Enable VM Insights", - "potentialBenefits": "Improves VM performance and health", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Check for VMs without Azure Monitoring Agent extension installed, missing Data Collection Rule or Data Collection Rule without performance enabled.\r\nResources\r\n| where type == 'microsoft.compute/virtualmachines'\r\n| project idVm = tolower(id), name, tags\r\n| join kind=leftouter (\r\n InsightsResources\r\n | where type =~ \"Microsoft.Insights/dataCollectionRuleAssociations\" and id has \"Microsoft.Compute/virtualMachines\"\r\n | project idDcr = tolower(properties.dataCollectionRuleId), idVmDcr = tolower(substring(id, 0, indexof(id, \"/providers/Microsoft.Insights/dataCollectionRuleAssociations/\"))))\r\non $left.idVm == $right.idVmDcr\r\n| join kind=leftouter (\r\n Resources\r\n | where type =~ \"Microsoft.Insights/dataCollectionRules\"\r\n | extend\r\n isPerformanceEnabled = iif(properties.dataSources.performanceCounters contains \"Microsoft-InsightsMetrics\" and properties.dataFlows contains \"Microsoft-InsightsMetrics\", true, false),\r\n isMapEnabled = iif(properties.dataSources.extensions contains \"Microsoft-ServiceMap\" and properties.dataSources.extensions contains \"DependencyAgent\" and properties.dataFlows contains \"Microsoft-ServiceMap\", true, false)//,\r\n | where isPerformanceEnabled or isMapEnabled\r\n | project dcrName = 
name, isPerformanceEnabled, isMapEnabled, idDcr = tolower(id))\r\non $left.idDcr == $right.idDcr\r\n| join kind=leftouter (\r\n Resources\r\n | where type == 'microsoft.compute/virtualmachines/extensions' and (name contains 'AzureMonitorWindowsAgent' or name contains 'AzureMonitorLinuxAgent')\r\n | extend idVmExtension = tolower(substring(id, 0, indexof(id, '/extensions'))), extensionName = name)\r\non $left.idVm == $right.idVmExtension\r\n| where isPerformanceEnabled != 1 or (extensionName != 'AzureMonitorWindowsAgent' and extensionName != 'AzureMonitorLinuxAgent')\r\n| project recommendationId = \"b72214bb-e879-5f4b-b9cd-642db84f36f4\", name, id = idVm, tags, param1 = strcat('MonitoringExtension:', extensionName), param2 = strcat('DataCollectionRuleId:', idDcr), param3 = strcat('isPerformanceEnabled:', isPerformanceEnabled)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "4a9d8973-6dba-0042-b3aa-07924877ebd5", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/agents/agents-overview", - "name": "Azure Monitor Agent overview" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Azure Monitor Metrics automatically receives platform metrics, but platform logs, which offer detailed diagnostics and auditing for resources and their Azure platform, need to be manually routed for collection.\n", - "pgVerified": true, - "description": "Configure monitoring for all Azure Virtual Machines", - "potentialBenefits": "Enhanced diagnostics and auditing capability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Virtual Machines without diagnostic settings enabled/with diagnostic settings enabled but not configured both performance 
counters and event logs/syslogs.\r\nresources\r\n| where type =~ \"microsoft.compute/virtualmachines\"\r\n| project name, id, tags, lowerCaseVmId = tolower(id)\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ \"Microsoft.Compute/virtualMachines/extensions\" and properties.publisher =~ \"Microsoft.Azure.Diagnostics\"\r\n | project\r\n lowerCaseVmIdOfExtension = tolower(substring(id, 0, indexof(id, \"/extensions/\"))),\r\n extensionType = properties.type,\r\n provisioningState = properties.provisioningState,\r\n storageAccount = properties.settings.StorageAccount,\r\n // Windows\r\n wadPerfCounters = properties.settings.WadCfg.DiagnosticMonitorConfiguration.PerformanceCounters.PerformanceCounterConfiguration,\r\n wadEventLogs = properties.settings.WadCfg.DiagnosticMonitorConfiguration.WindowsEventLog,\r\n // Linux\r\n ladPerfCounters = properties.settings.ladCfg.diagnosticMonitorConfiguration.performanceCounters.performanceCounterConfiguration,\r\n ladSyslog = properties.settings.ladCfg.diagnosticMonitorConfiguration.syslogEvents\r\n | extend\r\n // Windows\r\n isWadPerfCountersConfigured = iif(array_length(wadPerfCounters) > 0, true, false),\r\n isWadEventLogsConfigured = iif(isnotnull(wadEventLogs) and array_length(wadEventLogs.DataSource) > 0, true, false),\r\n // Linux\r\n isLadPerfCountersConfigured = iif(array_length(ladPerfCounters) > 0, true, false),\r\n isLadSyslogConfigured = isnotnull(ladSyslog)\r\n | project\r\n lowerCaseVmIdOfExtension,\r\n extensionType,\r\n provisioningState,\r\n storageAccount,\r\n isPerfCountersConfigured = case(extensionType =~ \"IaaSDiagnostics\", isWadPerfCountersConfigured, extensionType =~ \"LinuxDiagnostic\", isLadPerfCountersConfigured, false),\r\n isEventLogsConfigured = case(extensionType =~ \"IaaSDiagnostics\", isWadEventLogsConfigured, extensionType =~ \"LinuxDiagnostic\", isLadSyslogConfigured, false)\r\n )\r\n on $left.lowerCaseVmId == $right.lowerCaseVmIdOfExtension\r\n| where 
isempty(lowerCaseVmIdOfExtension) or provisioningState !~ \"Succeeded\" or not(isPerfCountersConfigured and isEventLogsConfigured)\r\n| extend\r\n param1 = strcat(\"DiagnosticSetting: \", iif(isnotnull(extensionType), strcat(\"Enabled, partially configured (\", extensionType, \")\"), \"Not enabled\")),\r\n param2 = strcat(\"ProvisioningState: \", iif(isnotnull(provisioningState), provisioningState, \"n/a\")),\r\n param3 = strcat(\"storageAccount: \", iif(isnotnull(storageAccount), storageAccount, \"n/a\")),\r\n param4 = strcat(\"PerformanceCounters: \", case(isnull(isPerfCountersConfigured), \"n/a\", isPerfCountersConfigured, \"Configured\", \"Not configured\")),\r\n param5 = strcat(\"EventLogs/Syslogs: \", case(isnull(isEventLogsConfigured), \"n/a\", isEventLogsConfigured, \"Configured\", \"Not configured\"))\r\n| project recommendationId = \"4a9d8973-6dba-0042-b3aa-07924877ebd5\", name, id, tags, param1, param2, param3, param4, param5\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "52ab9e5c-eec0-3148-8bd7-b6dd9e1be870", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/maintenance-configurations", - "name": "Use maintenance configurations to control and manage the VM updates" - } - ], - "recommendationControl": "High Availability", - "longDescription": "The maintenance configuration settings let users schedule and manage updates, making sure the updates or interruptions on the VM are performed within a planned timeframe.\n", - "pgVerified": true, - "description": "Use maintenance configurations for the VMs", - "potentialBenefits": "Scheduled updates for VMs", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find VMS that do not have maintenance configuration 
assigned\r\nResources\r\n| extend resourceId = tolower(id)\r\n| project name, location, type, id, tags, resourceId, properties\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| join kind=leftouter (\r\nmaintenanceresources\r\n| where type =~ \"microsoft.maintenance/configurationassignments\"\r\n| project planName = name, type, maintenanceProps = properties\r\n| extend resourceId = tostring(maintenanceProps.resourceId)\r\n) on resourceId\r\n| where isnull(maintenanceProps)\r\n| project recommendationId = \"52ab9e5c-eec0-3148-8bd7-b6dd9e1be870\",name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "3201dba8-d1da-4826-98a4-104066545170", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-b-series-burstable", - "name": "B-series burstable virtual machine sizes" - } - ], - "recommendationControl": "Scalability", - "longDescription": "A-series VMs are tailored for entry-level workloads like development and testing, including use cases such as development and test servers, low traffic web servers, and small to medium databases.\n", - "pgVerified": true, - "description": "Don't use A or B-Series VMs for production needing constant full CPU performance", - "potentialBenefits": "Ensures full CPU usage for heavy tasks", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs using A or B series families\r\nresources\r\n| where type == 'microsoft.compute/virtualmachines'\r\n| where properties.hardwareProfile.vmSize contains \"Standard_B\" or properties.hardwareProfile.vmSize contains \"Standard_A\"\r\n| project recommendationId = \"3201dba8-d1da-4826-98a4-104066545170\", name, id, tags, param1=strcat(\"vmSku: \" , 
properties.hardwareProfile.vmSize)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "df0ff862-814d-45a3-95e4-4fad5a244ba6", - "recommendationTypeId": "58d6648d-32e8-4346-827c-4f288dd8ca24", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#disk-type-comparison", - "name": "Disk type comparison and decision tree" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Compared to Standard HDD and SSD, Premium SSD, SSD v2, and Ultra Disks offer improved performance, configurability, and higher single-instance VM uptime SLAs. The lowest SLA of all disks on a VM applies, so it is best to use Premium or Ultra Disks for the highest uptime SLA.\n", - "pgVerified": true, - "description": "Mission Critical Workloads should consider using Premium or Ultra Disks", - "potentialBenefits": "Enhanced performance, cost efficiency, and uptime SLA", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that have an attached disk that is not in the Premium or Ultra sku tier.\r\n\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| extend lname = tolower(name)\r\n| join kind=leftouter(resources\r\n | where type =~ 'Microsoft.Compute/disks'\r\n | where not(sku.tier =~ 'Premium') and not(sku.tier =~ 'Ultra')\r\n | extend lname = tolower(tostring(split(managedBy, '/')[8]))\r\n | project lname, name\r\n | summarize disks = make_list(name) by lname) on lname\r\n| where isnotnull(disks)\r\n| project recommendationId = \"df0ff862-814d-45a3-95e4-4fad5a244ba6\", name, id, tags, param1=strcat(\"AffectedDisks: \", disks)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "9ab499d8-8844-424d-a2d4-8f53690eb8f8", - "recommendationTypeId": null, - 
"recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/azure-boost/overview", - "name": "Microsoft Azure Boost" - }, - { - "url": "https://aka.ms/AzureBoostGABlog", - "name": "Announcing the general availability of Azure Boost" - } - ], - "recommendationControl": "High Availability", - "longDescription": "If the workload is Maintenance sensitive, consider Azure Boost compatible VMs. Azure Boost is designed to lessen the impact on customers when Azure maintenance activities occur on the host.\n", - "pgVerified": true, - "description": "Use Azure Boost VMs for Maintenance sensitive workload", - "potentialBenefits": "Less maintenance impact", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "2de8fa5e-14f4-4c4c-857f-1520f87a629f", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/windows/scheduled-event-service", - "name": "Monitor scheduled events for your Azure VMs" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/linux/scheduled-events", - "name": "Azure Metadata Service Scheduled Events for Linux VMs" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/windows/scheduled-events", - "name": "Azure Metadata Service Scheduled Events for Windows VMs" - } - ], - "recommendationControl": "High Availability", - "longDescription": "If your workload is Maintenance sensitive, enable Scheduled Events. 
This Azure Metadata Service lets your app prepare for virtual machine maintenance by providing information on upcoming events like reboots, reducing disruptions.\n", - "pgVerified": true, - "description": "Enable Scheduled Events for Maintenance sensitive workload VMs", - "potentialBenefits": "Minimize downtime for VMs", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "fa0cf4f5-0b21-47b7-89a9-ee936f193ce1", - "recommendationTypeId": "d4102c0f-ebe3-4b22-8fe0-e488866a87af", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://aka.ms/zrsdisksdoc", - "name": "Redundancy options for managed disks" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure disks offers a zone-redundant storage (ZRS) option for workloads that need to be resilient to an entire zone being down. 
Due to the cross-zone data replication, ZRS disks have higher write latency when compared to the locally-redundant option (LRS), so make sure to benchmark your disks.\n", - "pgVerified": true, - "description": "Use Azure Disks with Zone Redundant Storage for higher resiliency and availability", - "potentialBenefits": "Enhanced Disk resilience to failures", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find eligible Disks that are not zonal nor zone redundant\r\nresources\r\n| where type == 'microsoft.compute/disks'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where sku has \"Premium_LRS\" or sku has \"StandardSSD_LRS\"\r\n| where sku.name has_cs 'ZRS' or array_length(zones) > 0\r\n| project recommendationId=\"fa0cf4f5-0b21-47b7-89a9-ee936f193ce1\", name, id, tags, param1 = sku, param2 = sku.name\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "302fda08-ee65-4fbe-a916-6dc0b33169c4", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://aka.ms/on-demand-capacity-reservations-docs", - "name": "On-demand Capacity Reservation" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure Capacity Reservations ensure high availability 
for virtual machines by reserving compute capacity in advance within a specific region or availability zone. This guarantees that VMs will have the necessary resources during peak demand or maintenance events, enhancing reliability and uptime.\n", - "pgVerified": false, - "description": "Reserve Compute Capacity for critical workloads", - "potentialBenefits": "Guaranteed capacity in constrained regions/zones", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Virtual Machines not associated with a Capacity Reservation, and provide details for Capacity Reservation like vmSize, location, and zone.\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnull(properties.capacityReservation)\r\n| extend zoneValue = iff(isnull(zones), \"null\", zones)\r\n| project recommendationId = \"302fda08-ee65-4fbe-a916-6dc0b33169c4\", name, id, tags, param1 = strcat(\"VmSize: \", properties.hardwareProfile.vmSize), param2 = strcat(\"Location: \", location), param3 = strcat(\"Zone: \", zoneValue)\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "5f7e8a12-3c4f-456b-919c-2e9adff98c38", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/update-linux-agent?tabs=ubuntu", - "name": "How to update the Azure Linux Agent on a VM" - } - ], - "recommendationControl": "High Availability", - "longDescription": "If you've installed the Azure Linux Agent or are using an endorsed distribution image, ensure your agent version is up-to-date. 
Some Linux distributions may disable auto-update or use older agent versions.\n", - "pgVerified": false, - "description": "Update the Azure Linux VM Agent", - "potentialBenefits": "Reduces complications with VM provisioning", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "e7495e1c-0c75-0946-b266-b429b5c7f3bf", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-design-overview#when-to-use-scale-sets-instead-of-virtual-machines", - "name": "When to use VMSS instead of VMs" - }, - { - "url": "https://learn.microsoft.com/azure/well-architected/services/compute/virtual-machines/virtual-machines-review", - "name": "Azure Well-Architected Framework review - Virtual Machines and Scale Sets" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Deploying even single instance VMs into a scale set with Flexible orchestration mode future-proofs applications for scaling and availability. 
This mode guarantees high availability (up to 1000 VMs) by distributing VMs across fault domains in a region or within an Availability Zone.\n", - "pgVerified": true, - "description": "Deploy VMSS with Flex orchestration mode instead of Uniform", - "potentialBenefits": "Higher scalability and availability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all zonal VMs that are NOT deployed with Flex orchestration mode\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| where properties.orchestrationMode != \"Flexible\"\r\n| project recommendationId = \"e7495e1c-0c75-0946-b266-b429b5c7f3bf\", name, id, tags, param1 = strcat(\"orchestrationMode: \", tostring(properties.orchestrationMode))\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "94794d2a-eff0-2345-9b67-6f9349d0a627", - "recommendationTypeId": "3b587048-b04b-4f81-aaed-e43793652b0f", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-health-extension?tabs=rest-api", - "name": "Using Application Health extension with Virtual Machine Scale Sets" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring application health in Azure Virtual Machine Scale Sets is crucial for deployment management. 
It supports rolling upgrades such as automatic OS-image upgrades and VM guest patching, leveraging health monitoring for upgrading.\n", - "pgVerified": true, - "description": "Enable Azure Virtual Machine Scale Set Application Health Monitoring", - "potentialBenefits": "Enhances deployment management and upgrades", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that do NOT have health monitoring enabled\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| join kind=leftouter (\r\n resources\r\n | where type == \"microsoft.compute/virtualmachinescalesets\"\r\n | mv-expand extension=properties.virtualMachineProfile.extensionProfile.extensions\r\n | where extension.properties.type in ( \"ApplicationHealthWindows\", \"ApplicationHealthLinux\" )\r\n | project id\r\n) on id\r\n| where id1 == \"\"\r\n| project recommendationId = \"94794d2a-eff0-2345-9b67-6f9349d0a627\", name, id, tags, param1 = \"extension: null\"\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "820f4743-1f94-e946-ae0b-45efafd87962", - "recommendationTypeId": "b4d988a9-85e6-4179-b69c-549bdd8a55bb", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-instance-repairs#requirements-for-using-automatic-instance-repairs", - "name": "Automatic instance repairs for Azure Virtual Machine Scale Sets" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Enabling automatic instance repairs in Azure Virtual Machine Scale Sets enhances application availability through a continuous health check and maintenance process.\n", - "pgVerified": true, - "description": "Enable Automatic Repair Policy on Azure Virtual Machine Scale Sets", 
- "potentialBenefits": "Boosts app availability by auto-repair", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs that do NOT have automatic repair policy enabled\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| where properties.automaticRepairsPolicy.enabled == false\r\n| project recommendationId = \"820f4743-1f94-e946-ae0b-45efafd87962\", name, id, tags, param1 = \"automaticRepairsPolicy: Disabled\"\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "ee66ff65-9aa3-2345-93c1-25827cf79f44", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-get-started?WT.mc_id=Portal-Microsoft_Azure_Monitoring", - "name": "Get started with autoscale in Azure" - }, - { - "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-overview", - "name": "Overview of autoscale in Azure" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Use custom autoscale for VMSS based on metrics and schedules to improve performance and cost effectiveness, adjusting instances as demand changes.\n", - "pgVerified": true, - "description": "Configure VMSS Autoscale to custom and configure the scaling metrics", - "potentialBenefits": "Enhances performance and cost-efficiency", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find VMSS instances associated with autoscale settings when autoscale is disabled\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| project name, id, tags\r\n| join 
kind=leftouter (\r\n resources\r\n | where type == \"microsoft.insights/autoscalesettings\"\r\n | where tostring(properties.targetResourceUri) contains \"Microsoft.Compute/virtualMachineScaleSets\"\r\n | project id = tostring(properties.targetResourceUri), autoscalesettings = properties\r\n) on id\r\n| where isnull(autoscalesettings) or autoscalesettings.enabled == \"false\"\r\n| project recommendationId = \"ee66ff65-9aa3-2345-93c1-25827cf79f44\", name, id, tags, param1 = \"autoscalesettings: Disabled\"\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "3f85a51c-e286-9f44-b4dc-51d00768696c", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-predictive", - "name": "Use predictive autoscale to scale out before load demands in virtual machine scale sets" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Predictive autoscale utilizes machine learning to efficiently manage and scale Azure Virtual Machine Scale Sets by forecasting CPU load through historical usage analysis, ensuring timely scale-out to meet demand.\n", - "pgVerified": true, - "description": "Enable Predictive autoscale and configure at least for Forecast Only", - "potentialBenefits": "Optimizes scaling with ML predictions", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find VMSS instances associated with autoscale settings when predictiveAutoscalePolicy_scaleMode is disabled\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| project name, id, tags\r\n| join kind=leftouter (\r\n resources\r\n | where type == \"microsoft.insights/autoscalesettings\"\r\n | where tostring(properties.targetResourceUri) contains 
\"Microsoft.Compute/virtualMachineScaleSets\"\r\n | project id = tostring(properties.targetResourceUri), autoscalesettings = properties\r\n) on id\r\n| where autoscalesettings.enabled == \"true\" and autoscalesettings.predictiveAutoscalePolicy.scaleMode == \"Disabled\"\r\n| project recommendationId = \"3f85a51c-e286-9f44-b4dc-51d00768696c\", name, id, tags, param1 = \"predictiveAutoscalePolicy_scaleMode: Disabled\"\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b5a63aa0-c58e-244f-b8a6-cbba0560a6db", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-scale-in-policy", - "name": "Use scale-in policies with Azure Virtual Machine Scale Sets" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Microsoft advises disabling strictly even VM instance distribution across Availability Zones in VMSS to improve scalability and flexibility, noting that uneven distribution may better serve application load demands despite the potential trade-off in resilience.\n", - "pgVerified": true, - "description": "Disable Force strictly even balance across zones to avoid scale in and out fail attempts", - "potentialBenefits": "Improves scaling, reduces fail attempts", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find VMSS instances where strictly zoneBalance is set to True\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| where properties.orchestrationMode == \"Uniform\" and properties.zoneBalance == true\r\n| project recommendationId = \"b5a63aa0-c58e-244f-b8a6-cbba0560a6db\", name, id, tags, param1 = \"strictly zoneBalance: Enabled\"\r\n| order by id asc\r\n\r\n" 
- }, - { - "publishedToAdvisor": null, - "aprlGuid": "1422c567-782c-7148-ac7c-5fc14cf45adc", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones", - "name": "Create a Virtual Machine Scale Set that uses Availability Zones" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones?tabs=cli-1%2Cportal-2#update-scale-set-to-add-availability-zones", - "name": "Update scale set to add availability zones" - } - ], - "recommendationControl": "High Availability", - "longDescription": "When creating VMSS, implement availability zones as a protection measure for your applications and data against the rare event of datacenter failure.\n", - "pgVerified": true, - "description": "Deploy VMSS across availability zones with VMSS Flex", - "potentialBenefits": "Enhances disaster resilience", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find VMSS instances with one or no Zones selected\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", 
\"chinanorth3\")\r\n| where array_length(zones) <= 1 or isnull(zones)\r\n| project recommendationId = \"1422c567-782c-7148-ac7c-5fc14cf45adc\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "e4ffd7b0-ba24-c84e-9352-ba4819f908c0", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/virtual-machines/automatic-vm-guest-patching", - "name": "Automatic VM Guest Patching for Azure VMs" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade", - "name": "Auto OS Image Upgrades" - } - ], - "recommendationControl": "Other Best Practices", - "longDescription": "Enabling automatic VM guest patching eases update management by safely, automatically patching virtual machines to maintain security compliance, while limiting blast radius of VMs. 
Note, the KQL will not return sets using Uniform orchestration.\n", - "pgVerified": true, - "description": "Set Patch orchestration options to Azure-orchestrated", - "potentialBenefits": "Eases patch management, enhances security", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// Identifies VMs and VMSS with manual patch settings, excluding automatic patch modes\r\nresources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| join kind=inner (\r\n resources\r\n | where type == \"microsoft.compute/virtualmachines\"\r\n | project id = tostring(properties.virtualMachineScaleSet.id), vmproperties = properties\r\n) on id\r\n| extend recommendationId = \"e4ffd7b0-ba24-c84e-9352-ba4819f908c0\", param1 = \"patchMode: Manual\", vmproperties.osProfile.linuxConfiguration.patchSettings.patchMode\r\n| where isnotnull(vmproperties.osProfile.linuxConfiguration) and vmproperties.osProfile.linuxConfiguration.patchSettings.patchMode !in (\"AutomaticByPlatform\", \"AutomaticByOS\")\r\n| distinct recommendationId, name, id, param1\r\n| union (resources\r\n| where type == \"microsoft.compute/virtualmachinescalesets\"\r\n| join kind=inner (\r\n resources\r\n | where type == \"microsoft.compute/virtualmachines\"\r\n | project id = tostring(properties.virtualMachineScaleSet.id), vmproperties = properties\r\n) on id\r\n| extend recommendationId = \"e4ffd7b0-ba24-c84e-9352-ba4819f908c0\", param1 = \"patchMode: Manual\", vmproperties.osProfile.windowsConfiguration.patchSettings.patchMode\r\n| where isnotnull(vmproperties.osProfile.windowsConfiguration) and vmproperties.osProfile.windowsConfiguration.patchSettings.patchMode !in (\"AutomaticByPlatform\", \"AutomaticByOS\")\r\n| distinct recommendationId, name, id, param1)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": 
"83d61669-7bd6-9642-a305-175db8adcdf4", - "recommendationTypeId": "3b739bd1-c193-4bb6-a953-1362ee3b03b2", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/deprecated-images", - "name": "Deprecated Azure Marketplace images" - } - ], - "recommendationControl": "Governance", - "longDescription": "Ensure current versions of images are in use to avoid disruption after image deprecation. Please review the publisher, offer, sku information of the VM to ensure you are running on a supported image. Enable Auto Guest Patching or Image Upgrades, to get notifications about image deprecation.\n", - "pgVerified": true, - "description": "Upgrade VMSS Image versions scheduled to be deprecated or already retired", - "potentialBenefits": "Avoid disruptions by updating VMSS images.", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "//cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "eb005943-40a8-194b-9db2-474d430046b7", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices", - "name": "Container Registry Best Practices" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Choose a service tier of Azure Container Registry to meet your performance needs. Premium offers the most bandwidth and highest rate of read and write operations for high-volume deployments. 
Use Basic to start, Standard for production, and Premium for hyper-scale performance and geo-replication.\n", - "pgVerified": false, - "description": "Use Premium tier for critical production workloads", - "potentialBenefits": "High-volume support and geo-replication", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Container Registries that are not using the Premium tier\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| where sku.name != \"Premium\"\r\n| project recommendationId = \"eb005943-40a8-194b-9db2-474d430046b7\", name, id, tags, param1=strcat(\"SkuName: \", tostring(sku.name))\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "63491f70-22e4-3b4a-8b0c-845450e46fac", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/zone-redundancy?toc=%2Fazure%2Freliability%2Ftoc.json&bc=%2Fazure%2Freliability%2Fbreadcrumb%2Ftoc.json&branch=main", - "name": "Registry best practices - Enable zone redundancy" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure Container Registry's optional zone redundancy enhances resiliency and high availability for registries or replication resources in a specific region by distributing resources across multiple zones.\n", - "pgVerified": false, - "description": "Enable zone redundancy", - "potentialBenefits": "Enhances resiliency and high availability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Container Registries that do not have zone 
redundancy enabled\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.zoneRedundancy != \"Enabled\"\r\n| project recommendationId = \"63491f70-22e4-3b4a-8b0c-845450e46fac\", name, id, tags, param1=strcat(\"zoneRedundancy: \", tostring(properties.zoneRedundancy))\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "36ea6c09-ef6e-d743-9cfb-bd0c928a430b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#geo-replicate-multi-region-deployments", - "name": "Registry best practices - Enable geo-replication" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-geo-replication", - "name": "Geo-Replicate Container Registry" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Use Azure Container Registry's geo-replication for multi-region deployments to simplify registry management and minimize latency. It enables serving global customers from local data centers and supports distributed development teams. 
Regional webhooks can notify of events in replicas.\n", - "pgVerified": false, - "description": "Enable geo-replication", - "potentialBenefits": "Simplifies management, reduces latency", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Container Registries that do not have geo-replication enabled\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| project registryName = name, registryId = id, tags, primaryRegion = location\r\n| join kind=leftouter (\r\n Resources\r\n | where type =~ \"microsoft.containerregistry/registries/replications\"\r\n | project replicationRegion=name, replicationId = id\r\n | extend registryId=strcat_array(array_slice(split(replicationId, '/'), 0, -3), '/')\r\n ) on registryId\r\n| project-away registryId1, replicationId\r\n| where isempty(replicationRegion)\r\n| project recommendationId = \"36ea6c09-ef6e-d743-9cfb-bd0c928a430b\", name=registryName, id=registryId, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "a5a0101a-a240-8742-90ba-81dbde9a0c0c", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#repository-namespaces", - "name": "Registry best practices - use repository namespaces" - } - ], - "recommendationControl": "Security", - "longDescription": "Using repository namespaces allows a single registry to be shared across multiple groups and deployments within an organization, supporting nested namespaces for group isolation. 
However, repositories are managed independently, not hierarchically.\n", - "pgVerified": false, - "description": "Use Repository namespaces", - "potentialBenefits": "Enables sharing and group isolation", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "8e389532-5db5-7e4c-9d4d-443b3e55ae82", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#dedicated-resource-group", - "name": "Registry best practices - Use dedicated resource group" - } - ], - "recommendationControl": "Governance", - "longDescription": "Container registries, used across multiple hosts, should be in their own resource group to prevent accidental deletion of images when container instances are deleted, preserving the image collection while experimenting with hosts.\n", - "pgVerified": false, - "description": "Move Container Registry to a dedicated resource group", - "potentialBenefits": "Safeguards image collection", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// List container registries that contain additional resources within the same resource group.\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| project registryName=name, registryId=id, registryTags=tags, resourceGroupId=strcat('/subscriptions/', subscriptionId, '/resourceGroups/', resourceGroup), resourceGroup, subscriptionId\r\n| join kind=inner (\r\n resources\r\n | where not(type =~ \"microsoft.containerregistry/registries\")\r\n | summarize 
recourceCount=count() by subscriptionId, resourceGroup\r\n | where recourceCount != 0\r\n) on resourceGroup, subscriptionId\r\n| project recommendationId = \"8e389532-5db5-7e4c-9d4d-443b3e55ae82\", name=registryName, id=registryId, tags=registryTags, param1=strcat('resourceGroupName:',resourceGroup), param2=strcat('resourceGroupId:',resourceGroupId)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "3ef86f16-f65b-c645-9901-7830d6dc3a1b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-best-practices#manage-registry-size", - "name": "Registry best practices - Manage registry size" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-retention-policy#about-the-retention-policy", - "name": "Retention Policy" - } - ], - "recommendationControl": "Scalability", - "longDescription": "The storage constraints of Azure Container Registry's service tiers align with usage scenarios: Basic for starters, Standard for production, and Premium for high-scale performance and geo-replication.\n", - "pgVerified": false, - "description": "Manage registry size", - "potentialBenefits": "Reduce costs, optimize storage", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Container Registries that have their retention policy disabled\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| where properties.policies.retentionPolicy.status == \"disabled\"\r\n| project recommendationId = \"3ef86f16-f65b-c645-9901-7830d6dc3a1b\", name, id, tags, param1='retentionPolicy:disabled'\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": 
"03f4a7d8-c5b4-7842-8e6e-14997a34842b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/anonymous-pull-access#about-anonymous-pull-access", - "name": "Enable anonymous pull access" - } - ], - "recommendationControl": "Security", - "longDescription": "By default, Azure container registry requires authentication for pull/push actions. Enabling anonymous pull access exposes all content for public read actions. This applies to all repositories, potentially allowing unrestricted access if repository-scoped tokens are used.\n", - "pgVerified": false, - "description": "Disable anonymous pull access", - "potentialBenefits": "Enhanced security and controlled access", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Container Registries that have anonymous pull access enabled\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| where properties.anonymousPullEnabled == \"true\"\r\n| project recommendationId = \"03f4a7d8-c5b4-7842-8e6e-14997a34842b\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "44107155-7a32-9348-89f3-d5aa7e7c5a1d", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service-reference#resource-logs", - "name": "Monitoring Azure Container Registry data reference - Resource Logs" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service#collection-and-routing", - "name": "Monitor Azure Container Registry - Enable diagnostic logs" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": 
"Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations.\n", - "pgVerified": false, - "description": "Configure Diagnostic Settings for all Azure Container Registries", - "potentialBenefits": "Enhanced tracking and debugging", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "d594cde6-4116-d143-a64a-25f63289a2f8", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service-reference#metrics", - "name": "Monitoring Azure Container Registry data reference" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/monitor-service", - "name": "Monitor Azure Container Registry" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring Azure resources using Azure Monitor enhances their availability, performance, and operation. 
Azure Container Registry, a full-stack monitoring service, provides features for Azure and other cloud and on-premises resources.\n", - "pgVerified": false, - "description": "Monitor Azure Container Registry with Azure Monitor", - "potentialBenefits": "Enhanced monitoring and operation", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "e7f0fd54-fba0-054e-9ab8-e676f2851f88", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/container-registry/container-registry-soft-delete-policy", - "name": "Enable soft delete policy" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Enabling soft delete in Azure Container Registry (ACR) allows for the management of deleted artifacts with a specified retention period. 
Users can list, filter, and restore these artifacts until automatically purged post-retention.\n", - "pgVerified": false, - "description": "Enable soft delete policy", - "potentialBenefits": "Recovery of deleted artifacts", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerRegistry/registries", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure Container Registry resources that do not have soft delete enabled\r\nresources\r\n| where type =~ \"microsoft.containerregistry/registries\"\r\n| where properties.policies.softDeletePolicy.status == \"disabled\"\r\n| project recommendationId = \"e7f0fd54-fba0-054e-9ab8-e676f2851f88\", name, id, tags\r\n| order by id asc\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "4f63619f-5001-439c-bacb-8de891287727", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/aks/availability-zones", - "name": "AKS Availability Zones" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones#zone-balancing", - "name": "Zone Balancing" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Azure Availability Zones ensure high availability by offering independent locations within regions, equipped with their own power, cooling, and networking to ensure applications and data are protected from datacenter-level failures.\n", - "pgVerified": true, - "description": "Deploy AKS cluster across availability zones", - "potentialBenefits": "Enhanced fault tolerance for AKS", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// 
Returns AKS clusters that do not have any availability zones enabled or only use a single zone\r\nresources\r\n| where type =~ \"Microsoft.ContainerService/managedClusters\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| project id, name, tags, location, pools = properties.agentPoolProfiles\r\n| mv-expand pool = pools\r\n| extend\r\n numOfAvailabilityZones = iif(isnull(pool.availabilityZones), 0, array_length(pool.availabilityZones))\r\n| where numOfAvailabilityZones < 2\r\n| project\r\n recommendationId = \"4f63619f-5001-439c-bacb-8de891287727\",\r\n id,\r\n name,\r\n tags,\r\n param1 = strcat(\"NodePoolName: \", pool.name),\r\n param2 = strcat(\"Mode: \", pool.mode),\r\n param3 = strcat(\"AvailabilityZones: \", iif(numOfAvailabilityZones == 0, \"None\", strcat(\"Zone \", strcat_array(pool.availabilityZones, \", \")))),\r\n param4 = strcat(\"Location: \", location)\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "5ee083cd-6ac3-4a83-8913-9549dd36cf56", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/aks/use-system-pools?tabs=azure-cli#system-and-user-node-pools", - "name": "System and user node pools" - } - ], - "recommendationControl": "High Availability", - "longDescription": "AKS assigns the kubernetes.azure.com/mode: system label to nodes in system node pools signaling the preference for 
system pods should be scheduled there. The CriticalAddonsOnly=true:NoSchedule taint can be added to your system nodes to prohibit application pods from being scheduled on them.\n", - "pgVerified": false, - "description": "Isolate system and application pods", - "potentialBenefits": "Enhanced reliability via pod isolation", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns each AKS cluster with nodepools that do not have system pods labelled with CriticalAddonsOnly\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\r\n| where agentPoolProfile.mode =~ 'System' // system node pools\r\n| extend taint = tostring(parse_json(agentPoolProfile.nodeTaints))\r\n| extend hasCriticalAddonsTaint = agentPoolProfile.kubeletConfig has 'CriticalAddonsOnly'\r\n| extend hasNodeLabel = agentPoolProfile.customNodeLabels has 'CriticalAddonsOnly'\r\n| extend hasCriticalAddonsOnly = hasCriticalAddonsTaint or hasNodeLabel or isempty(taint)\r\n| extend nodePool = tostring(parse_json(agentPoolProfile.name))\r\n| where hasCriticalAddonsOnly\r\n| project\r\n recommendationId=\"5ee083cd-6ac3-4a83-8913-9549dd36cf56\",\r\n id,\r\n name,\r\n tags,\r\n param1=strcat(\"nodepoolName: \", nodePool)\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "ca324d71-54b0-4a3e-b9e4-10e767daa9fc", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/aks/concepts-identity#azure-ad-integration", - "name": "Entra integration" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/aks/manage-azure-rbac?source=recommendations", - "name": "Use Azure role-based access control for AKS" - }, - { - "url": 
"https://learn.microsoft.com/en-us/azure/aks/manage-local-accounts-managed-azure-ad?source=recommendations", - "name": "Manage AKS local accounts" - } - ], - "recommendationControl": "Security", - "longDescription": "Local Kubernetes accounts in AKS, being non-auditable and legacy, are discouraged. Microsoft Entra's integration offers centralized management, multi-factor authentication, RBAC for detailed access, and a secure, scalable authentication system compatible with Azure and external identity providers.\n", - "pgVerified": false, - "description": "Disable local accounts", - "potentialBenefits": "Enhanced security and access control", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns a list of AKS clusters not using AAD enabled\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend aadProfile = tostring (parse_json(properties.aadProfile))\r\n| extend disablelocalAdmin = tostring(parse_json(properties.disableLocalAccounts))\r\n| extend RBAC = tostring(parse_json(properties.enableRBAC))\r\n| where RBAC == \"false\"\r\n| project recommendationId=\"ca324d71-54b0-4a3e-b9e4-10e767daa9fc\", name, id, tags, param1=strcat(\"aadProfile: \", aadProfile), param2=strcat(\"disablelocalAdmin: \",disablelocalAdmin), param3=strcat(\"RBAC: \", RBAC)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "c22db132-399b-4e7c-995d-577a60881be8", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/aks/configure-azure-cni-dynamic-ip-allocation", - "name": "Configure Azure CNI networking" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/aks/azure-cni-overlay", - "name": "Configure Azure CNI Overlay networking" - } - ], - 
"recommendationControl": "Scalability", - "longDescription": "Azure CNI enhances cluster IP and network management, allowing dynamic IP allocation, scalable subnets, direct pod-VNET connectivity, and supports diverse network policies for pods and nodes with Azure Network Policies and Calico, optimizing network efficiency and security\n", - "pgVerified": false, - "description": "Configure Azure CNI networking for dynamic allocation of IPs", - "potentialBenefits": "Dynamic IP allocation, scalable subnets, direct VNET access", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Check AKS Clusters using kubenet network profile\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend networkProfile = tostring (parse_json(properties.networkProfile.networkPlugin))\r\n| where networkProfile ==\"kubenet\"\r\n| project recommendationId=\"c22db132-399b-4e7c-995d-577a60881be8\", name, id, tags, param1=strcat(\"networkProfile :\",networkProfile)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "902c82ff-4910-4b61-942d-0d6ef7f39b67", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/aks/cluster-autoscaler?tabs=azure-cli", - "name": "Use the Cluster Autoscaler on AKS" - }, - { - "url": "https://learn.microsoft.com/azure/aks/operator-best-practices-advanced-scheduler", - "name": "Best practices for advanced scheduler features" - }, - { - "url": "https://learn.microsoft.com/azure/aks/best-practices-performance-scale-large#node-pool-scaling", - "name": "Node pool scaling considerations and best practices" - }, - { - "url": "https://learn.microsoft.com/azure/aks/operator-best-practices-scheduler", - "name": "Best practices for basic scheduler features" - } - ], - 
"recommendationControl": "Scalability", - "longDescription": "The cluster auto-scaler in AKS adjusts node counts based on pod resource needs and available capacity, enabling scaling as per demand to prevent outages.\n", - "pgVerified": true, - "description": "Enable the cluster auto-scaler on an existing cluster", - "potentialBenefits": "Optimizes scaling and prevents outages", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find AKS clusters with auto-scaling disabled\r\nResources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend autoScaling = tostring (parse_json(properties.agentPoolProfiles.[0].enableAutoScaling))\r\n| where autoScaling == \"false\"\r\n| project recommendationId=\"902c82ff-4910-4b61-942d-0d6ef7f39b67\", name, id, tags, param1=strcat(\"autoScaling :\", autoScaling)\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "269a9f1a-6675-460a-831e-b05a887a8c4b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/backup/azure-kubernetes-service-cluster-backup", - "name": "AKS Backups" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/aks/operator-best-practices-storage", - "name": "Best Practices for AKS Backups" - } - ], - "recommendationControl": "Disaster Recovery", - "longDescription": "AKS, popular for stateful apps needing backups, can now use Azure Backup to secure clusters and attached volumes through an installed Backup Extension, enabling backup and restore operations via a Backup Vault.\n", - "pgVerified": true, - "description": "Back up Azure Kubernetes Service", - "potentialBenefits": "Ensures data safety for AKS", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": 
"Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find AKS clusters that do not have backup enabled\r\n\r\nresources\r\n| where type =~ 'Microsoft.ContainerService/managedClusters'\r\n| extend lname = tolower(name)\r\n| join kind=leftouter(recoveryservicesresources\r\n | where type =~ 'microsoft.dataprotection/backupvaults/backupinstances'\r\n | extend lname = tolower(tostring(split(properties.dataSourceInfo.resourceID, '/')[8]))\r\n | extend protectionState = properties.currentProtectionState\r\n | project lname, protectionState) on lname\r\n| where protectionState != 'ProtectionConfigured'\r\n| extend param1 = iif(isnull(protectionState), 'Protection Not Configured', strcat('Protection State: ', protectionState))\r\n| project recommendationId = \"269a9f1a-6675-460a-831e-b05a887a8c4b\", name, id, tags, param1\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "d3111036-355d-431b-ab49-8ddad042800b", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/reliability/availability-zones-overview?tabs=azure-cli", - "name": "Availability zones overview" - }, - { - "url": "https://learn.microsoft.com/azure/storage/common/storage-redundancy#zone-redundant-storage", - "name": "Zone-redundant storage" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-redundancy#zone-redundant-storage-for-managed-disks", - "name": "ZRS disks" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-migrate-lrs-zrs", - "name": "Convert a disk from LRS to ZRS" - }, - { - "url": "https://learn.microsoft.com/azure/storage/container-storage/enable-multi-zone-redundancy", - "name": "Enable multi-zone storage redundancy in Azure Container Storage" - } - ], - "recommendationControl": "High Availability", - "longDescription": "ZRS ensures data 
replication across three zones, protecting against zonal outages. It's available for Azure Disks, Container Storage, Files, and Blob by setting the SKU to ZRS in storage classes, enhancing multi-zone AKS clusters from v1.29.\n", - "pgVerified": true, - "description": "Use zone-redundant storage for persistent volumes when running multi-zone AKS", - "potentialBenefits": "Increases data durability and availability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b002c030-72e6-4a37-8217-1cb276c43169", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/aks/csi-storage-drivers", - "name": "CSI Storage Drivers" - }, - { - "url": "https://learn.microsoft.com/azure/aks/csi-migrate-in-tree-volumes", - "name": "CSI Migrate in Tree Volumes" - } - ], - "recommendationControl": "Governance", - "longDescription": "From Kubernetes 1.26, Azure Disk and Azure File in-tree drivers are deprecated in favor of CSI drivers. 
Existing deployments remain operational but untested; users should switch to CSI drivers for new features and SKUs.\n", - "pgVerified": true, - "description": "Upgrade Persistent Volumes using in-tree drivers to Azure CSI drivers", - "potentialBenefits": "Ensures future compatibility", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "9a1c17e5-c9a0-43db-b920-adaf54d1bcb7", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://kubernetes.io/docs/concepts/policy/resource-quotas/", - "name": "Resource Quotas" - } - ], - "recommendationControl": "Scalability", - "longDescription": "A ResourceQuota object sets limits on resource use per namespace, controlling the number and type of objects created, and the total compute resources available.\n", - "pgVerified": false, - "description": "Implement Resource Quota to ensure that Kubernetes resources do not exceed hard resource limits", - "potentialBenefits": "Limits AKS resource usage per namespace", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "b4639ca7-6308-429a-8b98-92f0bf9bf813", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/aks/virtual-nodes", - "name": "Virtual Nodes" - }, - { - "url": "https://learn.microsoft.com/azure/container-instances/container-instances-overview", - "name": "Azure Container Instances" - } - ], - "recommendationControl": "Scalability", - 
"longDescription": "To rapidly scale AKS workloads, utilize virtual nodes for quick pod provisioning, unlike Kubernetes auto-scaler. For clusters with availability zones, ensure one nodepool per AZ due to persistent volumes not working across AZs, preventing auto-scaler pod creation failures if lacking access.\n", + "longDescription": "Azure Private Link Service lets you securely and privately connect to Azure Key Vault via a Private Endpoint in your VNet, using a private IP and eliminating public Internet exposure.\n", "pgVerified": false, - "description": "Attach Virtual Nodes (ACI) to the AKS cluster", - "potentialBenefits": "Faster scaling with virtual nodes", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "0611251f-e70f-4243-8ddd-cfe894bec2e7", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/en-us/azure/aks/free-standard-pricing-tiers", - "name": "Pricing Tiers" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Faks%2Ftoc.json&bc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Fbread%2Ftoc.json#kubernetes-api-server-sla", - "name": "AKS Baseline Architecture" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Production AKS clusters require the Standard or Premium tier for a financially backed SLA and enhanced node scalability, as the free service lacks these features. 
Use the Premium tier for mission-critical workloads.\n", - "pgVerified": true, - "description": "Update AKS tier to Standard or Premium", - "potentialBenefits": "SLA guarantee and better scalability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns all AKS clusters not running on the Standard tier or the Premium tier.\r\nresources\r\n| where type =~ \"Microsoft.ContainerService/managedClusters\"\r\n| where sku.tier !in~ (\"Standard\", \"Premium\")\r\n| project recommendationId = \"0611251f-e70f-4243-8ddd-cfe894bec2e7\", id, name, tags, param1 = strcat(\"skuName: \", sku.name), param2 = strcat(\"skuTier: \", sku.tier)\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "dcaf8128-94bd-4d53-9235-3a0371df6b74", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/aks/monitor-aks", - "name": "Monitor AKS" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Azure Monitor enables real-time health and performance insights for AKS by collecting events, capturing container logs, and gathering CPU/Memory data from the Metrics API. 
It allows data visualization using Azure Monitor Container Insights, Prometheus, Grafana, or others.\n", - "pgVerified": true, - "description": "Enable AKS Monitoring", - "potentialBenefits": "Real-time AKS health/performance insights", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns AKS clusters where either Azure Monitor is not enabled and/or Container Insights is not enabled\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend azureMonitor = tostring(parse_json(properties.azureMonitorProfile.metrics.enabled))\r\n| extend insights = tostring(parse_json(properties.addonProfiles.omsagent.enabled))\r\n| where isempty(azureMonitor) or isempty(insights)\r\n| project recommendationId=\"dcaf8128-94bd-4d53-9235-3a0371df6b74\",id, name, tags, param1=strcat(\"azureMonitorProfileEnabled: \", iff(isempty(azureMonitor), \"false\", azureMonitor)), param2=strcat(\"containerInsightsEnabled: \", iff(isempty(insights), \"false\", insights))\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "a7bfcc18-b0d8-4d37-81f3-8131ed8bead5", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/aks/concepts-storage#ephemeral-os-disk", - "name": "Ephemeral OS disk" - }, - { - "url": "https://learn.microsoft.com/azure/aks/cluster-configuration", - "name": "Configure an AKS cluster" - }, - { - "url": "https://learn.microsoft.com/samples/azure-samples/aks-ephemeral-os-disk/aks-ephemeral-os-disk/", - "name": "Everything you want to know about ephemeral OS disks and AKS" - } - ], - "recommendationControl": "Scalability", - "longDescription": "Ephemeral OS disks on AKS offer lower read/write latency due to local attachment, eliminating the need for replication seen with 
managed disks. This enhances performance and speeds up cluster operations such as scaling or upgrading due to quicker re-imaging and boot times.\n", - "pgVerified": true, - "description": "Use Ephemeral OS disks on AKS clusters", - "potentialBenefits": "Lower latency, faster re-imaging and booting", - "publishedToLearn": false, + "description": "Private endpoint should be configured for Key Vault", + "potentialBenefits": "Secure Key Vault with Private Link", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationResourceType": "Microsoft.KeyVault/vaults", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns any AKS cluster nodepools that do not have Ephemeral Disks\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\r\n| extend type = tostring(agentPoolProfile.osDiskType)\r\n| where type != 'Ephemeral'\r\n| project recommendationId=\"a7bfcc18-b0d8-4d37-81f3-8131ed8bead5\", name, id, param1=strcat(\"osDiskType: \", type)\r\n" + "query": "// Azure Resource Graph Query\n// This resource graph query will return all Key Vaults that does not have a Private Endpoint Connection or where a private endpoint exists but public access is enabled\n\nresources\n| where type == \"microsoft.keyvault/vaults\"\n| where isnull(properties.privateEndpointConnections) or properties.privateEndpointConnections[0].properties.provisioningState != (\"Succeeded\") or (isnull(properties.networkAcls) and properties.publicNetworkAccess == 'Enabled')\n| extend param1 = strcat('Private Endpoint: ', iif(isnotnull(properties.privateEndpointConnections),split(properties.privateEndpointConnections[0].properties.privateEndpoint.id,'/')[8],'No Private Endpoint'))\n| extend param2 = strcat('Access: ', iif(properties.publicNetworkAccess == 'Disabled', 'Public Access Disabled', 
iif(isnotnull(properties.networkAcls), 'NetworkACLs in place','Public Access Enabled')))\n| project recommendationId = \"00c3d2b0-ea6e-4c4b-89be-b78a35caeb51\", name, id, tags, param1, param2\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "26ebaf1f-c70d-4ebd-8641-4b60a0ce0094", + "aprlGuid": "e7091145-3642-bd41-bb58-66502e64d2cd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Faks%2Ftoc.json&bc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Fbread%2Ftoc.json#policy-management", - "name": "AKS Baseline - Policy Management" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/aks/policy-reference", - "name": "Built-in Policy Definitions for AKS" + "url": "https://learn.microsoft.com/azure/key-vault/general/best-practices#why-we-recommend-separate-key-vaults", + "name": "Azure Key Vault best practices overview" } ], "recommendationControl": "Governance", - "longDescription": "Azure Policies in AKS clusters help enforce governance best practices concerning security, authentication, provisioning, networking, and more, ensuring a robust and secure environment for operations.\n", + "longDescription": "Key vaults are security boundaries for secret storage. 
Grouping secrets together increases risk during a security event, as attacks could access multiple secrets.\n", "pgVerified": false, - "description": "Enable and remediate Azure Policies configured for AKS", - "potentialBenefits": "Enhanced AKS governance and security", - "publishedToLearn": false, + "description": "Use separate key vaults per application per environment", + "potentialBenefits": "Enhanced security, Reduced risk", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns a count of non-compliant policy items per AKS cluster\r\nPolicyResources\r\n| where type =~ 'Microsoft.PolicyInsights/PolicyStates'\r\n| extend complianceState = tostring(properties.complianceState)\r\n| where complianceState == 'NonCompliant'\r\n| where properties.resourceType =~ 'Microsoft.ContainerService/managedClusters'\r\n| extend\r\n id = tostring(properties.resourceId)\r\n| summarize count() by id\r\n| join kind=inner (\r\n resources\r\n | where type =~ 'Microsoft.ContainerService/managedClusters'\r\n | project id, name\r\n) on id\r\n| project recommendationId=\"26ebaf1f-c70d-4ebd-8641-4b60a0ce0094\", id, name, param1=strcat(\"numNonCompliantAlerts: \", count_)\r\n" + "recommendationResourceType": "Microsoft.KeyVault/vaults", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5f3cbd68-692a-4121-988c-9770914859a9", - "recommendationTypeId": null, + "aprlGuid": "1dc0821d-4f14-7644-bab4-ba208ff5f7fa", + "recommendationTypeId": "88bbc99c-e5af-ddd7-6105-6150b2bfa519", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/guide/aks/aks-cicd-github-actions-and-gitops", - "name": "GitOps with AKS" - }, - { - "url": 
"https://learn.microsoft.com/en-us/azure/architecture/example-scenario/gitops-aks/gitops-blueprint-aks", - "name": "GitOps for AKS - Reference Architecture" + "url": "https://learn.microsoft.com/azure/key-vault/general/logging?tabs=Vault", + "name": "Azure Key Vault logging overview" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "GitOps, an operating model for cloud-native apps, uses Git for storing application and infrastructure code as a source of truth for continuous delivery.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Enable logs, set up alerts, and adhere to retention requirements for improved monitoring and security of Key Vault access, detailing the frequency and identity of users.\n", "pgVerified": false, - "description": "Enable GitOps when using DevOps frameworks", - "potentialBenefits": "Ensures AKS config consistency", - "publishedToLearn": false, + "description": "Diagnostic logs in Key Vault should be enabled", + "potentialBenefits": "Enhanced monitoring and security compliance", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationResourceType": "Microsoft.KeyVault/vaults", "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns AKS clusters where GitOps is not enabled\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend gitops = tostring (parse_json(properties.addOnProfiles.gitops.enabled))\r\n| where isempty(gitops)\r\n| project recommendationId=\"5f3cbd68-692a-4121-988c-9770914859a9\", id, name, tags, param1=strcat(\"gitopsEnabled: \", \"false\")\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "928fcc6f-5e9a-42d9-9bd4-260af42de2e5", + "aprlGuid": "b36fd2ac-dd83-664a-ab48-ff7b8d3b189d", "recommendationTypeId": null, 
"recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/", - "name": "Topology Spread Constraints" + "url": "https://learn.microsoft.com/azure/azure-monitor/logs/logs-data-export", + "name": "Log Analytics workspace data export in Azure Monitor" }, { - "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/", - "name": "Assign Pod Node" + "url": "https://learn.microsoft.com/azure/azure-monitor/best-practices-logs#configuration-recommendations", + "name": "Azure Monitor configuration recommendations" } ], - "recommendationControl": "High Availability", - "longDescription": "Enhance availability and reliability by using pod topology spread constraints to control pod distribution based on node or zone topology, ensuring pods are spread across your cluster.\n", + "recommendationControl": "Governance", + "longDescription": "Data export in a Log Analytics workspace to an Azure Storage account enhances data protection against regional failures by using geo-redundant (GRS) or geo-zone-redundant storage (GZRS), mainly for compliance and integration with other Azure services and tools.\n", "pgVerified": true, - "description": "Use pod topology spread constraints to ensure that pods are spread across different nodes or zones", - "potentialBenefits": "Ensures high availability and efficient use", - "publishedToLearn": false, + "description": "Enable Log Analytics data export to GRS or GZRS", + "potentialBenefits": "Enhances compliance and regional fault tolerance", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.OperationalInsights/workspaces", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": 
null, - "aprlGuid": "cd6791b1-c60e-4b37-ac98-9897b1e6f4b8", + "aprlGuid": "4b77191c-cc3c-8c4e-844b-0f56d0927890", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/", - "name": "Configure probes" + "url": "https://learn.microsoft.com/azure/azure-monitor/logs/log-analytics-workspace-health", + "name": "Monitor Log Analytics workspace health" }, { - "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/", - "name": "Assign Pod Node" + "url": "https://learn.microsoft.com/azure/azure-monitor/best-practices-logs#configuration-recommendations", + "name": "Azure Monitor configuration recommendations" } ], - "recommendationControl": "High Availability", - "longDescription": "AKS kubelet controller uses liveness probes to validate containers and applications health, ensuring the system knows when to restart a container based on its health status.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "A health status alert will proactively notify you if a workspace becomes unavailable because of a datacenter or regional failure.\n", "pgVerified": true, - "description": "Configures Pods Liveness, Readiness, and Startup Probes", - "potentialBenefits": "Enhances container health monitoring", - "publishedToLearn": false, + "description": "Create a health status alert rule for your Log Analytics workspace", + "potentialBenefits": "Early alert for workspace failure", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.OperationalInsights/workspaces", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"bcfe71f1-ebed-49e5-a84a-193b81ad5d27", - "recommendationTypeId": null, + "aprlGuid": "f0bf9ae6-25a5-974d-87d5-025abec73539", + "recommendationTypeId": "eade5b56-eefd-444f-95c8-23f29e5d93cb", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/", - "name": "Replica Sets" - } - ], - "recommendationControl": "High Availability", - "longDescription": "Configuring multiple replicas in Pod or Deployment manifests stabilizes the number of replica Pods, ensuring that a specified number of identical Pods are always available, thereby guaranteeing their availability.\n", - "pgVerified": true, - "description": "Use deployments with multiple replicas in production applications to guarantee availability", - "potentialBenefits": "Ensures stable pod availability", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "7f7ae535-a5ba-4665-b7e0-c451dbdda01f", - "recommendationTypeId": null, - "recommendationMetadataState": "Active", - "learnMoreLink": [ + "url": "https://learn.microsoft.com/azure/virtual-network/concepts-and-best-practices", + "name": "Azure Virtual Network - Concepts and best practices | Microsoft Learn" + }, { - "url": "https://learn.microsoft.com/azure/aks/use-system-pools?tabs=azure-cli", - "name": "System nodepools" + "url": "https://learn.microsoft.com/en-us/azure/vpn-gateway/vpn-gateway-about-vpn-gateway-settings#gwsub", + "name": "GatewaySUbnet" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/route-server/route-server-faq#can-i-associate-a-network-security-group-nsg-to-the-routeserversubnet", + "name": "Can I associate a network security group (NSG) to the RouteServerSubnet?" 
+ }, + { + "url": "https://learn.microsoft.com/en-us/azure/firewall/firewall-faq#are-network-security-groups--nsgs--supported-on-the-azurefirewallsubnet", + "name": "Are Network Security Groups (NSGs) supported on the AzureFirewallSubnet?" } ], - "recommendationControl": "High Availability", - "longDescription": "The system node pool should be configured with a minimum node count of two to ensure critical system pods are resilient to node outages.\n", + "recommendationControl": "Security", + "longDescription": "Network security groups and application security groups allow filtering of inbound and outbound traffic by IP, port, and protocol, adding a security layer at the Subnet level.\n", "pgVerified": true, - "description": "Configure system nodepool count", - "potentialBenefits": "Ensures pod resilience", - "publishedToLearn": false, + "description": "All Subnets should have a Network Security Group associated", + "potentialBenefits": "Enhanced subnet security and traffic control", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/virtualNetworks", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns each AKS cluster with nodepools that have system nodepools with less than 2 nodes\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\r\n| extend taints = tostring(parse_json(agentPoolProfile.nodeTaints))\r\n| extend nodePool = tostring(parse_json(agentPoolProfile.name))\r\n| where taints has \"CriticalAddonsOnly=true:NoSchedule\" and agentPoolProfile.minCount < 2\r\n| project recommendationId=\"7f7ae535-a5ba-4665-b7e0-c451dbdda01f\", id, name, param1=strcat(\"nodePoolName: \", nodePool), param2=strcat(\"nodePoolMinNodeCount: \", agentPoolProfile.minCount)\r\n\r\n" + "query": "// Azure 
Resource Graph Query\n// Find Subnets without NSG associated\nresources\n| where type =~ 'Microsoft.Network/virtualnetworks'\n| mv-expand subnets = properties.subnets\n| extend sn = string_size(subnets.properties.networkSecurityGroup)\n| where sn == 0 and subnets.name !in (\"GatewaySubnet\", \"AzureFirewallSubnet\", \"AzureFirewallManagementSubnet\", \"RouteServerSubnet\")\n| project recommendationId = \"f0bf9ae6-25a5-974d-87d5-025abec73539\", name, id, tags, param1 = strcat(\"SubnetName: \", subnets.name), param2 = \"NSG: False\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "005ccbbd-aeab-46ef-80bd-9bd4479412ec", + "aprlGuid": "69ea1185-19b7-de40-9da1-9e8493547a5c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/service-guides/azure-kubernetes-service#design-checklist", - "name": "Azure Well-Architected Framework review for Azure Kubernetes Service (AKS)" + "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-virtual-network/reliability", + "name": "Reliability and Azure Virtual Network - Microsoft Azure Well-Architected Framework | Microsoft Learn" } ], - "recommendationControl": "High Availability", - "longDescription": "Configuring the user node pool with at least two nodes is essential for applications needing high availability, ensuring they remain operational and accessible without interruption.\n", + "recommendationControl": "Security", + "longDescription": "Azure DDoS Protection offers enhanced mitigation features against DDoS attacks and is auto-tuned to protect specific resources in a virtual network, combined with application design best practices.\n", "pgVerified": true, - "description": "Configure user nodepool count", - "potentialBenefits": "Ensures high app availability", - "publishedToLearn": false, + "description": "Shield public endpoints in Azure VNets with Azure DDoS Standard Protection Plans", + 
"potentialBenefits": "Enhanced DDoS attack mitigation", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationResourceType": "Microsoft.Network/virtualNetworks", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns each AKS cluster with nodepools that have user nodepools with less than 2 nodes\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\r\n| extend taints = tostring(parse_json(agentPoolProfile.nodeTaints))\r\n| extend nodePool = tostring(parse_json(agentPoolProfile.name))\r\n| where taints !has \"CriticalAddonsOnly=true:NoSchedule\" and agentPoolProfile.minCount < 2\r\n| project recommendationId=\"005ccbbd-aeab-46ef-80bd-9bd4479412ec\", id, name, param1=strcat(\"nodePoolName: \", nodePool), param2=strcat(\"nodePoolMinNodeCount: \", agentPoolProfile.minCount)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find virtual networks without DDoS Protection\nresources\n| where type =~ 'Microsoft.Network/virtualNetworks'\n| where isnull(properties.enableDdosProtection) or properties.enableDdosProtection contains \"false\"\n| project recommendationId = \"69ea1185-19b7-de40-9da1-9e8493547a5c\", name, id, tags, param1 = strcat(\"EnableDdosProtection: \", properties.enableDdosProtection)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a08a06a0-e41a-4b99-83bb-69ce8bca54cb", + "aprlGuid": "24ae3773-cc2c-3649-88de-c9788e25b463", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://kubernetes.io/docs/tasks/run-application/configure-pdb/", - "name": "Configure PDBs" + "url": "https://learn.microsoft.com/azure/virtual-network/virtual-networks-faq", + "name": "Azure Virtual Network FAQ | Microsoft Learn" }, { - "url": 
"https://learn.microsoft.com/azure/aks/operator-best-practices-scheduler#plan-for-availability-using-pod-disruption-budgets", - "name": "Plan availability using PDBs" + "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/network-connectivity/reliability", + "name": "Reliability and Network connectivity - Microsoft Azure Well-Architected Framework | Microsoft LearnNetworking Reliability" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/private-link/availability", + "name": "Azure Private Link availability" } ], - "recommendationControl": "High Availability", - "longDescription": "A Pod Disruption Budget is a Kubernetes resource configuring the minimum number or percentage of pods that should remain available during disruptions like maintenance or scaling, ensuring a minimum number of pods are always available in the cluster.\n", + "recommendationControl": "Security", + "longDescription": "Use VNet service endpoints only if Private Link isn't available and no data movement concerns. 
This feature restricts Azure service access to specified VNet and subnet, enhancing network security and isolating service traffic.\n", "pgVerified": true, - "description": "Configure pod disruption budgets (PDBs)", - "potentialBenefits": "Ensures cluster resiliency during disruptions", - "publishedToLearn": false, + "description": "When available, use Private Endpoints instead of Service Endpoints for PaaS Services", + "potentialBenefits": "Enhanced security and data isolation", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationResourceType": "Microsoft.Network/virtualNetworks", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Subnets with Service Endpoint enabled for services that offer Private Link\nresources\n| where type =~ 'Microsoft.Network/virtualnetworks'\n| mv-expand subnets = properties.subnets\n| extend se = array_length(subnets.properties.serviceEndpoints)\n| where se >= 1\n| project name, id, tags, subnets, serviceEndpoints=todynamic(subnets.properties.serviceEndpoints)\n| mv-expand serviceEndpoints\n| project name, id, tags, subnetName=subnets.name, serviceName=tostring(serviceEndpoints.service)\n| where serviceName in (parse_json('[\"Microsoft.CognitiveServices\",\"Microsoft.AzureCosmosDB\",\"Microsoft.DBforMariaDB\",\"Microsoft.DBforMySQL\",\"Microsoft.DBforPostgreSQL\",\"Microsoft.EventHub\",\"Microsoft.KeyVault\",\"Microsoft.ServiceBus\",\"Microsoft.Sql\", \"Microsoft.Storage\",\"Microsoft.StorageSync\",\"Microsoft.Synapse\",\"Microsoft.Web\"]'))\n| project recommendationId = \"24ae3773-cc2c-3649-88de-c9788e25b463\", name, id, tags, param1 = strcat(\"subnet=\", subnetName), param2=strcat(\"serviceName=\",serviceName), param3=\"ServiceEndpoints=true\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"e620fa98-7a40-41a0-bfc9-b4407297fb58", + "aprlGuid": "c63b81fb-7afc-894c-a840-91bb8a8dcfaf", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/aks/configure-azure-cni-dynamic-ip-allocation", - "name": "Azure CNI Dynamic IP Allocation" + "url": "https://learn.microsoft.com/azure/virtual-network/ip-services/public-ip-addresses#availability-zone", + "name": "Public IP addresses - Availability Zones" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/public-ip-basic-upgrade-guidance#steps-to-complete-the-upgrade", + "name": "Upgrading a basic public IP address to Standard SKU" } ], "recommendationControl": "High Availability", - "longDescription": "Nodepool subnets sized for max auto-scale settings enable AKS to efficiently scale out nodes, meeting increased demand while reducing resource constraints and potential service disruptions.\n", - "pgVerified": false, - "description": "Nodepool subnet size needs to accommodate maximum auto-scale settings", - "potentialBenefits": "Efficient scaling, reduced disruptions", - "publishedToLearn": false, + "longDescription": "Public IP addresses in Azure can be of standard SKU, available as non-zonal, zonal, or zone-redundant. 
Zone-redundant IPs are accessible across all zones, resisting any single zone failure, thereby providing higher resilience.\n", + "pgVerified": true, + "description": "Use Standard SKU and Zone-Redundant IPs when applicable", + "potentialBenefits": "Enhanced resilience with zone redundancy", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationResourceType": "Microsoft.Network/publicIPAddresses", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns each AKS cluster with nodepools that have user nodepools with a subnetmask that does not match autoscale configured max-nodes\r\n// Subtracting the network address, broadcast address, and default 3 addresses Azure reserves within each subnet\r\n\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| extend nodePools = properties['agentPoolProfiles']\r\n| mv-expand nodePools = properties.agentPoolProfiles\r\n| where nodePools.enableAutoScaling == true\r\n| extend nodePoolName=nodePools.name, maxNodes = nodePools.maxCount, subnetId = tostring(nodePools.vnetSubnetID)\r\n| project clusterId = id, clusterName=name, nodePoolName=nodePools.name, toint(maxNodes), subnetId\r\n| join kind = leftouter (\r\n resources\r\n | where type == 'microsoft.network/virtualnetworks'\r\n | extend subnets = properties.subnets\r\n | mv-expand subnets\r\n | project id = tostring(subnets.id), addressPrefix = tostring(subnets.properties['addressPrefix'])\r\n | extend subnetmask = toint(substring(addressPrefix, indexof(addressPrefix, '/')+1, string_size(addressPrefix)))\r\n | extend possibleMaxNodeCount = toint(exp2(32-subnetmask) - 5)\r\n) on $left.subnetId == $right.id\r\n| project-away id, subnetmask\r\n| where possibleMaxNodeCount <= maxNodes\r\n| extend param1 = strcat(nodePoolName, \" autoscaler upper limit: \", maxNodes)\r\n| extend param2 = strcat(\"ip addresses on subnet: \", 
possibleMaxNodeCount)\r\n| project recommendationId=\"e620fa98-7a40-41a0-bfc9-b4407297fb58\", name=clusterName, id=clusterId, param1, param2\r\n\r\n" + "query": "// Azure Resource Graph query\n// List public IP addresses that are not Zone-Redundant\nResources\n| where type =~ \"Microsoft.Network/publicIPAddresses\" and sku.tier =~ \"Regional\"\n| where isempty(zones) or array_length(zones) <= 1\n| extend az = case(isempty(zones), \"Non-zonal\", array_length(zones) <= 1, strcat(\"Zonal (\", strcat_array(zones, \",\"), \")\"), zones)\n| project recommendationId = \"c63b81fb-7afc-894c-a840-91bb8a8dcfaf\", name, id, tags, param1 = strcat(\"sku: \", sku.name), param2 = strcat(\"availabilityZone: \", az)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a01afc4c-7439-4919-b2da-3565992ea2a7", + "aprlGuid": "1adba190-5c4c-e646-8527-dd1b2a6d8b15", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/quotas/quotas-overview", - "name": "Azure Quotas" + "url": "https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#use-nat-gateway-for-outbound-connectivity", + "name": "Use NAT GW for outbound connectivity" + }, + { + "url": "https://learn.microsoft.com/azure/architecture/framework/services/compute/azure-app-service/reliability#tcp-and-snat-ports", + "name": "TCP and SNAT Ports" } ], "recommendationControl": "High Availability", - "longDescription": "Node pool settings should not exceed the subscription core quota to ensure AKS can scale out nodes efficiently, meeting increased demand while reducing resource constraints and potential service disruptions.\n", - "pgVerified": false, - "description": "Node pool auto-scale settings should not exceed subscription core quota", - "potentialBenefits": "Reduced disruptions", - "publishedToLearn": false, + "longDescription": "Prevent connectivity failures due to SNAT port exhaustion by employing NAT gateway for outbound 
traffic from virtual networks, ensuring dynamic scaling and secure internet connections.\n", + "pgVerified": true, + "description": "Use NAT gateway for outbound connectivity to avoid SNAT Exhaustion", + "potentialBenefits": "Avoids SNAT port exhaustion risks", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.Network/publicIPAddresses", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph query\n// Lists VMs with PIPs\nresources\n| where type =~ 'Microsoft.Network/publicIPAddresses'\n| where tostring(properties.ipConfiguration.id) contains \"microsoft.network/networkinterfaces\"\n| project recommendationId=\"1adba190-5c4c-e646-8527-dd1b2a6d8b15\", name, id, tags, param1=strcat(\"Migrate from instance IP to NAT Gateway\")\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f46b0d1d-56ef-4795-b98a-f6ee00cb341a", + "aprlGuid": "5cea1501-6fe4-4ec4-ac8f-f72320eb18d3", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/aks/use-azure-linux", - "name": "Azure Linux" + "url": "https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/public-ip-basic-upgrade-guidance", + "name": "Upgrading a basic public IP address to Standard SKU - Guidance" + }, + { + "url": "https://azure.microsoft.com/en-us/updates/upgrade-to-standard-sku-public-ip-addresses-in-azure-by-30-september-2025-basic-sku-will-be-retired/", + "name": "Upgrade to Standard SKU public IP addresses in Azure by 30 September 2025 as Basic SKU will be retired" } ], "recommendationControl": "High Availability", - "longDescription": "Azure Linux on AKS boosts resiliency with a native image using validated, source-built components. 
It's lightweight, reducing the attack surface and maintenance. A Microsoft-hardened kernel, optimized for Azure, enhances stability and security for container workloads.\n", - "pgVerified": false, - "description": "Use Azure Linux for Linux nodepools", - "potentialBenefits": "Reduced disruptions", - "publishedToLearn": false, + "longDescription": "Basic SKU public IP addresses will be retired on September 30, 2025. Users are advised to upgrade to Standard SKU public IP addresses before this date to avoid service disruptions.\n", + "pgVerified": true, + "description": "Upgrade Basic SKU public IP addresses to Standard SKU", + "potentialBenefits": "Avoids service disruption", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/publicIPAddresses", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Returns each AKS cluster with nodepools that have Linux nodepools not using Azure Linux\r\nresources\r\n| where type == \"microsoft.containerservice/managedclusters\"\r\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\r\n| where agentPoolProfile.osType == 'Linux' and agentPoolProfile.osSKU != 'AzureLinux'\r\n| project recommendationid=\"f46b0d1d-56ef-4795-b98a-f6ee00cb341a\", name, id, param1=strcat(\"nodePoolName: \", agentPoolProfile.name)\r\n" + "query": "// Azure Resource Graph query\n// List Basic SKU public IP addresses\nResources\n| where type =~ \"Microsoft.Network/publicIPAddresses\"\n| where sku.name =~ \"Basic\"\n| project recommendationId = \"5cea1501-6fe4-4ec4-ac8f-f72320eb18d3\", name, id, tags, param1 = strcat(\"sku: \", sku.name)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9200aca6-0e83-4749-a5eb-e3939367bdc2", + "aprlGuid": "c4254c66-b8a5-47aa-82f6-e7d7fb418f47", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ - 
{ - "url": "https://learn.microsoft.com/azure/aks/best-practices-app-cluster-reliability#multi-replica-applications", - "name": "Multi-replica apps" + { + "url": "https://learn.microsoft.com/en-us/azure/ddos-protection/ddos-protection-overview", + "name": "Azure DDoS Protection" } ], - "recommendationControl": "High Availability", - "longDescription": "Deploying at least two replicas of your application ensures that your application is highly available and can tolerate node failures.\n", - "pgVerified": false, - "description": "Deploy at least two replicas of your application", - "potentialBenefits": "Ensures high app availability", - "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "DDoS attacks can be targeted at any endpoint that is publicly reachable through the internet.\n", + "pgVerified": true, + "description": "Public IP addresses should have DDoS protection enabled", + "potentialBenefits": "Avoids service disruption", "tags": null, - "recommendationResourceType": "Microsoft.ContainerService/managedClusters", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.Network/publicIPAddresses", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph query\n// Public IP addresses should have DDoS protection enabled\nresources\n| where type =~ 'Microsoft.Network/publicIPAddresses'\n| where properties.ddosSettings.protectionMode !in~ (\"Enabled\", \"VirtualNetworkInherited\")\n| project recommendationId=\"c4254c66-b8a5-47aa-82f6-e7d7fb418f47\", name, id, tags, param1=strcat(\"Apply either DDoS Network protection or DDoS IP Protrection to the public IP address.\")\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6cd57b65-ef84-4088-9ada-c0d8de74c2f7", + "aprlGuid": "23b2dfc7-7e5d-9443-9f62-980ca621b561", "recommendationTypeId": null, "recommendationMetadataState": "Active", 
"learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/managed-grafana/high-availability", - "name": "Azure Managed Grafana service reliability" - }, - { - "url": "https://learn.microsoft.com/Azure/managed-grafana/how-to-enable-zone-redundancy", - "name": "Enable zone redundancy in Azure Managed Grafana" + "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/activity-log?tabs=powershell", + "name": "Azure activity log - Azure Monitor | Microsoft Learn" } ], - "recommendationControl": "High Availability", - "longDescription": "Managed Grafana Standard tier is hosted on a dedicated set of VMs to provide redundancy. With zone redundancy enabled, VMs are spread across availability zones (AZ). Related resources are also configured for AZ. Zone redundancy can only be enabled when creating the Azure Managed Grafana instance.\n", - "pgVerified": false, - "description": "Enable zone redundancy in Managed Grafana", - "potentialBenefits": "Enhanced Managed Grafana resilience to failures", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Create Alerts with Azure Monitor for operations like Create or Update Route Table to spot unauthorized/undesired changes in production resources. 
This setup aids in identifying improper routing changes, including efforts to evade firewalls or access resources from outside.\n", + "pgVerified": true, + "description": "Monitor changes in Route Tables with Azure Monitor", + "potentialBenefits": "Enhanced security and change detection", "tags": null, - "recommendationResourceType": "Microsoft.Dashboard/grafana", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/routeTables", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure Managed Grafana resources that do not zone redundancy enabled.\r\nresources\r\n| where type =~ \"Microsoft.Dashboard/grafana\"\r\n| extend zoneRedundancy = properties.zoneRedundancy\r\n| where zoneRedundancy !~ \"Enabled\"\r\n| project\r\n recommendationId = \"6cd57b65-ef84-4088-9ada-c0d8de74c2f7\",\r\n name,\r\n id,\r\n tags,\r\n param1 = strcat(\"location: \", location),\r\n param2 = strcat(\"sku: \", sku.name),\r\n param3 = strcat(\"zoneRedundancy: \", zoneRedundancy)\r\n" + "query": "// Azure Resource Graph Query\n// Find all Route Tables without alerts for modification configured.\nresources\n| where type =~ \"Microsoft.Network/routeTables\"\n| project name, id, tags, lowerCaseRouteTableId = tolower(id)\n| join kind = leftouter (\n resources\n | where type =~ \"Microsoft.Insights/activityLogAlerts\" and properties.enabled == true\n | mv-expand scope = properties.scopes\n | where scope has \"Microsoft.Network/routeTables\"\n | project alertName = name, conditionJson = dynamic_to_json(properties.condition.allOf), scope\n | where conditionJson has '\"Administrative\"' and (\n // Create or Update Route Table\n (conditionJson has '\"Microsoft.Network/routeTables/write\"') or\n // All Administrative operations\n (conditionJson !has '\"Microsoft.Network/routeTables/write\"' and conditionJson !has '\"Microsoft.Network/routeTables/delete\"' and conditionJson !has 
'\"Microsoft.Network/routeTables/join/action\"')\n )\n | project lowerCaseRouteTableIdOfScope = tolower(scope)\n )\n on $left.lowerCaseRouteTableId == $right.lowerCaseRouteTableIdOfScope\n| where isempty(lowerCaseRouteTableIdOfScope)\n| project recommendationId = \"23b2dfc7-7e5d-9443-9f62-980ca621b561\", name, id, tags, param1 = \"ModificationAlert: Not configured/Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0e835cc2-2551-a247-b1f1-3c5f25c9cb70", + "aprlGuid": "89d1166a-1a20-0f46-acc8-3194387bf127", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/databricks-runtime-ver", - "name": "Databricks runtime support lifecycles" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?toc=%2Fazure%2Fvirtual-network%2Ftoc.json&tabs=json", + "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" } ], "recommendationControl": "Governance", - "longDescription": "Databricks recommends migrating workloads to the latest or LTS version of its runtime for enhanced stability and support. If on Runtime 11.3 LTS or above, move directly to the latest 12.x version. 
If below, first migrate to 11.3 LTS, then to the latest 12.x version as per the migration guide.\n", + "longDescription": "As an administrator, you can protect Azure subscriptions, resource groups, or resources from accidental deletions and modifications by setting locks.\n", "pgVerified": true, - "description": "Databricks runtime version is not latest or is not LTS version", - "potentialBenefits": "Enhanced stability and support", - "publishedToLearn": false, + "description": "Configure locks for Route Tables to avoid accidental changes or deletion", + "potentialBenefits": "Prevents accidental edits/deletions", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/routeTables", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c166602e-0804-e34b-be8f-09b4d56e1fcd", + "aprlGuid": "4281631c-3d19-4994-8d96-084c2a51a534", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-gateway-design#scale-a-nat-gateway-to-meet-the-demand-of-a-dynamic-workload", + "name": "Scale a NAT gateway to meet the demand of a dynamic workload" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-metrics#total-snat-connection-count", + "name": "Total SNAT Connection Count" } ], "recommendationControl": "Scalability", - "longDescription": "Databricks pools pre-provision VMs, reducing risks of provisioning errors during cluster start or scale, enhancing reliability.\n", - "pgVerified": true, - "description": "Use Databricks Pools", - "potentialBenefits": "Reduces 
provisioning errors", - "publishedToLearn": false, + "longDescription": "NAT Gateway provides 64,512 SNAT ports per public IP address and supports up to 16 public IP addresses. Monitor \"Total SNAT connection count\" metric to determine if you're nearing the connection limit of NAT gateway. You can scale the NAT gateway by adding more public IP addresses.\n", + "pgVerified": false, + "description": "Scale a NAT gateway to meet the demand of a dynamic workload", + "potentialBenefits": "Enhances reliability and scalability", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/natGateways", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5877a510-8444-7a4c-8412-a8dab8662f7e", + "aprlGuid": "babf75d6-6407-4d90-b01e-5a1768e621f5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-machines/disks-types#premium-ssd", - "name": "Azure managed disk types" + "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-metrics", + "name": "What is Azure NAT Gateway metrics and alerts?" + }, + { + "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/natGateways/", + "name": "AMBA - NAT Gateway" } ], - "recommendationControl": "Scalability", - "longDescription": "Upgrade HDDs in premium VMs to SSDs for better speed and reliability. Premium SSDs boost IO-heavy apps; Standard SSDs balance cost and performance. Ideal for critical workloads, upgrading improves connectivity with brief reboot. 
Consider for vital VMs\n", - "pgVerified": true, - "description": "Use SSD backed VMs for Worker VM Type and Driver type", - "potentialBenefits": "Faster, reliable VM performance", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Use Network Insights for monitoring and alerting on your NAT gateway.Use Total SNAT connection count metric to determine if you're nearing the connection limit of NAT gateway. Set alerts based on Azure Monitor Baseline Alerts (AMBA) thresholds for NAT Gateway\n", + "pgVerified": false, + "description": "Configure monitoring and alerting for NAT gateway", + "potentialBenefits": "Enhanced network performance and health", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/natGateways", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5c72f0d6-55ec-d941-be84-36c194fa78c0", + "aprlGuid": "419df1ea-336b-460a-b6b2-fefe2588fcef", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#enable-autoscaling-for-batch-workloadss", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-availability-zones#zonal-nat-gateway-resource-for-each-zone-in-a-region-to-create-zone-resiliency", + "name": "Zonal NAT gateway resource for each zone in a region to create zone-resiliency" } ], - "recommendationControl": "Scalability", - "longDescription": "Autoscaling adjusts cluster sizes automatically based on workload demands, offering benefits for many use cases in terms of costs and performance. 
It includes guidance on when and how to best utilize Autoscaling. For streaming, Delta Live Tables with autoscaling is advised.\n", - "pgVerified": true, - "description": "Enable autoscaling for batch workloads", - "potentialBenefits": "Cost and performance optimization", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "A zonal promise for zone isolation scenarios exists when a virtual machine instance using a NAT gateway resource is in the same zone as the NAT gateway resource and its public IP addresses. The pattern you want to use for zone isolation is creating a \"zonal stack\" per availability zone.\n", + "pgVerified": false, + "description": "Consider zonal NAT gateway deployment for zone isolation scenarios", + "potentialBenefits": "Enhances reliability and scalability", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/natGateways", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "362ad2b6-b92c-414f-980a-0cf69467ccce", + "aprlGuid": "4d703025-dafc-f840-a183-5dc440456134", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#enable-autoscaling-for-sql-warehouse", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", + "name": "Designing for disaster recovery with ExpressRoute private peering" } ], - "recommendationControl": "Scalability", - "longDescription": "The scaling parameter of a SQL warehouse defines the min and max number of clusters for distributing queries. 
By default, it's set to one. Increasing the cluster count can accommodate more concurrent users effectively.\n", + "recommendationControl": "High Availability", + "longDescription": "Connecting each ExpressRoute Gateway to a minimum of two circuits in different peering locations enhances redundancy and reliability by ensuring alternate pathways for data in case one circuit fails.\n", "pgVerified": true, - "description": "Enable autoscaling for SQL warehouse", - "potentialBenefits": "Improves concurrency and efficiency", - "publishedToLearn": false, + "description": "Connect on-prem networks to Azure critical workloads via multiple ExpressRoutes", + "potentialBenefits": "Enhanced reliability and redundancy", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "cd77db98-9b13-6e4b-bd2b-74c2cb538628", + "aprlGuid": "0e19cc41-8274-1342-b0db-0e4146eacef8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/designing-for-high-availability-with-expressroute", + "name": "Designing for high availability with ExpressRoute" }, { - "url": "https://learn.microsoft.com/azure/databricks/delta-live-tables/settings#use-autoscaling-to-increase-efficiency-and-reduce-resource-usage", - "name": "Databricks enhanced autoscaling" + "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-expressroute#recommendations", + "name": "Azure Well-Architected Framework review - Azure ExpressRoute - Design Checklist" } ], - 
"recommendationControl": "Scalability", - "longDescription": "Databricks enhanced autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact on the data processing latency of your pipelines.\n", + "recommendationControl": "High Availability", + "longDescription": "Microsoft or the ExpressRoute provider always ensures physical redundancy in their services. It's essential to maintain this level of physical redundancy (two devices, two links) from the ExpressRoute peering location to your network for optimal performance and reliability.\n", "pgVerified": true, - "description": "Use Delta Live Tables enhanced autoscaling", - "potentialBenefits": "Optimized resource use and minimal latency", - "publishedToLearn": false, + "description": "Ensure ExpressRoute's physical links connect to distinct network edge devices", + "potentialBenefits": "Enhanced reliability and fault tolerance", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3d3e53b5-ebd1-db42-b43b-d4fad74824ec", + "aprlGuid": "f06a2bbe-5839-d447-9f39-fc3d20562d88", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/expressroute/designing-for-high-availability-with-expressroute#active-active-connections", + "name": "Designing for high availability with ExpressRoute - Active-active connections" } ], "recommendationControl": "High Availability", - "longDescription": "To 
conserve cluster resources, you can terminate a cluster to store its configuration for future reuse or autostart jobs. Clusters can auto-terminate after inactivity, but this only tracks Spark jobs, not local processes, which might still be running even after Spark jobs end.\n", + "longDescription": "Operating both connections of an ExpressRoute circuit in active-active mode enhances high availability as the Microsoft network will load balance the traffic across the connections on a per-flow basis.\n", "pgVerified": true, - "description": "Automatic Job Termination is enabled, ensure there are no user-defined local processes", - "potentialBenefits": "Saves cluster resources, avoids idle use", - "publishedToLearn": false, + "description": "Ensure both connections of an ExpressRoute circuit are configured in active-active mode", + "potentialBenefits": "Improved high availability and load balancing", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7fb90127-5364-bb4d-86fa-30778ed713fb", + "aprlGuid": "2a5bf650-586d-db4c-a292-d922be7d3e0e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/clusters/configure#cluster-log-delivery", - "name": "Create a cluster" + "url": "https://learn.microsoft.com/azure/expressroute/expressroute-bfd", + "name": "Configure BFD over ExpressRoute" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "When creating a Databricks cluster, you can set a log delivery location for the Spark driver, worker nodes, and events. Logs are delivered every 5 mins and archived hourly. 
Upon cluster termination, all generated logs until that point are guaranteed to be delivered.\n", + "recommendationControl": "High Availability", + "longDescription": "Enabling BFD over ExpressRoute speeds up link failure detection between MSEE devices and routers configured for ExpressRoute (CE/PE), applicable over both customer and Partner Edge routing devices with managed Layer 3 service.\n", "pgVerified": true, - "description": "Enable Logging-Cluster log delivery", - "potentialBenefits": "Improved troubleshooting and audit", - "publishedToLearn": false, + "description": "Activate Bidirectional Forwarding Detection on edge devices for faster failover", + "potentialBenefits": "Faster link failure detection", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "da4ea916-4df3-8c4d-8060-17b49da45977", + "aprlGuid": "9771a435-d031-814e-9827-9b5fdafc0f87", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/expressRouteCircuits/", + "name": "Azure Monitor Baseline Alerts - expressRouteCircuits" } ], - "recommendationControl": "High Availability", - "longDescription": "Delta Lake is an open source storage format enhancing data lakes' reliability with ACID transactions, schema enforcement, and scalable metadata handling.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Use Network Insights for monitoring ExpressRoute circuit 
availability, QoS, and throughput. Set alerts based on Azure Monitor Baseline Alerts for availability, QoS metrics, and throughput metrics exceeding specific thresholds.\n", "pgVerified": true, - "description": "Use Delta Lake for higher reliability", - "potentialBenefits": "Enhances data reliability and processing", - "publishedToLearn": false, + "description": "Configure monitoring and alerting for ExpressRoute circuits", + "potentialBenefits": "Enhanced network performance and health", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "892ca809-e2b5-9a47-924a-71132bf6f902", + "aprlGuid": "26cb547f-aabc-dc40-be02-d0a9b6b04b1a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#use-apache-spark-or-photon-for-distributed-compute", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/expressroute/maintenance-alerts", + "name": "How to view and configure alerts for Azure ExpressRoute circuit maintenance" } ], - "recommendationControl": "High Availability", - "longDescription": "Apache Spark in Databricks Lakehouse ensures resilient distributed data processing by automatically rescheduling failed tasks, aiding in overcoming external issues like network problems or revoked VMs.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "ExpressRoute leverages service health for notifications on both planned and unplanned maintenance, ensuring users are informed about any changes to their ExpressRoute circuits.\n", "pgVerified": true, - "description": "Use Photon Acceleration", 
- "potentialBenefits": "Boosts speed and reliability for Spark tasks", - "publishedToLearn": false, + "description": "Configure service health to receive ExpressRoute circuit maintenance notification", + "potentialBenefits": "Stay informed on circuit updates", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7e52d64d-8cc0-8548-a593-eb49ab45630d", + "aprlGuid": "d40c769d-2f08-4980-8d8f-a386946276e6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/rate-limit", + "name": "Rate limiting for ExpressRoute Direct circuits (Preview)" } ], - "recommendationControl": "Business Continuity", - "longDescription": "Invalid or nonconforming data can crash workloads dependent on specific data formats. Best practices recommend filtering such data at ingestion to improve end-to-end resilience, ensuring no data is lost or missed.\n", + "recommendationControl": "Scalability", + "longDescription": "Rate limiting controls traffic volume between on-premises networks and Azure via ExpressRoute Direct, applying to private or Microsoft peering. 
It distributes port bandwidth, ensures stability, and prevents congestion, with steps outlined for enabling on circuits.\n", "pgVerified": true, - "description": "Automatically rescue invalid or nonconforming data with Databricks Auto Loader or Delta Live Tables", - "potentialBenefits": "Enhanced data resilience and integrity", - "publishedToLearn": false, + "description": "Implement rate-limiting across ExpressRoute Direct Circuits to optimize network flow", + "potentialBenefits": "Optimizes network, prevents congestion", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will return all the ExpressRoute circuits (Direct Based) that have Direct Port Rate Limiting disabled\nresources\n| where type =~ \"microsoft.network/expressroutecircuits\"\n| where properties.expressRoutePort != \"\" or isnotnull(properties.expressRoutePort)\n| where properties.enableDirectPortRateLimit == false\n| project recommendationId = \"d40c769d-2f08-4980-8d8f-a386946276e6\", name, id, tags, param1=strcat(\"enableDirectPortRateLimit: \",properties.enableDirectPortRateLimit)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "84e44da6-8cd7-b349-b02c-c8bf72cf587c", + "aprlGuid": "d2976d3e-294b-4b49-a1f0-c42566a3758f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/diagnostic-settings", + "name": "Diagnostic settings in Azure Monitor" } ], - "recommendationControl": "High Availability", - 
"longDescription": "Use Databricks and MLflow for deploying models as Spark UDFs for job scheduling, retries, autoscaling. Model serving offers scalable infrastructure, processes models using MLflow, and serves them via REST API using serverless compute managed in Databricks cloud.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations.\n", "pgVerified": true, - "description": "Configure jobs for automatic retries and termination", - "potentialBenefits": "Enhanced reliability and autoscaling", - "publishedToLearn": false, + "description": "Configure Diagnostic Settings for all network security groups", + "potentialBenefits": "Enhanced monitoring and security insights", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4cbb7744-ff3d-0447-badb-baf068c95696", + "aprlGuid": "8bb4a57b-55e4-d24e-9c19-2679d8bc779f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/activity-log?tabs=powershell", + "name": "Azure Monitor activity log" } ], - "recommendationControl": "Scalability", - "longDescription": "Use Databricks and MLflow for deploying models as Apache Spark UDFs, benefiting from job scheduling, retries, autoscaling, etc.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Create Alerts with Azure Monitor 
for operations like creating or updating Network Security Group rules to catch unauthorized/undesired changes to resources and spot attempts to bypass firewalls or access resources from the outside.\n", "pgVerified": true, - "description": "Use a scalable and production-grade model serving infrastructure", - "potentialBenefits": "Enhances scalability and reliability", - "publishedToLearn": false, + "description": "Monitor changes in Network Security Groups with Azure Monitor", + "potentialBenefits": "Enhanced security and change monitoring", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Network Security Groups without alerts for modification configured.\nresources\n| where type =~ \"Microsoft.Network/networkSecurityGroups\"\n| project name, id, tags, lowerCaseNsgId = tolower(id)\n| join kind = leftouter (\n resources\n | where type =~ \"Microsoft.Insights/activityLogAlerts\" and properties.enabled == true\n | mv-expand scope = properties.scopes\n | where scope has \"Microsoft.Network/networkSecurityGroups\"\n | project alertName = name, conditionJson = dynamic_to_json(properties.condition.allOf), scope\n | where conditionJson has '\"Administrative\"' and (\n // Create or Update Network Security Group\n (conditionJson has '\"Microsoft.Network/networkSecurityGroups/write\"') or\n // All administrative operations\n (conditionJson !has '\"Microsoft.Network/networkSecurityGroups/write\"' and conditionJson !has '\"Microsoft.Network/networkSecurityGroups/delete\"' and conditionJson !has '\"Microsoft.Network/networkSecurityGroups/join/action\"')\n )\n | project lowerCaseNsgIdOfScope = tolower(scope)\n )\n on $left.lowerCaseNsgId == 
$right.lowerCaseNsgIdOfScope\n| where isempty(lowerCaseNsgIdOfScope)\n| project recommendationId = \"8bb4a57b-55e4-d24e-9c19-2679d8bc779f\", name, id, tags, param1 = \"ModificationAlert: Not configured/Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1b0d0893-bf0e-8f4c-9dc6-f18f145c1ecf", + "aprlGuid": "52ac35e8-9c3e-f84d-8ce8-2fab955333d3", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/azure-resource-manager/management/lock-resources?toc=%2Fazure%2Fvirtual-network%2Ftoc.json&tabs=json", + "name": "Lock your resources to protect your infrastructure" } ], - "recommendationControl": "High Availability", - "longDescription": "Curate data by creating a layered architecture to increase data quality across layers. Start with a raw layer for ingested source data, continue with a curated layer for cleansed and refined data, and finish with a final layer catered to business needs, focusing on security and performance.\n", + "recommendationControl": "Governance", + "longDescription": "As an administrator, you can lock an Azure subscription, resource group, or resource to protect them from accidental deletions and modifications. The lock overrides user permissions. 
Locks can prevent either deletions or modifications and are known as Delete and Read-only in the portal.\n", "pgVerified": true, - "description": "Use a layered storage architecture", - "potentialBenefits": "Enhances data quality and trust", - "publishedToLearn": false, + "description": "Configure locks for Network Security Groups to avoid accidental changes and/or deletion", + "potentialBenefits": "Prevents accidental edits/deletions", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e93fe702-e385-d741-ba37-1f1656482ecd", + "aprlGuid": "da1a3c06-d1d5-a940-9a99-fcc05966fe7c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/network-watcher/network-watcher-nsg-flow-logging-overview", + "name": "Flow logging for network security groups" } ], - "recommendationControl": "Business Continuity", - "longDescription": "Copying data leads to redundancy, lost integrity, lineage, and access issues, affecting lakehouse data quality. Temporary copies are useful for agility and innovation but can become problematic operational data silos, questioning data's master status and currency.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitoring, managing, and understanding your network is crucial for protection and optimization. 
Knowing the current state, who and from where connections are made, open internet ports, expected and irregular behavior, and traffic spikes is essential.\n", "pgVerified": true, - "description": "Improve data integrity by reducing data redundancy", - "potentialBenefits": "Enhanced data integrity and quality", - "publishedToLearn": false, + "description": "Configure NSG Flow Logs", + "potentialBenefits": "Enhances security and optimizes network", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Network Security Groups without NSG Flow logs configured or disabled.\nresources\n| where type =~ \"Microsoft.Network/networkSecurityGroups\"\n| project name, id, tags, lowerCaseNsgId = tolower(id)\n| join kind = leftouter (\n resources\n | where type == \"microsoft.network/networkwatchers/flowlogs\" and properties.enabled == true\n | project flowLogName = name, lowerCaseTargetNsgId = tolower(properties.targetResourceId)\n )\n on $left.lowerCaseNsgId == $right.lowerCaseTargetNsgId\n| where isempty(lowerCaseTargetNsgId)\n| project recommendationId = \"da1a3c06-d1d5-a940-9a99-fcc05966fe7c\", name, id, tags, param1 = \"NSGFlowLog: Not configured/Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b7e1d13f-54c9-1648-8a52-34c0abe8ce16", + "aprlGuid": "8291c1fa-650c-b44b-b008-4deb7465919d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": 
"https://learn.microsoft.com/azure/virtual-network/network-security-groups-overview#security-rules", + "name": "Security rules" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Uncontrolled schema changes can lead to invalid data and failing jobs. Databricks validates and enforces schema through Delta Lake, which prevents bad records during ingestion, and Auto Loader, which detects new columns and supports schema evolution to maintain data integrity.\n", + "recommendationControl": "Security", + "longDescription": "Azure network security groups filter network traffic between resources in a virtual network, using security rules to allow or deny inbound or outbound traffic based on source, destination, port, and protocol.\n", "pgVerified": true, - "description": "Actively manage schemas", - "potentialBenefits": "Prevents invalid data and job failures", - "publishedToLearn": false, + "description": "The NSG only has Default Security Rules, make sure to configure the necessary rules", + "potentialBenefits": "Enhanced traffic control and security", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will return all NSGs that have NO security rules\nresources\n| where type =~ \"microsoft.network/networksecuritygroups\"\n| extend sr = string_size(properties.securityRules)\n| where sr <=2 or isnull(properties.securityRules)\n| project recommendationId = \"8291c1fa-650c-b44b-b008-4deb7465919d\", name, id\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a42297c4-7e4f-8b41-8d4b-114033263f0e", + "aprlGuid": "c72b7fee-1fa0-5b4b-98e5-54bcae95bb74", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#use-constraints-and-data-expectations", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-firewall", + "name": "Azure Well Architected Framework - Azure Firewall" + }, + { + "url": "https://learn.microsoft.com/azure/firewall/deploy-availability-zone-powershell", + "name": "Deploy Azure Firewall across multiple availability zones" } ], - "recommendationControl": "Business Continuity", - "longDescription": "Delta tables verify data quality automatically with SQL constraints, triggering an error for violations. Delta Live Tables enhance this by defining expectations for data quality, utilizing Python or SQL, to manage actions for record failures, ensuring data integrity and compliance.\n", + "recommendationControl": "High Availability", + "longDescription": "Azure Firewall offers different SLAs depending on its deployment; in a single availability zone or across multiple, potentially improving reliability and performance.\n", "pgVerified": true, - "description": "Use constraints and data expectations", - "potentialBenefits": "Ensures data quality and integrity", - "publishedToLearn": false, + "description": "Deploy Azure Firewall across multiple availability zones", + "potentialBenefits": "Enhanced SLA and reliability", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/azureFirewalls", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// List all Azure Firewalls that are not configured with multiple availability zones or deployed without a zone\nresources\n| where type == 'microsoft.network/azurefirewalls'\n| where array_length(zones) <= 1 
or isnull(zones)\n| where isempty(properties.virtualHub.id) or isnull(properties.virtualHub.id)\n| project recommendationId = \"c72b7fee-1fa0-5b4b-98e5-54bcae95bb74\", name, id, tags, param1=\"multipleZones:false\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "932d45d6-b46d-e341-abfb-d97bce832f1f", + "aprlGuid": "3c8fa7c6-6b78-a24a-a63f-348a7c71acb9", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#create-regular-backups", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkazurefirewalls", + "name": "Azure Firewall metrics supported in Azure Monitor" + }, + { + "url": "https://learn.microsoft.com/azure/firewall/firewall-performance", + "name": "Azure Firewall performance" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "To recover from a failure, regular backups are needed. The Databricks Labs project migrate lets admins create backups by exporting workspace assets using the Databricks CLI/API. These backups help in restoring or migrating workspaces.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitor Azure Firewall for overall health, processed throughput, and outbound SNAT port usage. Get alerted before limits impact services. 
Consider NAT gateway integration with zonal deployments; note limitations with zone redundant firewalls and secure virtual hub networks.\n", "pgVerified": true, - "description": "Create regular backups", - "potentialBenefits": "Ensures data recovery and migration", - "publishedToLearn": false, + "description": "Monitor Azure Firewall metrics", + "potentialBenefits": "Improve health and performance monitoring", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/azureFirewalls", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// List all Azure Firewalls resources in-scope, along with any metrics associated to Azure Monitor alert rules, that are not fully configured.\nresources\n| where type == \"microsoft.network/azurefirewalls\"\n| project firewallId = tolower(id), name, tags\n| join kind = leftouter (\n resources\n | where type == \"microsoft.insights/metricalerts\"\n | mv-expand properties.scopes\n | mv-expand properties.criteria.allOf\n | where properties_scopes contains \"azureFirewalls\"\n | project metricId = tolower(properties_scopes), monitoredMetric = properties_criteria_allOf.metricName, tags\n | summarize monitoredMetrics = make_list(monitoredMetric) by tostring(metricId)\n | project\n metricId,\n monitoredMetrics,\n allAlertsConfigured = monitoredMetrics contains(\"FirewallHealth\") and monitoredMetrics contains (\"Throughput\") and monitoredMetrics contains (\"SNATPortUtilization\")\n) on $left.firewallId == $right.metricId\n| extend alertsNotFullyConfigured = isnull(allAlertsConfigured) or not(allAlertsConfigured)\n| where alertsNotFullyConfigured\n| project recommendationId = \"3c8fa7c6-6b78-a24a-a63f-348a7c71acb9\", name, id = firewallId, tags, param1 = strcat(\"MetricsAlerts:\", monitoredMetrics)\n\n" }, {
"publishedToAdvisor": null, - "aprlGuid": "12e9d852-5cdc-2743-bffe-ee21f2ef7781", + "aprlGuid": "1b2dbf4a-8a0b-5e4b-8f4e-3f758188910d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#recover-from-structured-streaming-query-failures", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/ddos-protection/ddos-protection-overview", + "name": "Azure DDoS Protection overview" } ], - "recommendationControl": "High Availability", - "longDescription": "Structured Streaming ensures fault-tolerance and data consistency in streaming queries. With Azure Databricks workflows, you can set up your queries to automatically restart after failure, picking up precisely where they left off.\n", + "recommendationControl": "Security", + "longDescription": "Associate a DDoS protection plan with the virtual network hosting Azure Firewall to provide enhanced mitigation against DDoS attacks. 
Azure Firewall Manager integrates the creation of firewall infrastructure and DDoS protection plans.\n", "pgVerified": true, - "description": "Recover from Structured Streaming query failures", - "potentialBenefits": "Fault-tolerance and auto-restart for queries", - "publishedToLearn": false, + "description": "Configure DDoS Protection on the Azure Firewall VNet", + "potentialBenefits": "Enhanced DDoS attack defense", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/azureFirewalls", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// List all in-scope Azure Firewall resources, where the VNet is not associated to a DDoS Protection Plan\nresources\n| where type =~ \"Microsoft.Network/azureFirewalls\"\n| where isempty(properties.virtualHub.id) or isnull(properties.virtualHub.id)\n| mv-expand ipConfig = properties.ipConfigurations\n| project\n name,\n firewallId = id,\n tags,\n vNetName = split(ipConfig.properties.subnet.id, \"/\", 8)[0],\n vNetId = tolower(substring(ipConfig.properties.subnet.id, 0, indexof(ipConfig.properties.subnet.id, \"/subnet\")))\n| join kind=fullouter (\n resources\n | where type =~ \"Microsoft.Network/ddosProtectionPlans\"\n | mv-expand vNet = properties.virtualNetworks\n | project ddosProtectionPlanId = id, vNetId = tolower(vNet.id)\n )\n on vNetId\n| where isempty(ddosProtectionPlanId)\n| project recommendationId = \"1b2dbf4a-8a0b-5e4b-8f4e-3f758188910d\", name, id = firewallId, tags, param1 = strcat(\"vNet: \", vNetName), param2 = \"ddosProtection: Disabled\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a18d60f8-c98c-ba4e-ad6e-2fac72879df1", + "aprlGuid": "3a63560a-1ed3-6140-acd1-d1d23f9a2e12", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#recover-etl-jobs-based-on-delta-time-travel", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/azure/firewall-manager/rule-hierarchy", + "name": "Azure Firewall Policy hierarchy" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Despite thorough testing, a production job can fail or yield unexpected data. Sometimes, repairs are done by adding jobs post-issue identification and pipeline correction.\n", + "recommendationControl": "Governance", + "longDescription": "Azure Firewall policy supports rule hierarchies for compliance enforcement, using a central base policy with higher priority over child policies, and employs Azure custom roles to safeguard base policy and manage access within subscriptions or groups.\n", "pgVerified": true, - "description": "Recover ETL jobs based on Delta time travel", - "potentialBenefits": "Easy rollback and fix for ETL jobs", - "publishedToLearn": false, + "description": "Leverage Azure Firewall policy inheritance model", + "potentialBenefits": "Enhanced compliance and rule hierarchy", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/azureFirewalls", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c0e22580-3819-444d-8546-a80e4ed85c83", + "aprlGuid": "d2e4a38e-2307-4299-a217-4c0cebc9a7f6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", - "name": "Best practices for reliability" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/service-guides/azure-firewall#recommendations", + "name": 
"Azure Well-Architected Framework review - Azure Firewall" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Databricks Workflows enable efficient error recovery in multi-task jobs by offering a matrix view for issue examination. Fixes can be applied to initiate repair runs targeting only failed and dependent tasks, preserving successful outcomes and thereby saving time and money.\n", - "pgVerified": true, - "description": "Use Databricks Workflows and built-in recovery", - "potentialBenefits": "Saves time and money with smart recovery", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Configure a minimum of two to four public IP addresses per Azure Firewall to avoid SNAT exhaustion. Azure Firewall offers SNAT for all outbound traffic to public IPs, providing 2,496 SNAT ports for each additional PIP.\n", + "pgVerified": false, + "description": "Configure 2-4 PIPs for SNAT Port utilization", + "potentialBenefits": "Avoids SNAT exhaustion.", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Network/azureFirewalls", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4fdb7112-4531-6f48-b60e-c917a6068d9b", + "aprlGuid": "8faace2d-a36e-425c-aa58-2ad99e3e0b7a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://github.com/Azure/AzureDatabricksBestPractices/tree/master", - "name": "Azure Databricks Best Practices" + "url": "https://learn.microsoft.com/azure/well-architected/service-guides/azure-firewall#recommendations", + "name": "Azure Well-Architected Framework review - Azure Firewall" + }, + { + "url": "https://learn.microsoft.com/azure/firewall/metrics", + "name": "Azure Firewall metrics 
overview" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Implementing a disaster recovery pattern is vital for Azure Databricks, ensuring data teams' access even during rare regional outages.\n\nIt is important to note that the Azure Databricks service is not entirely zone redudant and does support zonal failover.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Creating a metric to monitor latency probes over 20ms for periods longer than 30ms helps identify when firewall instance CPUs are stressed, potentially indicating issues.\n", "pgVerified": false, - "description": "Configure a disaster recovery pattern", - "potentialBenefits": "Ensures service continuity during disasters", - "publishedToLearn": false, + "description": "Monitor \"AZFW Latency Probe\" metric", + "potentialBenefits": "Improved CPU stress detection", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/azureFirewalls", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "42aedaa8-6151-424d-b782-b8666c779969", + "aprlGuid": "b89c9acc-0aba-fb44-9ff2-3dbfcf97dce7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/operational-excellence/best-practices#2-automate-deployments-and-workloads", - "name": "Best practices for operational excellence" + "url": "https://learn.microsoft.com/azure/private-link/manage-private-endpoint?tabs=manage-private-link-powershell#private-endpoint-connections", + "name": "Private endpoint connections" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "The Databricks Terraform provider manages Azure Databricks workspaces and cloud infrastructure 
flexibly and powerfully.\n", - "pgVerified": false, - "description": "Automate deployments and workloads", - "potentialBenefits": "Efficient, reliable automation", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "A private endpoint has two custom properties, static IP address and the network interface name, which must be set at creation. If not in Succeeded state, there may be issues with the endpoint or associated resource.\n", + "pgVerified": true, + "description": "Resolve issues with Private Endpoints in non Succeeded connection state", + "potentialBenefits": "Ensure connection availability", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/privateEndpoints", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will return all Private Endpoints that are not in a Succeeded state\nresources\n| where type =~ \"microsoft.network/privateendpoints\"\n| where (properties.provisioningState =~ \"Succeeded\" and (properties.privateLinkServiceConnections[0].properties.provisioningState =~ \"Succeeded\" or properties.manualPrivateLinkServiceConnections[0].properties.provisioningState =~ \"Succeeded\")) == false\n| project recommendationId = \"b89c9acc-0aba-fb44-9ff2-3dbfcf97dce7\", name, id, tags, param1 = strcat(\"provisioningState: \", tostring(properties.provisioningState)), param2 = strcat(\"provisioningState: \", tostring(properties.privateLinkServiceConnections[0].properties.provisioningState)), param3 = strcat(\"manualProvisioningState: \", tostring(properties.manualPrivateLinkServiceConnections[0].properties.provisioningState))\n" }, { "publishedToAdvisor": null, - "aprlGuid": "20193ff9-dbcd-a74e-b197-71d7d9d3c1e6", + "aprlGuid": 
"f05a3e6d-49db-2740-88e2-2b13706c1f67", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/operational-excellence/best-practices#system-monitoring", - "name": "Best practices for operational excellence" + "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-monitoring", + "name": "Azure Traffic Manager endpoint monitoring" + }, + { + "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-monitoring#enable-or-disable-health-checks-preview", + "name": "Enable or disable health checks" + }, + { + "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-troubleshooting-degraded", + "name": "Troubleshooting degraded state on Azure Traffic Manager" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "The Databricks Terraform provider is a flexible, powerful tool for managing Azure Databricks workspaces and cloud infrastructure.\n", - "pgVerified": false, - "description": "Set up monitoring, alerting, and logging", - "potentialBenefits": "Enhanced reliability and automation", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Monitor status should be online to ensure failover for application workload. 
If Traffic Manager's health shows Degraded, one or more endpoints may also be Degraded.\n", + "pgVerified": true, + "description": "Traffic Manager Monitor Status Should be Online", + "potentialBenefits": "Ensures failover functionality", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find traffic manager profiles that have an endpoint monitor status of not 'Online'\nresources\n| where type == \"microsoft.network/trafficmanagerprofiles\"\n| mv-expand properties.endpoints\n| where properties_endpoints.properties.endpointMonitorStatus != \"Online\"\n| project recommendationId = \"f05a3e6d-49db-2740-88e2-2b13706c1f67\", name, id, tags, param1 = strcat('Profile name: ',properties_endpoints.name), param2 = strcat('endpointMonitorStatus: ', properties_endpoints.properties.endpointMonitorStatus)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "397cdebb-9d6e-ab4f-83a1-8c481de0a3a7", - "recommendationTypeId": null, + "aprlGuid": "5b422a7f-8caa-3d48-becb-511599e5bba9", + "recommendationTypeId": "6cd70072-c45c-4716-bf7b-b35c18e46e72", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#deploy-workspaces-in-multiple-subscriptions-to-honor-azure-capacity-limits", - "name": "Azure Databricks Best Practices" + "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-endpoint-types", + "name": "Traffic Manager Endpoint Types" } ], - "recommendationControl": "Scalability", - "longDescription": "Customers often naturally divide workspaces by teams or departments. 
However, it's crucial to also consider Azure Subscription and Azure Databricks (ADB) Workspace limits when partitioning.\n", - "pgVerified": false, - "description": "Deploy workspaces in separate Subscriptions", - "potentialBenefits": "Enhanced limits management, team separation", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "When configuring the Azure traffic manager, provision at least two endpoints to ensure workloads can fail-over to another instance, enhancing reliability and availability.\n", + "pgVerified": true, + "description": "Traffic manager profiles should have more than one endpoint", + "potentialBenefits": "Enhances failover capabilities", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find traffic manager profiles that have less than 2 endpoints\nresources\n| where type == \"microsoft.network/trafficmanagerprofiles\"\n| where array_length(properties.endpoints) < 2\n| project recommendationId = \"5b422a7f-8caa-3d48-becb-511599e5bba9\", name, id, tags, param1 = strcat('EndpointCount: ', array_length(properties.endpoints))\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5e722c4f-415a-9b4c-bd4c-96b74dce29ad", - "recommendationTypeId": null, + "aprlGuid": "1ad9d7b7-9692-1441-a8f4-93792efbe97a", + "recommendationTypeId": "0db76759-6d22-4262-93f0-2f989ba2b58e", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#consider-isolating-each-workspace-in-its-own-vnet", - "name": "Azure Databricks Best Practices" + "url": 
"https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#add-at-least-one-more-endpoint-to-the-profile-preferably-in-another-azure-region", + "name": "Reliability recommendations" } ], - "recommendationControl": "Scalability", - "longDescription": "Deploying only one Databricks Workspace per VNet aligns with Azure Databricks' isolation model.\n", - "pgVerified": false, - "description": "Isolate each workspace in its own VNet", - "potentialBenefits": "Enhanced security and resource isolation", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Profiles should have multiple endpoints to ensure availability in case an endpoint fails. It's also advised to distribute these endpoints across different regions for enhanced reliability.\n", + "pgVerified": true, + "description": "Configure at least one endpoint within a another region", + "potentialBenefits": "Enhances availability across regions", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "14310ba6-77ad-3641-a2db-57a2218b9bc7", - "recommendationTypeId": null, + "aprlGuid": "c31f76a0-48cd-9f44-aa43-99ee904db9bc", + "recommendationTypeId": "0bbe0a49-3c63-49d3-ab4a-aa24198f03f7", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#do-not-store-any-production-data-in-default-dbfs-folders", - "name": "Azure Databricks Best Practices" + "url": "https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#add-an-endpoint-configured-to-all-world", + "name": "Add an endpoint configured to 
\"All (World)\"" + }, + { + "url": "https://aka.ms/Rf7vc5", + "name": "Traffic Manager profile - GeographicProfile (Add an endpoint configured to \"\"All (World)\"\")." } ], - "recommendationControl": "High Availability", - "longDescription": "Driven by security and data availability concerns, each Azure Databricks Workspace comes with a default DBFS designed for system-level artifacts like libraries and Init scripts, not for production data.\n", - "pgVerified": false, - "description": "Do not Store any Production Data in Default DBFS Folders", - "potentialBenefits": "Enhanced security, data protection", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "For geographic routing, traffic is directed to endpoints based on specific regions. If a region fails, without a predefined failover, configuring an endpoint to \"All (World)\" for geographic profiles can prevent traffic black holes, ensuring service remains available.\n", + "pgVerified": true, + "description": "Ensure endpoint configured to (All World) for geographic profiles", + "potentialBenefits": "Avoids traffic black holing, ensures availability", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Traffic Manager resources that are not confirgured for all-World access\nResources\n| where type == 'microsoft.network/trafficmanagerprofiles'\n| where properties.trafficRoutingMethod =~ \"Geographic\"\n| extend endpoints = properties.endpoints\n| mv-expand endpoint = endpoints\n| where endpoint.properties.geoMapping !contains \"WORLD\"\n| extend endpointName = endpoint.name\n| project recommendationId=\"c31f76a0-48cd-9f44-aa43-99ee904db9bc\", name, id, tags, 
param1=strcat(\"endpointName:\",endpointName), param2=strcat(\"GeoMapping:\", tostring(endpoint.properties.geoMapping))\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b5af7e26-3939-1b48-8fba-f8d4a475c67a", + "aprlGuid": "9987c813-d687-4163-a511-95f31bc5e536", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/spot-vms", - "name": "Use Azure Spot Virtual Machines" + "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", + "name": "Designing for disaster recovery with ExpressRoute private peering" } ], "recommendationControl": "High Availability", - "longDescription": "Azure Spot VMs are not suitable for critical production workloads needing high availability and reliability. They are meant for fault-tolerant tasks and can be evicted with 30-seconds notice if Azure needs the capacity, with no SLA guarantees.\n", + "longDescription": "To increase reliability, it's advised that each v-Hub's ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience.", "pgVerified": false, - "description": "Do not use Azure Spot VMs for critical Production workloads", - "potentialBenefits": "Ensures high reliability for production", - "publishedToLearn": false, + "description": "Connect v-Hub's ExpressRoute gateway to circuits from diverse peering locations for resilience", + "potentialBenefits": "Enhance resiliency for Azure Service", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/expressRouteGateways", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"8aa63c34-dd9d-49bd-9582-21ec310dfbdd", + "aprlGuid": "17e8d380-e4b4-41a1-9b37-2e4df9fd5125", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/databricks/resources/supported-regions#--azure-databricks-control-plane-addresses", - "name": "Azure Databricks control plane addresses" - }, - { - "url": "https://github.com/databrickslabs/migrate", - "name": "Migrate - maintained by Databricks Inc." - }, - { - "url": "https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/experimental-exporter", - "name": "Databricks Terraform Exporter - maintained by Databricks Inc. (Experimental)" + "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#expressroute-gateway", + "name": "Virtual WAN Monitoring Best Practices" } ], - "recommendationControl": "High Availability", - "longDescription": "Move workspaces to in-region control plane for increased regional isolation. Identify current control plane region using the workspace URL and nslookup. When region from CNAME differs from workspace region and an in-region control is available, consider migration using tools provided below.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Set up monitoring and alerts for Virtual WAN ExpressRoute Gateway. 
Create alert rule for ensuring promptly response to critical events such as exceeding packets per second, exceeding BGP routes prefixes, Gateway overutilization and high frequency in route changes.", "pgVerified": false, - "description": "Evaluate regional isolation for workspaces", - "potentialBenefits": "Improves resilience and data sovereignty", - "publishedToLearn": false, + "description": "Monitor health for v-Hub's ExpressRoute gateway", + "potentialBenefits": "Detection and mitigation to avoid disruptions.", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/expressRouteGateways", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "028593be-956e-4736-bccf-074cb10b92f4", + "aprlGuid": "ae054bf2-aefa-cf4a-8282-741194cef8da", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/databricks/compute/cluster-config-best-practices", - "name": "Compute configuration best practices" - }, - { - "url": "https://learn.microsoft.com/azure/databricks/compute/gpu", - "name": "GPU-enabled compute" + "url": "https://learn.microsoft.com/en-us/azure/ddos-protection/monitor-ddos-protection-reference", + "name": "Monitoring Azure DDoS Protection" } ], - "recommendationControl": "Personalized", - "longDescription": "Azure Databricks planning should include VM SKU swap strategies for capacity issues. 
VMs are regional, and allocation failures may occur, shown by a \"CLOUD PROVIDER\" error.\n", - "pgVerified": false, - "description": "Define alternate VM SKUs", - "potentialBenefits": "Ensures service availability", - "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "Azure DDoS Plan metrics differentiate packets and bytes by tags: Dropped (packets scrubbed by DDoS), Forwarded (packets to VIP not filtered), and No tag (total packets, sum of dropped and forwarded).\n", + "pgVerified": true, + "description": "Monitor Azure DDoS Protection Plan metrics", + "potentialBenefits": "Enhanced security and traffic insight", "tags": null, - "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationResourceType": "Microsoft.Network/ddosProtectionPlans", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "88856605-53d8-4bbd-a75b-4a7b14939d32", + "aprlGuid": "f6a14b32-a727-4ace-b5fa-7b1c6bdff402", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/mysql/flexible-server/concepts-high-availability", - "name": "High availability concepts in Azure Database for MySQL - Flexible Server" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/about-fastpath", + "name": "About ExpressRoute FastPath" } ], - "recommendationControl": "High Availability", - "longDescription": "Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery.\n", + "recommendationControl": "Scalability", + "longDescription": "ExpressRoute gateways facilitate network traffic and route exchanges. 
FastPath enhances on-premises to virtual network data path performance by directing traffic straight to virtual machines, bypassing the gateway for improved resiliency through reduced gateway utilization.\n", "pgVerified": true, - "description": "Enable HA with zone redundancy", - "potentialBenefits": "Enhanced uptime and data protection", - "publishedToLearn": false, + "description": "For better data path performance enable FastPath on ExpressRoute Direct and Gateway", + "potentialBenefits": "Enhances speed and resiliency", "tags": null, - "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for MySQL instances that are not zone redundant\r\nresources\r\n| where type == \"microsoft.dbformysql/flexibleservers\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.highAvailability.mode != \"ZoneRedundant\"\r\n| project recommendationId = \"88856605-53d8-4bbd-a75b-4a7b14939d32\", name, id, tags, param1 = \"ZoneRedundant: False\"\r\n" + "recommendationResourceType": "Microsoft.Network/connections", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "82a9a0f2-24ee-496f-9ad2-25f81710942d", + "aprlGuid": "a5f3a4bd-4cf1-4196-a3cb-f5a0876198b2", 
"recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/mysql/flexible-server/concepts-maintenance", - "name": "Scheduled maintenance in Azure Database for MySQL - Flexible Server" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?tabs=json", + "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" } ], - "recommendationControl": "Scalability", - "longDescription": "Use custom maintenance schedule on flexible server instances to select a preferred time for service updates to be applied.\n", + "recommendationControl": "High Availability", + "longDescription": "Configure an Azure Resource lock for Gateway Connection resources to prevent accidental deletion and maintain connectivity between on-premises networks and Azure workloads.\n", "pgVerified": true, - "description": "Enable custom maintenance schedule", - "potentialBenefits": "Control update timings", - "publishedToLearn": false, + "description": "Configure an Azure Resource Lock on connections to prevent accidental deletion", + "potentialBenefits": "Prevents accidental deletion of connections", "tags": null, - "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/connections", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for MySQL instances that do not have a custom maintenance window\r\nresources\r\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\r\n| where properties.maintenanceWindow.customWindow != \"Enabled\"\r\n| project recommendationId = \"82a9a0f2-24ee-496f-9ad2-25f81710942d\", name, id, tags, param1 = strcat(\"customWindow:\", properties['maintenanceWindow']['customWindow'])\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - 
"aprlGuid": "5c96afc3-7d2e-46ff-a4c7-9c32850c441b", + "aprlGuid": "f0d4f766-ac19-48c4-b228-4601cc038baa", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-backup-restore", - "name": "Backup and restore in Azure Database for MySQL - Flexible Server" + "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#virtual-wan-gateways", + "name": "Virtual WAN Monitoring Best Practices" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", - "pgVerified": true, - "description": "Configure geo redundant backup storage", - "potentialBenefits": "Recover from regional failure and/or disaster", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Set up monitoring and alerts for v-Hub's VPN Gateway. 
Create alert rule for ensuring promptly response to critical events such as packet drop counts, BGP status, Gateway overutilization.", + "pgVerified": false, + "description": "Monitor gateway for Site-to-site v-Hub's VPN gateway", + "potentialBenefits": "Detection and mitigation to avoid disruptions.", "tags": null, - "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/vpnGateways", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for MySQL instances that do not have geo redundant backup storage enabled\r\nresources\r\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\r\n| where properties.backup.geoRedundantBackup != \"Enabled\"\r\n| project recommendationId = \"5c96afc3-7d2e-46ff-a4c7-9c32850c441b\", name, id, tags, param1 = strcat(\"geoRedundantBackup:\", properties['backup']['geoRedundantBackup'])\r\n" + "automationAvailable": false, + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b49a8653-cc43-48c9-8513-a2d2e3f14dd1", + "aprlGuid": "4e133bd0-8762-bc40-a95b-b29142427d73", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-read-replicas", - "name": "Read replicas in Azure Database for MySQL - Flexible Server" + "url": "https://learn.microsoft.com/azure/network-watcher/network-watcher-overview", + "name": "What is Azure Network Watcher?" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Azure Network Watcher offers tools for monitoring, diagnosing, viewing metrics, and managing logs for IaaS resources. 
It helps maintain the health of VMs, VNets, application gateways, load balancers, but not for PaaS or Web analytics.\n", "pgVerified": true, - "description": "Configure one or more read replicas", - "potentialBenefits": "Recover from regional failure and/or disaster", - "publishedToLearn": false, + "description": "Deploy Network Watcher in all regions where you have networking services", + "potentialBenefits": "Enhanced monitoring and diagnostics for Azure IaaS", "tags": null, - "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/networkWatchers", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for MySQL instances that do not have a read replica configured\r\nresources\r\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\r\n| where properties.replicationRole == \"None\"\r\n| project recommendationId = \"b49a8653-cc43-48c9-8513-a2d2e3f14dd1\", name, id, tags, param1 = strcat(\"replicationRole:\", properties['replicationRole'])\r\n" + "query": "// Azure Resource Graph Query\n// This query will return all locations that do not have a Network Watcher deployed\nresources\n| where location != \"global\"\n| union (Resources\n | where type =~ \"microsoft.network/networkwatchers\")\n| summarize NetworkWatcherCount = countif(type =~ 'Microsoft.Network/networkWatchers') by location\n| where NetworkWatcherCount == 0\n| project recommendationId = \"4e133bd0-8762-bc40-a95b-b29142427d73\", name=location, id=\"n/a\", param1 = strcat(\"LocationMisingNetworkWatcher:\", location)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8176a79d-8645-4e52-96be-a10fc0204fe5", + "aprlGuid": "22a769ed-0ecb-8b49-bafe-8f52e6373d9c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-service-tiers-storage#storage-auto-grow", - "name": "Azure Database for MySQL - Flexible Server service tiers - Storage auto grow" + "url": "https://learn.microsoft.com/azure/network-watcher/nsg-flow-logging", + "name": "Manage NSG flow logs using the Azure portal" } ], - "recommendationControl": "Scalability", - "longDescription": "Configure storage auto-grow to prevent the server from running out of storage and becoming read-only.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Network security group flow logging is a feature of Azure Network Watcher that logs IP traffic info through a network security group. If in Failed state, monitoring data from the associated resource is not collected.\n", "pgVerified": true, - "description": "Configure storage auto-grow", - "potentialBenefits": "Scale storage automatically to meet increasing demand", - "publishedToLearn": false, + "description": "Fix Flow Log configurations in Failed state or Disabled Status", + "potentialBenefits": "Ensures IP traffic logging", "tags": null, - "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/networkWatchers", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for MySQL instances that do not have a storage auto-grow\r\nresources\r\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\r\n| where properties.storage.autoGrow != \"Enabled\"\r\n| project recommendationId = \"8176a79d-8645-4e52-96be-a10fc0204fe5\", name, id, tags, param1 = strcat(\"autoGrow:\", properties['storage']['autoGrow'])\r\n" + "query": "// Azure Resource Graph Query\n// This query will return all Network Watcher Flow Logs that are not enabled or in a succeeded state\nresources\n| where type =~ \"microsoft.network/networkwatchers/flowlogs\" 
and isnotnull(properties)\n| extend targetResourceId = tostring(properties.targetResourceId)\n| extend status = iff(properties.enabled =~ 'true', \"Enabled\", \"Disabled\")\n| extend provisioningState = tostring(properties.provisioningState)\n| extend flowLogType = iff(properties.targetResourceId contains \"Microsoft.Network/virtualNetworks\", 'Virtual network', 'Network security group')\n| where provisioningState != \"Succeeded\" or status != \"Enabled\"\n| project recommendationId = \"22a769ed-0ecb-8b49-bafe-8f52e6373d9c\", name, id, tags, param1 = strcat(\"provisioningState:\", provisioningState), param2=strcat(\"Status:\", status), param3=strcat(\"targetResourceId:\",targetResourceId), param4=strcat(\"flowLogType:\",flowLogType)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ca87914f-aac4-4783-ab67-82a6f936f194", + "aprlGuid": "1e28bbc1-1eb7-486f-8d7f-93943f40219c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/postgresql/flexible-server/concepts-high-availability", - "name": "Overview of high availability with Azure Database for PostgreSQL" + "url": "https://learn.microsoft.com/en-us/azure/network-watcher/connection-monitor-overview", + "name": "Connection monitor overview" } ], - "recommendationControl": "High Availability", - "longDescription": "Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Improves monitoring for Azure and Hybrid connectivity\n", "pgVerified": true, - "description": "Enable HA with zone redundancy", - "potentialBenefits": "Enhanced uptime and data protection", - "publishedToLearn": false, + "description": "Configure Network Watcher Connection monitor", + "potentialBenefits": "Improves monitoring for Azure and Hybrid 
connectivity", "tags": null, - "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/networkWatchers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for PostgreSQL instances that are not zone redundant\r\nresources\r\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.highAvailability.mode != \"ZoneRedundant\"\r\n| project recommendationId = \"ca87914f-aac4-4783-ab67-82a6f936f194\", name, id, tags, param1 = \"ZoneRedundant: False\"\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b2bad57d-7e03-4c0f-9024-597c9eb295bb", + "aprlGuid": "a1317a0b-402d-4604-be40-a25a004ba171", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/postgresql/flexible-server/concepts-maintenance", - "name": "Scheduled maintenance in Azure Database for PostgreSQL - Flexible Server" + "url": "https://learn.microsoft.com/en-us/azure/network-watcher/nsg-flow-logs-overview", + "name": "Flow logging for network security groups" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/network-watcher/vnet-flow-logs-overview", + "name": "Virtual network flow logs" } ], - "recommendationControl": 
"Scalability", - "longDescription": "Use custom maintenance schedule on flexible server instances to select a preferred time for service updates to be applied.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Improves monitoring and security for Azure and Hybrid connectivity\n", "pgVerified": true, - "description": "Enable custom maintenance schedule", - "potentialBenefits": "Control update timings", - "publishedToLearn": false, + "description": "Enable Network Security Group and Virtual Network Flow Logs", + "potentialBenefits": "Improves monitoring and security for Azure connectivity", "tags": null, - "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/networkWatchers", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for PostgreSQL instances that do not have a custom maintenance window\r\nresources\r\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\r\n| where properties.maintenanceWindow.customWindow != \"Enabled\"\r\n| project recommendationId = \"b2bad57d-7e03-4c0f-9024-597c9eb295bb\", name, id, tags, param1 = strcat(\"customWindow:\", properties['maintenanceWindow']['customWindow'])\r\n" + "automationAvailable": false, + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "31f4ac4b-29cb-4588-8de2-d8fe6f13ceb3", + "aprlGuid": "bf0b7dbd-016d-458c-af99-70fcb03ad451", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-backup-restore", - "name": "Backup and restore in Azure Database for PostgreSQL - Flexible Server" + "url": "https://learn.microsoft.com/en-us/azure/network-watcher/traffic-analytics", + "name": "Network Watcher traffic analytics" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": 
"Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Improves monitoring, security and troubleshooting for Azure and Hybrid connectivity\n", "pgVerified": true, - "description": "Configure geo redundant backup storage", - "potentialBenefits": "Recover from regional failure and/or disaster", - "publishedToLearn": false, + "description": "Enable traffic analytics in Network Security Group and Virtual Network Flow Logs configuration.", + "potentialBenefits": "Improves monitoring, security and troubleshooting.", "tags": null, - "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/networkWatchers", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for PostgreSQL instances that do not have geo redundant backup storage configured\r\nresources\r\n| where type == \"microsoft.dbforpostgresql/flexibleservers\"\r\n| where properties.backup.geoRedundantBackup != \"Enabled\"\r\n| project recommendationId = \"31f4ac4b-29cb-4588-8de2-d8fe6f13ceb3\", name, id, tags, param1 = strcat(\"geoRedundantBackup:\", properties['backup']['geoRedundantBackup'])\r\n" + "automationAvailable": false, + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2ab85a67-26be-4ed2-a0bb-101b2513ec63", - "recommendationTypeId": null, + "aprlGuid": "823b0cff-05c0-2e4e-a1e7-9965e1cfa16f", + "recommendationTypeId": "c9c9750b-9ddb-436f-b19a-9c725539a0b5", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/concepts-read-replicas", - "name": "Read replicas in Azure Database for PostgreSQL - Flexible Server" + "url": 
"https://learn.microsoft.com/azure/application-gateway/application-gateway-autoscaling-zone-redundant#autoscaling-and-high-availability", + "name": "Application Gateway Autoscaling Zone-Redundant" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", + "recommendationControl": "Scalability", + "longDescription": "Azure Application Gateways v2 are always deployed in a highly available fashion with multiple instances by default. Enabling autoscale ensures the service is not reliant on manual intervention for scaling.\n", "pgVerified": true, - "description": "Configure one or more read replicas", - "potentialBenefits": "Recover from regional failure and/or disaster", - "publishedToLearn": false, + "description": "Ensure Autoscale feature has been enabled", + "potentialBenefits": "Enhances uptime and enables autoscaling", "tags": null, - "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Database for PostgreSQL instances that do not have read replicas\r\nresources\r\n| where type == \"microsoft.dbforpostgresql/flexibleservers\" and properties.replicationRole == \"AsyncReplica\"\r\n| project replicaServerId = id, id = tostring(properties.sourceServerResourceId)\r\n| join kind=fullouter (resources | where type == \"microsoft.dbforpostgresql/flexibleservers\" and properties.replicationRole != \"AsyncReplica\") on id\r\n| where isempty(replicaServerId)\r\n| project recommendationId = \"2ab85a67-26be-4ed2-a0bb-101b2513ec63\", name, id = id1, tags, param1 = strcat(\"replicationRole:\", properties['replicationRole'])\r\n" + "query": "// Azure Resource 
Graph Query\n// This query will return all Application Gateways that do not have autoscale enabled or have a min capacity of 1\nresources\n| where type =~ \"microsoft.network/applicationGateways\"\n| where isnull(properties.autoscaleConfiguration) or properties.autoscaleConfiguration.minCapacity <= 1\n| project recommendationId = \"823b0cff-05c0-2e4e-a1e7-9965e1cfa16f\", name, id, tags, param1 = \"autoScaleConfiguration: isNull or MinCapacity <= 1\"\n| order by id asc\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6293a3cc-6b4a-4c0f-9ea7-b8ae8d7dd3d5", + "aprlGuid": "233a7008-71e9-e745-923e-1a1c7a0b92f3", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/postgresql/flexible-server/how-to-auto-grow-storage-portal", - "name": "Storage autogrow using Azure portal in Azure Database for PostgreSQL - Flexible Server" + "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway#security", + "name": "Application Gateway Security" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/ssl-overview", + "name": "Application Gateway SSL Overview" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-ssl-policy-overview", + "name": "Application Gateway SSL Policy Overview" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/key-vault-certs", + "name": "Application Gateway KeyVault Certs" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/ssl-certificate-management", + "name": "Application Gateway SSL Cert Management" } ], - "recommendationControl": "Scalability", - "longDescription": "Configure storage auto-grow to prevent the server from running out of storage and becoming read-only.\n", - "pgVerified": false, - "description": "Configure storage auto-grow", - "potentialBenefits": "Scale storage automatically to meet increasing demand", 
- "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "Secure all incoming connections using HTTPS for production services with end-to-end SSL/TLS or SSL/TLS termination at the Application Gateway to protect against attacks and ensure data remains private and encrypted between the web server and browsers.\n", + "pgVerified": true, + "description": "Secure all incoming connections with SSL", + "potentialBenefits": "Enhanced security and privacy", "tags": null, - "recommendationResourceType": "Microsoft.DBforPostgreSQL/flexibleServers", + "recommendationResourceType": "Microsoft.Network/applicationGateways", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// You can use the following Azure Resource Graph query to check if an HTTP rule is using an SSL certificate or is using Azure Key Vault to store the certificates\nresources\n| where type =~ \"microsoft.network/applicationGateways\"\n| mv-expand frontendPorts = properties.frontendPorts\n| mv-expand httpListeners = properties.httpListeners\n| where isnull(parse_json(httpListeners.properties.sslCertificate))\n| project recommendationId=\"233a7008-71e9-e745-923e-1a1c7a0b92f3\", name, id, tags, param1=strcat(\"frontendPort: \", frontendPorts.properties.port), param2=\"tls: false\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "013ac34e-7c4b-425f-9e0c-216f0cc06181", - "recommendationTypeId": null, + "aprlGuid": "8d9223c4-730d-ca47-af88-a9a024c37270", + "recommendationTypeId": "efe75f01-6fff-5d9d-08e6-092b98d3fb3f", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/configure-validation-environment?tabs=azure-portal", - "name": "Configure a host pool as a validation environment" + "url": 
"https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway", + "name": "Well-Architected Framework Application Gateway Overview" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/features#web-application-firewall", + "name": "Application Gateway - Web Application Firewall" } ], - "recommendationControl": "Governance", - "longDescription": "Validation host pools let you monitor service updates before the service applies them to your standard or non-validation environment.\n", + "recommendationControl": "Security", + "longDescription": "Use Application Gateway with Web Application Firewall (WAF) in an application virtual network to safeguard inbound HTTP/S internet traffic. WAF offers centralized defense against potential exploits through OWASP core rule sets-based rules.\n", "pgVerified": true, - "description": "Create a validation host pool", - "potentialBenefits": "Enhanced environment stability", - "publishedToLearn": false, + "description": "Enable Web Application Firewall policies", + "potentialBenefits": "Enhanced security for HTTP/S traffic", "tags": null, - "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n" + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will return all Application Gateways that do not have WAF enabled\nResources\n| where type =~ \"microsoft.network/applicationGateways\"\n| where properties.firewallpolicy != \"\"\n| project recommendationId = \"8d9223c4-730d-ca47-af88-a9a024c37270\", name, id, tags, param1 = \"webApplicationFirewallConfiguration: isNull\"\n| order by id asc\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "979ff8be-5f3a-4d8e-9aa3-407ecdd6d6f7", - "recommendationTypeId": null, + 
"aprlGuid": "7893f0b3-8622-1d47-beed-4b50a19f7895", + "recommendationTypeId": "0e19257e-dcef-4d00-8de1-5fe1ae0fd948", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/scheduled-agent-updates", - "name": "Scheduled Agent Updates for Azure Virtual Desktop host pools" + "url": "https://learn.microsoft.com/azure/application-gateway/overview-v2", + "name": "Application Gateway Overview V2" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/overview-v2#feature-comparison-between-v1-sku-and-v2-sku", + "name": "Application Gateway Feature Comparison Between V1 and V2" + }, + { + "url": "https://azure.microsoft.com/updates/application-gateway-v1-will-be-retired-on-28-april-2026-transition-to-application-gateway-v2/", + "name": "Application Gateway V1 Retirement" } ], - "recommendationControl": "Governance", - "longDescription": "Create up to two maintenance windows for the Azure Virtual Desktop agent, side-by-side stack, and Geneva Monitoring agent to get updated so that updates don't happen during peak business hours.\n", + "recommendationControl": "Scalability", + "longDescription": "Use Application Gateway v2 for built-in features like autoscaling, static VIPs, Azure KeyVault integration for better traffic management and performance, unless v1 is necessary.\n", "pgVerified": true, - "description": "Configure host pool scheduled agent updates", - "potentialBenefits": "Enhanced environment stability", - "publishedToLearn": false, + "description": "Migrate to Application Gateway v2", + "potentialBenefits": "Better performance, autoscaling, more features", "tags": null, - "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// Azure Resource Graph Query\r\n// This resource graph query will return all AVD host pools that does not have scheduled agent updates 
configured\r\nresources\r\n| where type =~ \"Microsoft.DesktopVirtualization/hostpools\"\r\n| where isnull(properties.agentUpdate)\r\n| project recommendationId = \"979ff8be-5f3a-4d8e-9aa3-407ecdd6d6f7\", name, id, tags, param1 = 'No scheduled agent updates'\r\n" + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Get all Application Gateways, which are using the deprecated V1 SKU\nresources\n| where type =~ 'microsoft.network/applicationgateways'\n| extend tier = properties.sku.tier\n| where tier == 'Standard' or tier == 'WAF'\n| project recommendationId = \"7893f0b3-8622-1d47-beed-4b50a19f7895\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "939cb85c-102a-4e0a-ab82-5c92116d3778", + "aprlGuid": "5d035919-898d-a047-8d5d-454e199692e5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/windows-server/identity/ad-ds/deploy/virtual-dc/adds-on-azure-vm#configure-the-vms-and-install-active-directory-domain-services", - "name": "Configure the VMs and install Active Directory Domain Services" + "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-metrics", + "name": "Application Gateway Metrics" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-diagnostics", + "name": "Application Gateway Diagnostics" } ], - "recommendationControl": "Governance", - "longDescription": "Place domain joined session hosts VMs in unique OUs. Segregate Prod and DR units for environment-specific settings. This ensures targeted configurations for session hosts, including FSLogix, session controls, etc.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Enable logging in storage accounts, Log Analytics, and monitoring services for auditing and insights. 
If using NSGs, enable NSG flow logs to be stored, providing in-depth traffic analysis into Azure Cloud.\n", "pgVerified": true, - "description": "Ensure a unique OU is used when deploying host pools with domain joined session hosts", - "potentialBenefits": "Improved AVD hostpool config & segmentation", - "publishedToLearn": false, + "description": "Monitor and Log the configurations and traffic", + "potentialBenefits": "Enhanced traffic insight and audit", "tags": null, - "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "38721758-2cc2-4d6b-b7b7-8b47dadbf7df", + "aprlGuid": "847a8d88-21c4-bc48-a94e-562206edd767", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/site-recovery/site-recovery-overview", - "name": "About Site Recovery" + "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-probe-overview", + "name": "Application Gateway Probe Overview" + }, + { + "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway", + "name": "Well-Architected Framework Application Gateway Overview" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Implement Azure Site Recovery (ASR) to replicate or backup stateful session hosts. 
This replicates VMs to a secondary Azure region or availability zone, ensuring recovery from a known VM state in case of an outage.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Using custom health probes enhances understanding of backend availability and facilitates monitoring of backend services for any impact.\n", "pgVerified": true, - "description": "Use Azure Site Recovery to protect stateful session hosts", - "potentialBenefits": "Ensures VM recovery & failover", - "publishedToLearn": false, + "description": "Use Health Probes to detect backend availability", + "potentialBenefits": "Ensures backend uptime monitoring.", "tags": null, - "recommendationResourceType": "Microsoft.Compute/virtualMachines", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Application Gateways are not using health probes to monitor the availability of the backend systems\nresources\n| where type =~ \"microsoft.network/applicationGateways\"\n| where array_length(properties.probes) == 0\n| project recommendationId=\"847a8d88-21c4-bc48-a94e-562206edd767\", name, id, tags, param1=\"customHealthProbeUsed: false\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "499769ae-67c9-492e-9ca5-cfd4cece5209", - "recommendationTypeId": null, + "aprlGuid": "c9c00f2a-3888-714b-a72b-b4c9e8fcffb2", + "recommendationTypeId": "5c488377-be3e-4365-92e8-09d1e8d9038c", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/autoscale-scaling-plan?tabs=portal", - "name": "Create and assign an autoscale scaling plan" + "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway#reliability", + "name": "Well-Architected 
Framework Application Gateway Reliability" + }, + { + "url": "https://learn.microsoft.com/azure/application-gateway/overview-v2", + "name": "Application Gateway V2 Overview" } ], - "recommendationControl": "Scalability", - "longDescription": "Scaling plans can only be assigned to host pools in the same region, on multi-region deployment scenario each region should has its own scaling plan.\n", + "recommendationControl": "High Availability", + "longDescription": "Deploying Application Gateway in a zone-aware configuration ensures continued customer access to services even if a specific zone goes down, as services in other zones remain available.\n", "pgVerified": true, - "description": "Create scaling plans per region", - "potentialBenefits": "Enhanced scaling", - "publishedToLearn": false, + "description": "Deploy Application Gateway in a zone-redundant configuration", + "potentialBenefits": "Enhanced uptime and customer access", "tags": null, - "recommendationResourceType": "Microsoft.DesktopVirtualization/scalingPlans", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n" + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// list Application Gateways that are not configured to use at least 2 Availability Zones\nresources\n| where type =~ \"microsoft.network/applicationGateways\"\n| where isnull(zones) or array_length(zones) < 2\n| extend zoneValue = iff((isnull(zones)), \"null\", zones)\n| project recommendationId = \"c9c00f2a-3888-714b-a72b-b4c9e8fcffb2\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Zones value: \", zoneValue )\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "783c6c18-760b-4867-9ced-3010a0bc5aa3", + "aprlGuid": "10f02bc6-e2e7-004d-a2c2-f9bf9f16b915", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ 
{ - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-bulk-identity-mgmt", - "name": "Import and export IoT Hub device identities in bulk" + "url": "https://learn.microsoft.com/azure/application-gateway/features#connection-draining", + "name": "Application Gateway Connection Draining" }, { - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr#manual-failover", - "name": "IoT Hub high availability and disaster recovery" + "url": "https://learn.microsoft.com/azure/application-gateway/configuration-http-settings#connection-draining", + "name": "Application Gateway Connection Draining HTTP Settings" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Device Identities should be copied to the failover region IoT Hub for all IoT devices to ensure connectivity in case of a failover. Manual Failover to another region is quicker (RTO), suitable for mission critical workloads.\n", - "pgVerified": false, - "description": "Device Identities are exported to a secondary region", - "potentialBenefits": "Faster failover; Ensures device connectivity", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Using connection draining for backend maintenance ensures graceful removal of backend pool members during updates or health issues. 
It's enabled via Backend Setting and applies to all members during rule creation.\n", + "pgVerified": true, + "description": "Plan for backend maintenance by using connection draining", + "potentialBenefits": "Smooth updates, no dropped users", "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will check if connection draining is enabled\nresources\n| where type =~ \"microsoft.network/applicationGateways\"\n| mv-expand backendHttpSettings = properties.backendHttpSettingsCollection\n| extend connectionDrainingEnabled = backendHttpSettings.properties.connectionDraining.enabled\n| where connectionDrainingEnabled != true\n| extend backendPoolName = backendHttpSettings.name\n| project recommendationId = \"10f02bc6-e2e7-004d-a2c2-f9bf9f16b915\", name, id, tags, param1 = \"connectionDraining: Disabled\", param2 = strcat(\"backendSettingsName: \", backendPoolName)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "eeba3a49-fef0-481f-a471-7ff01139b474", - "recommendationTypeId": null, + "aprlGuid": "8364fd0a-7c0e-e240-9d95-4bf965aec243", + "recommendationTypeId": "ef4da732-f541-4109-bc0e-465c68b6c7eb", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-scaling", - "name": "Choose the right IoT Hub tier and size for your solution" + "url": "https://learn.microsoft.com/en-us/azure/application-gateway/configuration-infrastructure#size-of-the-subnet", + "name": "Azure Application Gateway infrastructure configuration | Microsoft Learn" } ], - "recommendationControl": "High Availability", - "longDescription": "In a production scenario, the IoT Hub tier should not be 
Free because the Free tier does not provide the necessary Service Level Agreement.\n", - "pgVerified": false, - "description": "Do not use free tier", - "potentialBenefits": "Ensures SLA for production", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "Application Gateway v2 (Standard_v2 or WAF_v2 SKU) can support up to 125 instances. A /24 subnet isn't mandatory for deployment but is advised to provide enough space for autoscaling and maintenance upgrades.\n", + "pgVerified": true, + "description": "Ensure Application Gateway Subnet is using a /24 subnet mask", + "potentialBenefits": "Allows autoscaling and maintenance", "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationResourceType": "Microsoft.Network/applicationGateways", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// list all IoT Hubs that are using the Free tier\r\nresources\r\n| where type =~ \"microsoft.devices/iothubs\" and\r\n tostring(sku.tier) =~ 'Free'\r\n| project recommendationId=\"eeba3a49-fef0-481f-a471-7ff01139b474\", name, id, tags, param1=strcat(\"tier:\", tostring(sku.tier))\r\n\r\n" + "query": "// Azure Resource Graph Query\n// This query will validate the subnet id for an appGW ends with a /24\n\nresources\n| where type =~ 'Microsoft.Network/applicationGateways'\n| extend subnetid = tostring(properties.gatewayIPConfigurations[0].properties.subnet.id)\n| join kind=leftouter(resources\n | where type == \"microsoft.network/virtualnetworks\"\n | mv-expand properties.subnets\n | extend subnetid = tostring(properties_subnets.id)\n | extend addressprefix = tostring(properties_subnets.properties.addressPrefix)\n | project subnetid, addressprefix) on subnetid\n| where addressprefix !endswith '/24'\n| project recommendationId = \"8364fd0a-7c0e-e240-9d95-4bf965aec243\", name, id, tags, param1 = strcat('AppGW subnet prefix: ', addressprefix)\n\n" }, 
{ "publishedToAdvisor": null, - "aprlGuid": "214cbc46-747e-4354-af6e-6bf0054196a5", - "recommendationTypeId": null, + "aprlGuid": "d37db635-157f-584d-9bce-4f6fc8c65ce5", + "recommendationTypeId": "8d61a7d4-5405-4f43-81e3-8c6239b844a6", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr#availability-zones", - "name": "Azure IoT Hub high availability and disaster recovery" + "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", + "name": "Designing for disaster recovery with ExpressRoute private peering" } ], "recommendationControl": "High Availability", - "longDescription": "In regions supporting Availability Zones for IoT Hub, using these zones boosts availability. They're automatically activated for new IoT Hubs in supported areas.\n", - "pgVerified": false, - "description": "Use Availability Zones", - "potentialBenefits": "Boosts IoT Hub availability", - "publishedToLearn": false, + "longDescription": "To increase reliability, it's advised that each ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience.\n", + "pgVerified": true, + "description": "Connect ExpressRoute gateway with circuits from diverse peering locations for resilience", + "potentialBenefits": "Enhanced resiliency for Azure service", "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of ExpressRoute Gateways that are not connected to two or more ExpressRoute Circuits. 
Baremetal circuits are excluded from consideration\n//This query assumes that the running entity has visibility to the gateway, connection, and circuit scopes.\n//Start with a full list of gateways\n(resources\n| where type == \"microsoft.network/virtualnetworkgateways\"\n| where properties.gatewayType == \"ExpressRoute\"\n| extend exrGatewayId = tolower(tostring(id))\n| join kind=inner(\nresources\n| where type == \"microsoft.network/virtualnetworkgateways\"\n| where properties.gatewayType == \"ExpressRoute\"\n| extend exrGatewayId = tolower(tostring(id))\n| join kind=leftouter(\n//connections joined with circuit peer info\nresources\n| where type == \"microsoft.network/connections\"\n| extend connectionType = properties.connectionType\n| extend exrGatewayId = tolower(tostring(properties.virtualNetworkGateway1.id))\n| extend peerId = tolower(tostring(properties.peer.id))\n| extend connectionId = tolower(tostring(id))\n| where connectionType == \"ExpressRoute\"\n| join kind=leftouter(\n resources\n | where type == \"microsoft.network/expressroutecircuits\"\n //should this be location instead of peeringLocation\n | extend circuitId = tolower(tostring(id))\n | extend peeringLocation = tostring(properties.serviceProviderProperties.peeringLocation)\n | extend peerId = tolower(id)\n) on peerId ) on exrGatewayId\n//remove bare metal services connections/circuits\n| where not(isnotnull(connectionId) and isnull(sku1))\n//group by gateway ID's and peering locations\n| summarize by exrGatewayId, peeringLocation\n//summarize to connections with fewer than two unique connections\n| summarize connCount = count() by exrGatewayId\n| where connCount < 2) on exrGatewayId\n| project recommendationId = \"d37db635-157f-584d-9bce-4f6fc8c65ce5\", name, id, tags, param1 = \"twoOrMoreCircuitsConnectedFromDifferentPeeringLocations: false\")\n| union\n(\nresources\n| where type == \"microsoft.network/virtualnetworkgateways\"\n| where properties.gatewayType == \"ExpressRoute\"\n| extend
exrGatewayId = tolower(tostring(id))\n| join kind=leftouter(\n//connections joined with circuit peer info\nresources\n| where type == \"microsoft.network/connections\"\n| extend connectionType = properties.connectionType\n| extend exrGatewayId = tolower(tostring(properties.virtualNetworkGateway1.id))\n| extend peerId = tolower(tostring(properties.peer.id))\n| extend connectionId = tolower(tostring(id))\n| where connectionType == \"ExpressRoute\") on exrGatewayId\n| where isnull(connectionType)\n| project recommendationId = \"d37db635-157f-584d-9bce-4f6fc8c65ce5\", name, id, tags, param1 = \"twoOrMoreCircuitsConnectedFromDifferentPeeringLocations: false\", param2 = \"noConnectionsOnGateway: true\"\n)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b1e1378d-4572-4414-bebd-b8872a6d4d1c", - "recommendationTypeId": null, + "aprlGuid": "bbe668b7-eb5c-c746-8b82-70afdedf0cae", + "recommendationTypeId": "c9af1ef6-55bc-48af-bfe4-2c80490159f8", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/iot-dps/concepts-service", - "name": "IoT Hub Device Provisioning Service (DPS) terminology" + "url": "https://learn.microsoft.com/azure/expressroute/expressroute-about-virtual-network-gateways#zrgw", + "name": "About ExpressRoute virtual network gateways - Zone-redundant gateway SKUs" }, { - "url": "https://learn.microsoft.com/en-us/azure/iot-dps/concepts-deploy-at-scale", - "name": "Best practices for large-scale IoT device deployments" + "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", + "name": "About zone-redundant virtual network gateway in Azure availability zones" }, { - "url": "https://learn.microsoft.com/en-us/azure/iot-dps/iot-dps-ha-dr", - "name": "IoT Hub Device Provisioning Service high availability and disaster recovery" + "url": "https://learn.microsoft.com/azure/vpn-gateway/create-zone-redundant-vnet-gateway", + "name": "Create a zone-redundant virtual network 
gateway in Azure Availability Zones" } ], - "recommendationControl": "Scalability", - "longDescription": "Device Provisioning Service (DPS) enables easy redistribution of IoT devices for scaling and availability, allowing devices to be reassigned and not bound to specific IoT Hub instances. Devices in IoT Hubs using DPS should be verified for DPS utilization.\n", - "pgVerified": false, - "description": "Use Device Provisioning Service", - "potentialBenefits": "Enhances scalability and availability", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure ExpressRoute gateway offers variable SLAs based on deployment in single or multiple availability zones. To deploy virtual network gateways across zones automatically, use zone-redundant gateways for accessing critical, scalable services with increased resilience.\n", + "pgVerified": true, + "description": "Use Zone-redundant ExpressRoute gateway SKUs", + "potentialBenefits": "Enhanced SLA and resilience", "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// list all IoT Hubs that do not have a linked IoT Hub Device Provisioning Service (DPS)\r\nresources\r\n| where type =~ \"microsoft.devices/iothubs\"\r\n| project id, iotHubName=tostring(properties.hostName), tags, resourceGroup\r\n| join kind=fullouter (\r\n resources\r\n | where type == \"microsoft.devices/provisioningservices\"\r\n | mv-expand iotHubs=properties.iotHubs\r\n | project iotHubName = tostring(iotHubs.name), dpsName = name, name=iotHubs.name\r\n) on iotHubName\r\n| where dpsName == ''\r\n| project recommendationId=\"b1e1378d-4572-4414-bebd-b8872a6d4d1c\", name=iotHubName, id, tags, param1='DPS:none'\r\n\r\n" + "query": "// Azure Resource Graph Query\n// For all VNGs of type ExpressRoute, show any 
that do not have AZ in the SKU tier\nresources\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\n| where properties.gatewayType == \"ExpressRoute\"\n| where properties.sku.tier !contains 'AZ'\n| project recommendationId = \"bbe668b7-eb5c-c746-8b82-70afdedf0cae\", name, id, tags, param1= strcat(\"sku-tier: \" , properties.sku.tier), param2=location\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "02568a5d-335e-4e51-9f7c-fe2ada977300", + "aprlGuid": "c0f23a92-d322-4d4d-97e9-a238b5e3bbb8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr", - "name": "IoT Hub high availability and disaster recovery" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?tabs=json", + "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" } ], "recommendationControl": "High Availability", - "longDescription": "In case of a regional failure, an IoT Hub can failover to a second region, automatically or manually, to ensure your application continues working.\n", - "pgVerified": false, - "description": "Define Failover Guidelines", - "potentialBenefits": "Ensures business continuity", - "publishedToLearn": false, + "longDescription": "Configuring an Azure Resource lock for ExpressRoute gateway prevents accidental deletion by enabling administrators to lock an Azure subscription, resource group, or resource, thereby protecting them from unintended user deletions and modifications, with the lock overriding all user permissions.\n", + "pgVerified": true, + "description": "Configure an Azure Resource lock for ExpressRoute gateway to prevent accidental deletion", + "potentialBenefits": "Prevents accidental deletions", "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", - "recommendationImpact": "High", + "recommendationResourceType": 
"Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e7dbd21f-b27a-4b8c-a901-cedb1e6d8e1e", + "aprlGuid": "1c34faa8-8b99-974c-adbf-71922eae943c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-devguide-messages-d2c#fallback-route", - "name": "Use message routing - Fallback route" - } - ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Using message routing for custom endpoints in IoT Hub, messages might not reach these destinations if specific conditions are unmet. A default route ensures all messages are received, but disabling this safety net risks leaving some messages undelivered.\n", - "pgVerified": false, - "description": "Disabled Fallback Route", - "potentialBenefits": "Prevents undelivered messages", - "publishedToLearn": false, - "tags": null, - "recommendationResourceType": "Microsoft.Devices/IotHubs", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// list all IoT Hubs that have the fallback route disabled\r\nresources\r\n| where type == \"microsoft.devices/iothubs\"\r\n| extend fallbackEnabled=properties.routing.fallbackRoute.isEnabled\r\n| where fallbackEnabled == false\r\n| project recommendationId=\"e7dbd21f-b27a-4b8c-a901-cedb1e6d8e1e\", name, id, tags, param1='FallbackRouteEnabled:false'\r\n\r\n" - }, - { - "publishedToAdvisor": null, - "aprlGuid": "43663217-a1d3-844b-80ea-571a2ce37c6c", - "recommendationTypeId": "b57f7a29-dcc8-43de-86fa-18d3f9d3764d", - "recommendationMetadataState": "Active", - "learnMoreLink": [ - { - "url": "https://learn.microsoft.com/azure/cosmos-db/distribute-data-globally", - "name": "Distribute data globally with Azure Cosmos DB" + 
"url": "https://learn.microsoft.com/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways", + "name": "ExpressRoute monitoring, metrics, and alerts | ExpressRoute gateways" }, { - "url": "https://learn.microsoft.com/azure/cosmos-db/high-availability#tips-for-building-highly-available-applications", - "name": "Tips for building highly available applications" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-network-insights", + "name": "Azure ExpressRoute Insights using Network Insights" } ], - "recommendationControl": "High Availability", - "longDescription": "Enable a secondary region in Cosmos DB for higher SLA without downtime. Simple as pinning a location on a map. For Strong consistency, configure at least three regions for write availability in case of failure.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Use Network Insights for monitoring ExpressRoute Gateway's health, including availability, performance, and scalability.\n", "pgVerified": true, - "description": "Configure at least two regions for high availability", - "potentialBenefits": "Enhances SLA and resilience", - "publishedToLearn": false, + "description": "Monitor gateway health for ExpressRoute gateways", + "potentialBenefits": "Enhanced monitoring and alerting", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to find Azure Cosmos DB accounts that have less than 2 regions or less than 3 regions with strong consistency level\r\nResources\r\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\r\n| where\r\n array_length(properties.locations) < 2 or\r\n (array_length(properties.locations) < 3 and properties.consistencyPolicy.defaultConsistencyLevel == 'Strong')\r\n| project 
recommendationId='43663217-a1d3-844b-80ea-571a2ce37c6c', name, id, tags\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9cabded7-a1fc-6e4a-944b-d7dd98ea31a2", - "recommendationTypeId": "5de9f2e6-087e-40da-863a-34b7943beed4", + "aprlGuid": "194c14ac-0d7a-5a48-ae32-75fa450ee564", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/how-to-manage-database-account#automatic-failover", - "name": "Manage an Azure Cosmos DB account by using the Azure portal" + "url": "https://learn.microsoft.com/azure/expressroute/expressroute-about-virtual-network-gateways#vnet-to-vnet-connectivity", + "name": "About ExpressRoute virtual network gateways - VNet-to-VNet connectivity" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Cosmos DB boasts high uptime and resiliency. Even so, issues may arise. With Service-Managed failover, if a region is down, Cosmos DB automatically switches to the next available region, requiring no user action.\n", + "recommendationControl": "High Availability", + "longDescription": "While multiple VNets can connect via the same ExpressRoute gateway, Microsoft recommends using alternatives like VNet peering, Azure Firewall, NVA, Azure Route Server, site-to-site VPN, virtual WAN, or SD-WAN for VNet-to-VNet communication to optimize network performance and management.\n", "pgVerified": true, - "description": "Enable service-managed failover for multi-region accounts with single write region", - "potentialBenefits": "Auto failover for high uptime", - "publishedToLearn": false, + "description": "Avoid using ExpressRoute circuits for VNet to VNet communication", + "potentialBenefits": "Enhanced VNet integration efficiency", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", - "recommendationImpact": "High", - 
"automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to list all Azure Cosmos DB accounts that do not have multiple write locations or automatic failover enabled\r\nResources\r\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\r\n| where\r\n array_length(properties.locations) > 1 and\r\n tobool(properties.enableAutomaticFailover) == false and\r\n tobool(properties.enableMultipleWriteLocations) == false\r\n| project recommendationId='9cabded7-a1fc-6e4a-944b-d7dd98ea31a2', name, id, tags\r\n" + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "921631f6-ed59-49a5-94c1-f0f3ececa580", + "aprlGuid": "3e115044-a3aa-433e-be01-ce17d67e50da", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/reliability/reliability-cosmos-db-nosql", - "name": "High availability in Azure Cosmos DB" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/customer-controlled-gateway-maintenance#azure-portal-steps", + "name": "Configure customer-controlled maintenance for your virtual network gateway - ExpressRoute | Microsoft Learn" } ], "recommendationControl": "High Availability", - "longDescription": "When availability zones are configured, Azure Cosmos DB intelligently distributes the 4 replicas of your data across all available zones. It ensures that your Azure Cosmos DB can withstand an outage in one availability zone and remain fully operational throughout.\n", - "pgVerified": false, - "description": "Enable availability zones", - "potentialBenefits": "Enhances high availability", - "publishedToLearn": false, + "longDescription": "ExpressRoute gateways are updated for improved functionality, reliability, performance, and security. 
Customer-controlled maintenance configuration and scheduling minimize update impact and align with your maintenance windows.\n", + "pgVerified": true, + "description": "Configure customer-controlled ExpressRoute gateway maintenance", + "potentialBenefits": "Minimizes update impact", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to find Azure Cosmos DB accounts that do not utilize availability zones and are deployed in availability-zone supported regions\r\nResources\r\n| where type == \"microsoft.documentdb/databaseaccounts\"\r\n| where properties.capabilities !has_cs 'EnableServerless'\r\n| project recommendationId='921631f6-ed59-49a5-94c1-f0f3ececa580', name, id, tags, locations=properties.locations\r\n| mv-expand locations\r\n| where not(locations.isZoneRedundant) //filter out already AZ enabled regions\r\n| extend location=tostring(locations.locationName)\r\n| project-away locations\r\n| where location in (\r\n 'Australia East', 'Brazil South', 'Canada Central', 'Central India', 'Central US',\r\n 'China North 3', 'East Asia', 'East US', 'East US 2', 'France Central',\r\n 'Germany West Central', 'Israel Central', 'Italy North', 'Japan East', 'Japan West',\r\n 'Korea Central', 'Mexico Central', 'New Zealand North', 'North Europe', 'Norway East',\r\n 'Poland Central', 'Qatar Central', 'South Africa North', 'South Central US', 'Southeast Asia',\r\n 'Spain Central', 'Sweden Central', 'Switzerland North', 'UAE North', 'UK South',\r\n 'US Gov Virginia', 'West Europe', 'West US 2', 'West US 3') // filter out regions unsupported for AZs\r\n| project-rename param1=location\r\n" + "query": "// Azure Resource Graph Query\n// Find all Virtual Network Gateways without Maintenance Configurations\n\nresources\n| where type =~ 
\"Microsoft.Network/virtualNetworkGateways\"\n| extend resourceId = tolower(id)\n| join kind=leftouter (\n maintenanceresources\n | where type =~ \"Microsoft.Maintenance/configurationAssignments\"\n | project JsonData = parse_json(properties)\n | extend maintenanceConfigurationId = tolower(tostring(JsonData.maintenanceConfigurationId))\n | join kind=inner (\n resources\n | where type =~ \"Microsoft.Maintenance/maintenanceConfigurations\"\n | project maintenanceConfigurationId=tolower(id)\n ) on maintenanceConfigurationId\n | project maintenanceConfigurationId, resourceId=tolower(tostring(JsonData.resourceId))\n) on resourceId\n| where isempty(maintenanceConfigurationId)\n| project recommendationId = \"3e115044-a3aa-433e-be01-ce17d67e50da\", name, id, tags, param1= strcat(\"sku-tier: \" , properties.sku.tier), param2=location\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9ce78192-74a0-104c-b5bb-9a443f941649", + "aprlGuid": "5b1933a6-90e4-f642-a01f-e58594e5aab2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/distribute-data-globally", - "name": "Distribute data globally with Azure Cosmos DB" + "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", + "name": "Zone redundant Virtual network gateway in availability zone" }, { - "url": "https://learn.microsoft.com/azure/cosmos-db/conflict-resolution-policies", - "name": "Conflict resolution types and resolution policies in Azure Cosmos DB" + "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways#gwskus", + "name": "Gateway SKU" + }, + { + "url": "https://www.microsoft.com/licensing/docs/view/Service-Level-Agreements-SLA-for-Online-Services?lang=1", + "name": "SLA summary for Azure services" } ], "recommendationControl": "High Availability", - "longDescription": "Multi-region write capability allows for designing applications that are highly 
available across multiple regions, though it demands careful attention to consistency requirements and conflict resolution. Improper setup may decrease availability and cause data corruption due to unhandled conflicts.\n", + "longDescription": "Azure VPN gateway offers variable SLAs based on deployment in one or two availability zones. Deploying zone-redundant virtual network gateways across availability zones ensures zone-resiliency, improving access to mission-critical, scalable services on Azure.\n", "pgVerified": true, - "description": "Evaluate multi-region write capability", - "potentialBenefits": "Enhances high availability", - "publishedToLearn": false, + "description": "Choose a Zone-redundant VPN gateway", + "potentialBenefits": "Enhanced reliability and scalability", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query to find Azure Cosmos DB accounts that have multiple read locations but do not have multiple write locations enabled\r\nResources\r\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\r\n| where\r\n array_length(properties.locations) > 1 and\r\n properties.enableMultipleWriteLocations == false\r\n| project recommendationId='9ce78192-74a0-104c-b5bb-9a443f941649', name, id, tags\r\n\r\n" + "query": "// Azure Resource Graph Query\n// For all VNGs of type Vpn, show any that do not have AZ in the SKU tier\nresources\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\n| where properties.gatewayType == \"Vpn\"\n| where properties.sku.tier !contains 'AZ'\n| project recommendationId = \"5b1933a6-90e4-f642-a01f-e58594e5aab2\", name, id, tags, param1= strcat(\"sku-tier: \" , properties.sku.tier), param2=location\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"e544520b-8505-7841-9e77-1f1974ee86ec", - "recommendationTypeId": "52fef986-5897-4359-8b92-0f22749f0d73", + "aprlGuid": "281a2713-c0e0-3c48-b596-19f590c46671", + "recommendationTypeId": "c249dc0e-9a17-423e-838a-d72719e8c5dd", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/continuous-backup-restore-introduction", - "name": "Continuous backup with point in time restore feature in Azure Cosmos DB" + "url": "https://learn.microsoft.com/azure/vpn-gateway/active-active-portal#gateway", + "name": "Active-active VPN gateway" + }, + { + "url": "https://learn.microsoft.com/azure/vpn-gateway/vpn-gateway-about-vpn-gateway-settings#gwsku", + "name": "Gateway SKU" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Cosmos DB's backup is always on, offering protection against data mishaps. Continuous mode allows for self-serve restoration to a pre-mishap point, unlike periodic mode which requires contacting Microsoft support, leading to longer restore times.\n", + "recommendationControl": "High Availability", + "longDescription": "The active-active mode is available for all SKUs except Basic, allowing for two Gateway IP configurations and two public IP addresses, enhancing redundancy and traffic handling.\n", "pgVerified": true, - "description": "Configure continuous backup mode", - "potentialBenefits": "Faster self-serve data restore", - "publishedToLearn": false, + "description": "Enable Active-Active VPN Gateways for redundancy", + "potentialBenefits": "Enhanced reliability and network capacity", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Query all Azure Cosmos DB accounts that do not have continuous backup mode 
configured\r\nResources\r\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\r\n| where\r\n properties.backupPolicy.type == 'Periodic' and\r\n properties.enableMultipleWriteLocations == false and\r\n properties.enableAnalyticalStorage == false\r\n| project recommendationId='e544520b-8505-7841-9e77-1f1974ee86ec', name, id, tags\r\n" + "query": "// Azure Resource Graph Query\n// Identifies non-active-active VPN type virtual network gateways\nresources\n| where type =~ 'Microsoft.Network/virtualNetworkGateways'\n| where properties.gatewayType =~ \"vpn\"\n| extend gatewayType = properties.gatewayType, vpnType = properties.vpnType, connections = properties.connections, activeactive=properties.activeActive\n| where activeactive == false\n| project recommendationId = \"281a2713-c0e0-3c48-b596-19f590c46671\", name, id, tags\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c006604a-0d29-684c-99f0-9729cb40dac5", + "aprlGuid": "af11fc4c-c06c-4f4c-b98d-6eee6d5c4c70", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/query/pagination#handling-multiple-pages-of-results", - "name": "Pagination in Azure Cosmos DB" + "url": "https://learn.microsoft.com/azure/vpn-gateway/vpn-gateway-highlyavailable#dual-redundancy-active-active-vpn-gateways-for-both-azure-and-on-premises-networks", + "name": "Dual-redundancy active-active VPN gateways for both Azure and on-premises networks" } ], - "recommendationControl": "Scalability", - "longDescription": "Cosmos DB has a 4 MB response limit, leading to paginated results for large or partition-spanning queries. Each page shows availability and provides a continuation token for the next. 
A while loop in code is necessary to traverse all pages until completion.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Deploying active-active VPN concentrators and Azure VPN Gateways maximizes resilience and availability using a fully-meshed topology with four IPSec tunnels.\n", "pgVerified": true, - "description": "Ensure query results are fully drained", - "potentialBenefits": "Maximizes data retrieval efficiency", - "publishedToLearn": false, + "description": "Deploy active-active VPN concentrators on your premises for maximum resiliency with VPN gateways", + "potentialBenefits": "Maximizes resilience and availability", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7eb32cf9-9a42-1540-acf8-597cbba8a418", + "aprlGuid": "9eab120e-f6d3-ee49-ba0d-766562ce7df1", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/conceptual-resilient-sdk-applications", - "name": "Designing resilient applications with Azure Cosmos DB SDKs" + "url": "https://learn.microsoft.com/azure/vpn-gateway/monitor-vpn-gateway-reference", + "name": "VPN gateway data reference" } ], - "recommendationControl": "Scalability", - "longDescription": "Using a single instance of the SDK client for each account and application is crucial as connections are tied to the client. 
Compute environments have a limit on open connections, affecting connectivity when exceeded.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Set up monitoring and alerts for Virtual Network Gateway health to utilize a variety of metrics for ensuring operational efficiency and prompt response to any disruptions.\n", "pgVerified": true, - "description": "Maintain singleton pattern in your client", - "potentialBenefits": "Optimizes connections and efficiency", - "publishedToLearn": false, + "description": "Monitor VPN gateway connections and health", + "potentialBenefits": "Improved uptime and issue awareness", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fa6ac22f-0584-bb4b-80e4-80f4755d1a97", + "aprlGuid": "9186dae0-7ddc-8f4b-bea5-55538cea4893", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/conceptual-resilient-sdk-applications", - "name": "Designing resilient applications with Azure Cosmos DB SDKs" + "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/metrics-getting-started", + "name": "Getting started with Azure Metrics Explorer" + }, + { + "url": "https://learn.microsoft.com/azure/vpn-gateway/monitor-vpn-gateway-reference#metrics", + "name": "Monitor VPN gateway" } ], - "recommendationControl": "High Availability", - "longDescription": "Cosmos DB SDKs automatically manage many transient errors through retries. 
Despite this, it's crucial for applications to implement additional retry policies targeting specific cases that the SDKs can't generically address, ensuring more robust error handling.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "VPN gateway leverages service health to inform users about both planned and unplanned maintenance, ensuring they are notified about modifications to their VPN connectivity.\n", "pgVerified": true, - "description": "Implement retry logic in your client", - "potentialBenefits": "Enhances error handling resilience", - "publishedToLearn": false, + "description": "Enable VPN gateway service health", + "potentialBenefits": "Improves VPN maintenance alerts", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "deaea200-013c-414b-ac9f-bfa7a7fb13f0", + "aprlGuid": "4bae5a28-5cf4-40d9-bcf1-623d28f6d917", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cosmos-db/create-alerts", - "name": "Create alerts for Azure Cosmos DB using Azure Monitor" + "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", + "name": "About zone-redundant virtual network gateway in Azure availability zones" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring the availability and responsiveness of Azure Cosmos DB resources and having alerts set up for your workload is a good practice. 
This ensures you stay proactive in handling unforeseen events.\n", + "recommendationControl": "High Availability", + "longDescription": "For zone-redundant VPN gateways, always use zone-redundant Standard SKU public IPs to avoid deploying all instances in one zone. This ensures the gateway's reliability, applying to both active-passive (single IP) and active-active (dual IP) setups.\n", "pgVerified": true, - "description": "Monitor Cosmos DB health and set up alerts", - "potentialBenefits": "Proactive issue management", - "publishedToLearn": false, + "description": "Deploy zone-redundant VPN gateways with zone-redundant Public IP(s)", + "potentialBenefits": "Enhanced reliability and disaster recovery", "tags": null, - "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n" + "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of zone-redundant Azure VPN gateways associated with non-zone-redundant Public IPs\nresources\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\n| where properties.gatewayType == \"Vpn\"\n| where properties.sku.tier contains 'AZ'\n| mv-expand ipconfig = properties.ipConfigurations\n| extend pipId = tostring(ipconfig.properties.publicIPAddress.id)\n| join kind=inner (\n resources\n | where type == \"microsoft.network/publicipaddresses\"\n | where isnull(zones) or array_length(zones) < 3 )\n on $left.pipId == $right.id\n| project recommendationId = \"4bae5a28-5cf4-40d9-bcf1-623d28f6d917\", name, id, tags, param1 = strcat(\"PublicIpAddressName: \", name1), param2 = strcat (\"PublicIpAddressId: \",id1), param3 = strcat (\"PublicIpAddressTags: \",tags1)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "54c3191b-b535-1946-bba9-b754f44060f6", + "aprlGuid": 
"fd43ea32-2ccf-49a8-ada4-9a78794e3ff1", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/event-grid/enable-diagnostic-logs-topic", - "name": "Azure Event Grid - Enable diagnostic logs for Event Grid resources" + "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#point-to-site-vpn-gateway", + "name": "Virtual WAN Monitoring Best Practices" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Enabling diagnostic settings on Azure Event Grid resources like custom topics, system topics, and domains lets you capture and view diagnostic information to troubleshoot failures effectively.\n", + "longDescription": "Set up monitoring and alerts for Point-to-Site VPN gateways. Create alert rule for ensuring promptly response to critical events such as Gateway overutilization, connection count limits and User VPN route limits.", "pgVerified": false, - "description": "Configure Diagnostic Settings for all Azure Event Grid resources", - "potentialBenefits": "Enhanced troubleshooting for Event Grid", - "publishedToLearn": false, + "description": "Monitor health for v-Hub's Point-to-Site VPN gateways", + "potentialBenefits": "Detection and mitigation to avoid disruptions.", "tags": null, - "recommendationResourceType": "Microsoft.EventGrid/topics", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Network/p2sVpnGateways", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "92162eb5-4323-3145-8a6c-525ce2f0700e", + "aprlGuid": "7d09523b-b3c0-403e-b104-d5d46240d683", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/event-grid/delivery-and-retry#dead-letter-events", - "name": "Azure 
Event Grid delivery and retry" + "url": "https://learn.microsoft.com/azure/reliability/reliability-dns", + "name": "Reliability in Azure DNS" } ], - "recommendationControl": "Personalized", - "longDescription": "Event Grid may not deliver an event within a specific time or after several attempts, leading to dead-lettering where undelivered events are sent to a storage account.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Azure DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets.\n", "pgVerified": false, - "description": "Configure Dead-letter to save events that cannot be delivered", - "potentialBenefits": "Saves undelivered events", - "publishedToLearn": false, + "description": "Ensure Time-To-Live (TTL) is set appropriately to ensure RPOs can be met", + "potentialBenefits": "Ensures that no cached DNS records exist past RPO targets", "tags": null, - "recommendationResourceType": "Microsoft.EventGrid/topics", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Network/dnsZones", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b2069f64-4741-3d4a-a71d-50c8b03f5ab7", - "recommendationTypeId": "bdac9c7b-b9b8-f572-0450-f161c430861c", + "aprlGuid": "60077378-7cb1-4b35-89bb-393884d9921d", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/event-grid/configure-private-endpoints", - "name": "Configure private endpoints for Azure Event Grid topics or domains" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-howto-erdirect#state", + "name": "How to configure ExpressRoute Direct 
Change Admin State of links" } ], - "recommendationControl": "Security", - "longDescription": "Use private endpoints for secure event ingress to custom topics/domains via a private link, avoiding the public internet. It employs an IP from the VNet space for your topic/domain.\n", - "pgVerified": false, - "description": "Azure Event Grid topics should use Private Link Private Endpoints", - "potentialBenefits": "Secure, private VNet ingress", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "In Azure ExpressRoute Direct, the \"Admin State\" indicates the administrative status of layer 1 links, showing if a link is enabled or disabled, effectively turning the physical port on or off.\n", + "pgVerified": true, + "description": "The Admin State of both Links of an ExpressRoute Direct should be in Enabled state", + "potentialBenefits": "Ensures optimal connectivity.", "tags": null, - "recommendationResourceType": "Microsoft.EventGrid/topics", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/ExpressRoutePorts", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all eventgrid services not protected by private endpoints.\r\nResources\r\n| where type contains \"eventgrid\"\r\n| where properties['publicNetworkAccess'] == \"Enabled\"\r\n| project recommendationId = \"b2069f64-4741-3d4a-a71d-50c8b03f5ab7\", name, id, tags\r\n| order by id asc\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Express Route Directs that do not have Admin State of both Links Enabled\nresources\n| where type == \"microsoft.network/expressrouteports\"\n| where properties['links'][0]['properties']['adminState'] == \"Disabled\" or properties['links'][1]['properties']['adminState'] == \"Disabled\"\n| project recommendationId = \"60077378-7cb1-4b35-89bb-393884d9921d\", name, id, tags, param1 = strcat(\"Link1AdminState: \", 
properties['links'][0]['properties']['adminState']), param2 = strcat(\"Link2AdminState: \", properties['links'][1]['properties']['adminState'])\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "84636c6c-b317-4722-b603-7b1ffc16384b", + "aprlGuid": "0bee356b-7348-4799-8cab-0c71ffe13018", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/event-hubs/event-hubs-geo-dr?tabs=portal#availability-zones", - "name": "Azure Event Hubs - Geo-disaster recovery" + "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-erdirect-about?source=recommendations#circuit-sizes", + "name": "About ExpressRoute Direct Circuit Sizes" } ], - "recommendationControl": "High Availability", - "longDescription": "When using the Azure portal, zone redundancy is automatically enabled. However, some Infrastructure as Code (IaC) tools may default this to false. To ensure replication of metadata and events across data centers in an availability zone, always verify that zone redundancy is enabled.\n", + "recommendationControl": "Scalability", + "longDescription": "Provisioning ExpressRoute circuits on a 10-Gbps or 100-Gbps ExpressRoute Direct resource up to 20-Gbps or 200-Gbps is possible but not recommended for resiliency. 
If an ExpressRoute Direct port fails, and circuits are using full capacity, the remaining port won't handle the extra load.\n", "pgVerified": true, - "description": "Ensure zone redundancy is enabled in supported regions", - "potentialBenefits": "Enhanced fault tolerance for Event Hub", - "publishedToLearn": false, + "description": "Ensure you do not over-subscribe an ExpressRoute Direct", + "potentialBenefits": "Improves resilience during port failures", "tags": null, - "recommendationResourceType": "Microsoft.EventHub/namespaces", + "recommendationResourceType": "Microsoft.Network/ExpressRoutePorts", "recommendationImpact": "High", "automationAvailable": true, - "query": null + "query": "// Azure Resource Graph Query\n// Find all Express Route Directs that are over subscribed\nresources\n| where type == \"microsoft.network/expressrouteports\"\n| where toint(properties['provisionedBandwidthInGbps']) > toint(properties['bandwidthInGbps'])\n| project recommendationId = \"0bee356b-7348-4799-8cab-0c71ffe13018\", name, id, tags, param1 = strcat(\"provisionedBandwidthInGbps: \", properties['provisionedBandwidthInGbps']), param2 = strcat(\"bandwidthInGbps: \", properties['bandwidthInGbps'])\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fbfef3df-04a5-41b2-a8fd-b8541eb04956", + "aprlGuid": "55815823-d588-4cb7-a5b8-ae581837356e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/event-hubs/event-hubs-auto-inflate", - "name": "Azure Event Hubs - Automatically scale throughput units" + "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/expressRoutePorts/", + "name": "Azure Monitor Baseline Alerts - expressRoutePorts" } ], - "recommendationControl": "Scalability", - "longDescription": "Enable auto-inflate on Event Hub Standard tier namespaces to automatically scale up throughput units (TUs), meeting usage needs and preventing data ingress or egress throttle 
scenarios by adjusting to allowed rates.\n", - "pgVerified": true, - "description": "Enable auto-inflate on Event Hub Standard tier", - "potentialBenefits": "Prevents throttling by autoscaling TUs", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Use Network Insights for monitoring ExpressRoute Port light levels, bits per second in/out, and line protocol. Set alerts based on Azure Monitor Baseline Alerts for light levels, bits per second in/out, and line protocol exceeding specific thresholds.\n", + "pgVerified": false, + "description": "Configure monitoring and alerting for ExpressRoute Ports", + "potentialBenefits": "Enhanced network performance and health", "tags": null, - "recommendationResourceType": "Microsoft.EventHub/namespaces", + "recommendationResourceType": "Microsoft.Network/expressRoutePorts", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Event Hub namespace instances that are Standard tier and do not have Auto Inflate enabled\r\nresources\r\n| where type == \"microsoft.eventhub/namespaces\"\r\n| where sku.tier == \"Standard\"\r\n| where properties.isAutoInflateEnabled == \"false\"\r\n| project recommendationId = \"fbfef3df-04a5-41b2-a8fd-b8541eb04956\", name, id, tags, param1 = \"AutoInflateEnabled: False\"\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "be448849-0d7d-49ba-9c94-9573ee533d5d", + "aprlGuid": "38c3bca1-97a1-eb42-8cd3-838b243f35ba", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/service-health/resource-health-overview", - "name": "Resource Health" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/service-health/resource-health-alert-monitor-guide#create-a-resource-health-alert-rule-in-the-azure-portal", - "name": "Configure Resource 
Health alerts in the Azure portal" + "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-load-balancer/reliability", + "name": "Reliability and Azure Load Balancer" }, { - "url": "https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal", - "name": "Alerts Health" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", + "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Configure Resource Health Alerts for all applicable resources to stay informed about the current and historical health status of your Azure resources. They notify you when these resources have a change in their health status.\n", + "recommendationControl": "High Availability", + "longDescription": "Selecting Standard SKU Load Balancer enhances reliability through availability zones and zone resiliency, ensuring deployments withstand zone and region failures. 
Unlike Basic, it supports global load balancing and offers an SLA.\n", "pgVerified": true, - "description": "Configure Resource Health Alerts", - "potentialBenefits": "Stay informed on resource status", - "publishedToLearn": false, + "description": "Use Standard Load Balancer SKU", + "potentialBenefits": "Enhanced reliability and SLA support", "tags": null, - "recommendationResourceType": "Microsoft.Insights/activityLogAlerts", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/loadBalancers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all LoadBalancers using Basic SKU\nresources\n| where type =~ 'Microsoft.Network/loadBalancers'\n| where sku.name == 'Basic'\n| project recommendationId = \"38c3bca1-97a1-eb42-8cd3-838b243f35ba\", name, id, tags, Param1=strcat(\"sku-tier: basic\")\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9729c89d-8118-41b4-a39b-e12468fa872b", + "aprlGuid": "6d82d042-6d61-ad49-86f0-6a5455398081", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/service-health/overview", - "name": "What is Azure Service Health?" 
- }, - { - "url": "https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal", - "name": "Configure alerts for service health events" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", + "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Service health gives a personalized health view of Azure services and regions used, offering the best place for notifications on outages, planned maintenance, and health advisories by knowing the services used.\n", + "recommendationControl": "High Availability", + "longDescription": "Deploying Azure Load Balancers with at least two instances in the backend prevents a single point of failure and supports scalability. Pairing with Virtual Machine Scale Sets is advised for optimal scale building.\n", "pgVerified": true, - "description": "Configure Service Health Alerts", - "potentialBenefits": "Proactive outage and maintenance alerts", - "publishedToLearn": false, + "description": "Ensure the Backend Pool contains at least two instances", + "potentialBenefits": "Enhances reliability and scalability", "tags": null, - "recommendationResourceType": "Microsoft.Insights/activityLogAlerts", + "recommendationResourceType": "Microsoft.Network/loadBalancers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This resource graph query will return all subscriptions without Service Health alerts configured.\r\n\r\nresourcecontainers\r\n| where type == 'microsoft.resources/subscriptions'\r\n| project subscriptionAlerts=tostring(id),name,tags\r\n| join kind=leftouter (\r\n resources\r\n | where type == 'microsoft.insights/activitylogalerts' and properties.condition contains \"ServiceHealth\"\r\n | extend subscriptions = properties.scopes\r\n | project subscriptions\r\n | mv-expand 
subscriptions\r\n | project subscriptionAlerts = tostring(subscriptions)\r\n) on subscriptionAlerts\r\n| where isempty(subscriptionAlerts1)\r\n| project-away subscriptionAlerts1\r\n| project recommendationId = \"9729c89d-8118-41b4-a39b-e12468fa872b\",id=subscriptionAlerts,name,tags\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all LoadBalancers which only have 1 backend pool defined or only 1 VM in the backend pool\nresources\n| where type =~ 'Microsoft.Network/loadBalancers'\n| extend bep = properties.backendAddressPools\n| extend BackEndPools = array_length(bep)\n| where BackEndPools == 0\n| project recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", name, id, Param1=\"backendPools\", Param2=toint(0), tags\n| union (resources\n | where type =~ 'Microsoft.Network/loadBalancers'\n | where sku.name == \"Standard\"\n | extend bep = properties.backendAddressPools\n | extend BackEndPools = toint(array_length(bep))\n | mv-expand bip = properties.backendAddressPools\n | extend BackendAddresses = array_length(bip.properties.loadBalancerBackendAddresses)\n | where toint(BackendAddresses) <= 1\n | project recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", name, id, tags, Param1=\"backendAddresses\", Param2=toint(BackendAddresses))\n| union (\n resources\n | where type =~ 'Microsoft.Network/loadBalancers'\n | where sku.name == \"Basic\"\n | mv-expand properties.backendAddressPools\n | extend backendPoolId = properties_backendAddressPools.id\n | project id, name, tags, tostring(backendPoolId), recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", Param1=\"BackEndPools\"\n | join kind = leftouter (\n resources\n | where type =~ \"Microsoft.Network/networkInterfaces\"\n | mv-expand properties.ipConfigurations\n | mv-expand properties_ipConfigurations.properties.loadBalancerBackendAddressPools\n | extend backendPoolId = tostring(properties_ipConfigurations_properties_loadBalancerBackendAddressPools.id)\n | summarize poolMembers = count() 
by backendPoolId\n | project tostring(backendPoolId), poolMembers ) on backendPoolId\n | where toint(poolMembers) <= 1\n | extend BackendAddresses = poolMembers\n | project id, name, tags, recommendationId, Param1=\"backendAddresses\", Param2=toint(BackendAddresses))\n" }, { "publishedToAdvisor": null, - "aprlGuid": "dac421ec-2832-4c37-839e-b6dc5a38f2fa", + "aprlGuid": "8d319a05-677b-944f-b9b4-ca0fb42e883c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/app/convert-classic-resource", - "name": "Migrate an Application Insights classic resource to a workspace-based resource" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", + "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" } ], - "recommendationControl": "Service Upgrade and Retirement", - "longDescription": "Classic Application Insights retires in February 2024. To minimize disruption to existing application monitoring scenarios, transition to workspace-based Application Insights before 29 February 2024.\n", - "pgVerified": false, - "description": "Convert Classic Deployments", - "potentialBenefits": "Avoid service disruption post-Feb 2024", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Outbound rules for Standard Public Load Balancer involve manual port allocation for backend pools, limiting scalability and risk of SNAT port exhaustion. 
NAT Gateway is recommended for its dynamic scaling and secure internet connectivity.\n", + "pgVerified": true, + "description": "Use NAT Gateway instead of Outbound Rules for Production Workloads", + "potentialBenefits": "Enhanced scalability and reliability", "tags": null, - "recommendationResourceType": "Microsoft.Insights/components", + "recommendationResourceType": "Microsoft.Network/loadBalancers", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// Filters Application Insights resources with ‘Classic’ deployment type\r\nresources\r\n| where type =~ \"microsoft.insights/components\"\r\n| extend IngestionMode = properties.IngestionMode\r\n| where IngestionMode =~ 'ApplicationInsights'\r\n| project recommendationId= \"dac421ec-2832-4c37-839e-b6dc5a38f2fa\", name, id, tags, param1=\"ApplicationInsightsDeploymentType: Classic\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all LoadBalancers with Outbound rules configured\nresources\n| where type =~ 'Microsoft.Network/loadBalancers'\n| extend outboundRules = array_length(properties.outboundRules)\n| where outboundRules > 0\n| project recommendationId = \"8d319a05-677b-944f-b9b4-ca0fb42e883c\", name, id, tags, Param1 = \"outboundRules: >=1\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1cca00d2-d9ab-8e42-a788-5d40f49405cb", - "recommendationTypeId": "78211c00-15a9-336e-17c4-0b48613dadf4", + "aprlGuid": "621dbc78-3745-4d32-8eac-9e65b27b7512", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/key-vault/general/soft-delete-overview", - "name": "Azure Key Vault soft-delete overview" + "url": "https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-availability-zones#zone-redundant", + "name": "Load Balancer and Availability Zones" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Key Vault's soft-delete 
feature enables recovery of deleted vaults and objects like keys, secrets, and certificates. When enabled, marked resources are retained for 90 days, allowing for their recovery, essentially undoing deletion.\n", - "pgVerified": false, - "description": "Key vaults should have soft delete enabled", - "potentialBenefits": "Enables recovery of deleted items", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "In regions with Availability Zones, assigning a zone-redundant frontend IP to a Standard Load Balancer ensures continuous traffic distribution even if one availability zone fails, provided other healthy zones and backend instances are available to receive the traffic.\n", + "pgVerified": true, + "description": "Ensure Standard Load Balancer is zone-redundant", + "potentialBenefits": "Enhances uptime and resilience", "tags": null, - "recommendationResourceType": "Microsoft.KeyVault/vaults", + "recommendationResourceType": "Microsoft.Network/loadBalancers", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Key Vaults that do not have soft delete enabled.\r\nresources\r\n| where type == \"microsoft.keyvault/vaults\"\r\n| where isnull(properties.enableSoftDelete) or properties.enableSoftDelete != \"true\"\r\n| project recommendationId = \"1cca00d2-d9ab-8e42-a788-5d40f49405cb\", name, id, tags, param1 = \"EnableSoftDelete: Disabled\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all LoadBalancers with with regional or zonal public IP Addresses\nresources\n| where type == \"microsoft.network/loadbalancers\"\n| where tolower(sku.name) != 'basic'\n| mv-expand feIPconfigs = properties.frontendIPConfigurations\n| extend\n feConfigName = (feIPconfigs.name),\n PrivateSubnetId = toupper(feIPconfigs.properties.subnet.id),\n PrivateIPZones = feIPconfigs.zones,\n PIPid = 
toupper(feIPconfigs.properties.publicIPAddress.id),\n JoinID = toupper(id)\n| where isnotempty(PrivateSubnetId)\n| where isnull(PrivateIPZones) or array_length(PrivateIPZones) < 2\n| project name, feConfigName, id\n| union (resources\n | where type == \"microsoft.network/loadbalancers\"\n | where tolower(sku.name) != 'basic'\n | mv-expand feIPconfigs = properties.frontendIPConfigurations\n | extend\n feConfigName = (feIPconfigs.name),\n PIPid = toupper(feIPconfigs.properties.publicIPAddress.id),\n JoinID = toupper(id)\n | where isnotempty(PIPid)\n | join kind=innerunique (\n resources\n | where type == \"microsoft.network/publicipaddresses\"\n | where isnull(zones) or array_length(zones) < 2\n | extend\n LBid = toupper(substring(properties.ipConfiguration.id, 0, indexof(properties.ipConfiguration.id, '/frontendIPConfigurations'))),\n InnerID = toupper(id)\n ) on $left.PIPid == $right.InnerID)\n| project recommendationId = \"621dbc78-3745-4d32-8eac-9e65b27b7512\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Frontend IP Configuration:\", \" \", feConfigName)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "70fcfe6d-00e9-5544-a63a-fff42b9f2edb", - "recommendationTypeId": "4ed62ae4-5072-f9e7-8d94-51c76c48159a", + "aprlGuid": "e5f5fcea-f925-4578-8599-9a391e888a60", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/key-vault/general/soft-delete-overview#purge-protection", - "name": "Azure Key Vault purge-protection overview" + "url": "https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-custom-probe-overview", + "name": "Load Balancer Health Probe Overview" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Purge protection secures against malicious deletions by enforcing a retention period for soft deleted key vaults, ensuring no one, not even insiders or Microsoft, can purge your key vaults during this period, 
preventing permanent data loss.\n", - "pgVerified": false, - "description": "Key vaults should have purge protection enabled", - "potentialBenefits": "Protects from insider attacks, avoids data loss", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Health probes are used by Azure Load Balancers to determine the status of backend endpoints. Using custom health probes that are aligned with vendor recommendations enhances understanding of backend availability and facilitates monitoring of backend services for any impact.\n", + "pgVerified": true, + "description": "Use Health Probes to detect backend instances availability", + "potentialBenefits": "Ensures backend uptime monitoring.", "tags": null, - "recommendationResourceType": "Microsoft.KeyVault/vaults", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/loadBalancers", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This resource graph query will return all Key Vaults that do not have Purge Protection enabled.\r\nresources\r\n| where type == \"microsoft.keyvault/vaults\"\r\n| where isnull(properties.enablePurgeProtection) or properties.enablePurgeProtection != \"true\"\r\n| project recommendationId = \"70fcfe6d-00e9-5544-a63a-fff42b9f2edb\", name, id, tags, param1 = \"EnablePurgeProtection: Disabled\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// List the load balancers which don't have health probe configured\nresources\n| where type =~ \"microsoft.network/loadbalancers\"\n| where array_length(properties.probes) == 0\n| project recommendationId=\"e5f5fcea-f925-4578-8599-9a391e888a60\", name, id, tags, param1=\"customHealthProbeUsed: false\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "00c3d2b0-ea6e-4c4b-89be-b78a35caeb51", - "recommendationTypeId": "2e96bc2f-1972-e471-9e70-ae58d41e9d2a", + "aprlGuid": "2820f6d6-a23c-7a40-aec5-506f3bd1aeb6", + 
"recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/key-vault/general/security-features#network-security", - "name": "Azure Key Vault Private Link Service overview" + "url": "https://learn.microsoft.com/en-us/azure/dns/dns-protect-private-zones-recordsets", + "name": "Protecting private DNS Zones and Records - Azure DNS" } ], "recommendationControl": "Security", - "longDescription": "Azure Private Link Service lets you securely and privately connect to Azure Key Vault via a Private Endpoint in your VNet, using a private IP and eliminating public Internet exposure.\n", - "pgVerified": false, - "description": "Private endpoint should be configured for Key Vault", - "potentialBenefits": "Secure Key Vault with Private Link", - "publishedToLearn": false, + "longDescription": "Assign the built-in Private DNS Zone Contributor role to specific authorized users, groups, and entities to protect against unauthorized or accidental changes to Private DNS Zones and records. 
Restrict access by granting Private DNS Zone Contributor permission to all zones.\n", + "pgVerified": true, + "description": "Protect private DNS zones and records", + "potentialBenefits": "Prevents DNS outages", "tags": null, - "recommendationResourceType": "Microsoft.KeyVault/vaults", + "recommendationResourceType": "Microsoft.Network/privateDnsZones", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This resource graph query will return all Key Vaults that does not have a Private Endpoint Connection or where a private endpoint exists but public access is enabled\r\n\r\nresources\r\n| where type == \"microsoft.keyvault/vaults\"\r\n| where isnull(properties.privateEndpointConnections) or properties.privateEndpointConnections[0].properties.provisioningState != (\"Succeeded\") or (isnull(properties.networkAcls) and properties.publicNetworkAccess == 'Enabled')\r\n| extend param1 = strcat('Private Endpoint: ', iif(isnotnull(properties.privateEndpointConnections),split(properties.privateEndpointConnections[0].properties.privateEndpoint.id,'/')[8],'No Private Endpoint'))\r\n| extend param2 = strcat('Access: ', iif(properties.publicNetworkAccess == 'Disabled', 'Public Access Disabled', iif(isnotnull(properties.networkAcls), 'NetworkACLs in place','Public Access Enabled')))\r\n| project recommendationId = \"00c3d2b0-ea6e-4c4b-89be-b78a35caeb51\", name, id, tags, param1, param2\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e7091145-3642-bd41-bb58-66502e64d2cd", + "aprlGuid": "ab896e8c-49b9-2c44-adec-98339aff7821", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/key-vault/general/best-practices#why-we-recommend-separate-key-vaults", - "name": "Azure Key Vault best practices overview" + "url": 
"https://azure.github.io/azure-monitor-baseline-alerts/services/Network/privateDnsZones/", + "name": "Azure Monitor Baseline Alerts - privateDnsZones" } ], - "recommendationControl": "Governance", - "longDescription": "Key vaults are security boundaries for secret storage. Grouping secrets together increases risk during a security event, as attacks could access multiple secrets.\n", - "pgVerified": false, - "description": "Use separate key vaults per application per environment", - "potentialBenefits": "Enhanced security, Reduced risk", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Use Azure Monitor to monitor Private DNS Zone query volume, record set count, and capacity metrics for Record Set, Virtual Network Link, and Virtual Network Link with auto-registration. Create alerts based on Azure Monitor Baseline Alerts for these metrics that exceed specific thresholds.\n", + "pgVerified": true, + "description": "Monitor Private DNS Zones health and set up alerts", + "potentialBenefits": "Enhanced DNS reliability and alerting", "tags": null, - "recommendationResourceType": "Microsoft.KeyVault/vaults", + "recommendationResourceType": "Microsoft.Network/privateDnsZones", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1dc0821d-4f14-7644-bab4-ba208ff5f7fa", - "recommendationTypeId": "88bbc99c-e5af-ddd7-6105-6150b2bfa519", + "aprlGuid": "1e02335c-1f90-fd4e-a5a5-d359c7b22d70", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/key-vault/general/logging?tabs=Vault", - "name": "Azure Key Vault logging overview" + "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/ready/azure-best-practices/private-link-and-dns-integration-at-scale", + "name": "Private Link and DNS integration 
at scale" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Enable logs, set up alerts, and adhere to retention requirements for improved monitoring and security of Key Vault access, detailing the frequency and identity of users.\n", - "pgVerified": false, - "description": "Diagnostic logs in Key Vault should be enabled", - "potentialBenefits": "Enhanced monitoring and security compliance", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "For business continuity scenarios with a low recovery time objective (RTO), ensure that distinct regional production and disaster recovery (DR) Private DNS Zones are configured and have identical workload and resource DNS entries. This keeps DNS resolution consistent across both zones.\n", + "pgVerified": true, + "description": "Use regional Private DNS Zones when there is a low recovery time objective (RTO) requirement", + "potentialBenefits": "Ensures seamless failover for DNS during a regional outage", "tags": null, - "recommendationResourceType": "Microsoft.KeyVault/vaults", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Network/privateDnsZones", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "af426a99-62a6-6b4c-9662-42d220b413b8", + "aprlGuid": "3538aa48-c40b-455b-a93b-269fe6e65be2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-service-levels", - "name": "Service levels for Azure NetApp Files | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/reliability/reliability-dns", + "name": "Reliability in Azure DNS" } ], - "recommendationControl": "Scalability", - "longDescription": "Service levels, part of capacity pool attributes, 
determine the maximum throughput per volume quota in Azure NetApp Files. It combines read and write speed, offering three levels: Standard (16 MiB/s per 1TiB), Premium (64 MiB/s per 1TiB), and Ultra (128 MiB/s per 1TiB) throughput.\n", - "pgVerified": true, - "description": "Use the correct service level and volume quota size for the expected performance level", - "potentialBenefits": "Optimized performance and cost efficiency", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Azure Private DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets.\n", + "pgVerified": false, + "description": "Ensure Time-To-Live (TTL) is set appropriately to ensure RPOs can be met", + "potentialBenefits": "Ensures that no cached DNS records exist past RPO targets", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Network/privateDnsZones", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ab984130-c57b-6c4a-8d04-6723b4e1bdb6", + "aprlGuid": "30ec8a5e-46de-4323-87e9-a7c56b72813b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-network-topologies", - "name": "Guidelines for Azure NetApp Files network planning | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#virtual-hub", + "name": "Virtual WAN Monitoring Best Practices" } ], - "recommendationControl": "Scalability", - "longDescription": "Standard network feature in 
Azure NetApp Files enhances IP limits and VNet capabilities, including network security groups, user-defined routes on subnets, and diverse connectivity options.\n", - "pgVerified": true, - "description": "Use standard network features for production in Azure NetApp Files", - "potentialBenefits": "Enhanced connectivity and security", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Set up monitoring and alerts for v-Hubs. Create alert rules to ensure a prompt response to changes in BGP status and Data processed by v-Hubs.", + "pgVerified": false, + "description": "Monitor health for v-Hubs", + "potentialBenefits": "Detection and mitigation to avoid disruptions.", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without standard network features.\r\nresources\r\n| where type =~ \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\r\n| where properties.networkFeatures != \"Standard\"\r\n| project recommendationId = \"ab984130-c57b-6c4a-8d04-6723b4e1bdb6\", name, id, tags\r\n\r\n" + "recommendationResourceType": "Microsoft.Network/virtualHubs", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": null }, { "publishedToAdvisor": null, - "aprlGuid": "47d100a5-7f85-5742-967a-67eb5081240a", + "aprlGuid": "d0cfe47f-686b-5043-bf83-5a3868acb80a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/use-availability-zones", - "name": "Use availability zones for high availability in Azure NetApp Files | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-monitor?pivots=front-door-standard-premium#access-logs", + "name": "Azure 
Web Application Firewall monitoring and logging - Access Log" + }, + { + "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-tuning?pivots=front-door-standard-premium#understanding-waf-logs", + "name": "Understanding WAF logs" + }, + { + "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/application-gateway-waf-configuration?tabs=portal", + "name": "Web Application Firewall exclusion lists" + }, + { + "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/web-application-firewall-troubleshoot#fixing-false-positives", + "name": "Fixing a false positive" } ], - "recommendationControl": "High Availability", - "longDescription": "Availability zones are distinct locations within an Azure region to withstand local failures. Deploy your workload in multiple availability zones and use application-based replication or Azure NetApp Files cross-zone replication to achieve high availability. Note that failover is a manual process.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "WAF may mistakenly block legitimate requests (false positives). 
These can be identified by examining the last 24 hours of blocked requests in Log Analytics.\n", "pgVerified": true, - "description": "Use availability zones for high availability in Azure NetApp Files", - "potentialBenefits": "High Availability across availability zones", - "publishedToLearn": false, + "description": "Inspect Azure Front Door WAF logs for wrongfully blocked legitimate requests", + "potentialBenefits": "Reduces false positives, improves access", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without an availability zone defined.\r\nResources\r\n| where type =~ \"Microsoft.NetApp/netAppAccounts/capacityPools/volumes\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where array_length(zones) == 0 or isnull(zones)\r\n| project recommendationId = \"47d100a5-7f85-5742-967a-67eb5081240a\", name, id, tags\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8bb690e8-64d5-4838-8703-9ee3dbac688f", + "aprlGuid": "537b4d94-edd1-4041-b13d-8217dfa485f0", "recommendationTypeId": null, "recommendationMetadataState": 
"Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/manage-availability-zone-volume-placement", - "name": "Manage availability zone volume placement for Azure NetApp Files | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/application-gateway-waf-metrics#logs-and-diagnostics", + "name": "Azure Web Application Firewall Monitoring and Logging" + }, + { + "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/web-application-firewall-logs#diagnostic-logs", + "name": "Diagnostic logs" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Azure NetApp Files' availability zone (AZ) volume placement feature lets you deploy volumes in the same AZ with Azure compute and other services to have within AZ latency and share the same AZ failure domain.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "WAF may block legitimate requests as false positives. 
Identifying blocked requests within the last 24 hours through Log Analytics can help manage and mitigate these incorrect blockages efficiently.\n", "pgVerified": true, - "description": "Deploy ANF volumes in the same availability zone with Azure compute and other services", - "potentialBenefits": "Within AZ latency and tolerate failure of other AZ", - "publishedToLearn": false, + "description": "Check Azure Application Gateway WAF logs for mistakenly blocked valid requests", + "potentialBenefits": "Improve false positive identification", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "72827434-c773-4345-9493-34848ddf5803", + "aprlGuid": "5357ae22-0f52-1a49-9fd4-1f00ace6add0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/snapshots-introduction", - "name": "How Azure NetApp Files snapshots work | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/ag-overview#waf-monitoring", + "name": "WAF monitoring" + }, + { + "url": "https://github.com/Azure/Azure-Network-Security/tree/master/Azure%20WAF/Workbook%20-%20WAF%20Monitor%20Workbook", + "name": "Azure Monitor Workbook for WAF" } ], - "recommendationControl": "High Availability", - "longDescription": "Azure NetApp Files snapshot technology ensures stability, scalability, and swift data recoverability without affecting performance. 
It supports automatic snapshot creation via policies for Azure NetApp Files data.\n", - "pgVerified": true, - "description": "Use snapshots for data protection in Azure NetApp Files", - "potentialBenefits": "Stable, scalable, swift recovery, no perf impact", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitoring the health of your Web Application Firewall and the applications it protects is crucial. This can be achieved through integration with Microsoft Defender for Cloud, Azure Monitor, and Azure Monitor logs, ensuring optimal performance and security.\n", + "pgVerified": false, + "description": "Monitor Web Application Firewall", + "potentialBenefits": "Enhanced security and health insight", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without a snapshot policy defined.\r\nresources\r\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\r\n| where properties.dataProtection.snapshot.snapshotPolicyId == \"\"\r\n| project recommendationId = \"72827434-c773-4345-9493-34848ddf5803\", name, id, tags\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b2fb3e60-97ec-e34d-af29-b16a0d61c2ac", + "aprlGuid": "6a8b3db9-5773-413a-a127-4f7032f34bbd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/backup-introduction", - "name": "Understand Azure NetApp Files backup | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/azure-signalr/availability-zones", + "name": "Availability zones support in 
Azure SignalR Service" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Azure NetApp Files offers a fully managed backup solution enhancing long-term recovery, archiving, and compliance.\n", - "pgVerified": true, - "description": "Enable backup for data protection in Azure NetApp Files", - "potentialBenefits": "Enhances data recovery and compliance", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Use SignalR with zone redundancy for production to improve uptime. This feature, available in the Premium tier, is activated upon creating or upgrading to Premium. Standard can upgrade to Premium without downtime.\n", + "pgVerified": false, + "description": "Enable zone redundancy for SignalR", + "potentialBenefits": "Enhances reliability and uptime", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.SignalRService/SignalR", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without a backup policy defined.\r\nresources\r\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\r\n| where properties.dataProtection.backup.backupPolicyId == \"\"\r\n| project recommendationId = \"b2fb3e60-97ec-e34d-af29-b16a0d61c2ac\", name, id, tags\r\n" + "query": "// Azure Resource Graph Query\n// Find SignalR instances that are not configured with the Premium tier\nresources\n| where type == \"microsoft.signalrservice/signalr\"\n| where sku.tier != \"Premium\"\n| project recommendationId = \"6a8b3db9-5773-413a-a127-4f7032f34bbd\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e30317d2-c502-4dfe-a2d3-0a737cc79545", + "aprlGuid": "bb6deb9d-24fa-4ee8-bc23-ac3ebc7fdf8e", "recommendationTypeId": null, 
"recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-netapp-files/cross-region-replication-introduction", - "name": "Cross-region replication of Azure NetApp Files volumes" + "url": "https://learn.microsoft.com/en-us/entra/identity/domain-services/tutorial-create-replica-set", + "name": "Create and use replica sets for resiliency or geolocation in Microsoft Entra Domain Services" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Azure NetApp Files replication offers data protection by allowing asynchronous cross-region volume replication for application failover in case of regional outages. Volumes can be replicated across regions, not concurrently with cross-zone replication. Note that failover is a manual process.\n", - "pgVerified": true, - "description": "Enable Cross-region replication of Azure NetApp Files volumes", - "potentialBenefits": "Enhanced data protection and disaster recovery", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "You need to use a minimum of Enterprise SKU for your managed domain to support replica sets.\n", + "pgVerified": false, + "description": "Use at least the Enterprise SKU", + "potentialBenefits": "The Enterprise SKU enables creation of replica sets.", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.AAD/domainServices", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without cross-region replication.\r\nresources\r\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\r\n| extend remoteVolumeRegion = properties.dataProtection.replication.remoteVolumeRegion\r\n| extend volumeType = properties.volumeType\r\n| extend replicationType = iff((remoteVolumeRegion == location), 
\"CZR\", iff((remoteVolumeRegion == \"\"),\"n/a\",\"CRR\"))\r\n| where replicationType != \"CRR\" and volumeType != \"DataProtection\"\r\n| project recommendationId = \"e30317d2-c502-4dfe-a2d3-0a737cc79545\", name, id, tags\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Gets Entra Domain Services that are using the Standard SKU\nresources\n| where type == \"microsoft.aad/domainservices\"\n| extend sku = properties.sku\n| where sku =~ 'Standard'\n| project recommendationId='bb6deb9d-24fa-4ee8-bc23-ac3ebc7fdf8e', name=name, id=id, tags=tags, param1=strcat('SKU:', sku)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e3d742e1-dacd-9b48-b6b1-510ec9f87c96", + "aprlGuid": "a3058909-fcf8-4450-88b5-499f57449178", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/cross-zone-replication-introduction", - "name": "Cross-zone replication of Azure NetApp Files volumes | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/entra/identity/domain-services/tutorial-create-replica-set", + "name": "Create and use replica sets for resiliency or geolocation in Microsoft Entra Domain Services" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "The cross-zone replication (CZR) feature enables asynchronous data replication between Azure NetApp Files volumes across different availability zones, ensuring data protection and critical application failover in case of zone-wide disasters. 
Note that failover is a manual process.\n", - "pgVerified": true, - "description": "Enable Cross-zone replication of Azure NetApp Files volumes", - "potentialBenefits": "Enhances disaster recovery across availability zones", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "To improve the resiliency of a Microsoft Entra Domain Services managed domain, or deploy to additional geographic locations close to your applications, you can use replica sets.\nYou can add a replica set to any peered virtual network in any Azure region that supports Domain Services.\n", + "pgVerified": false, + "description": "Use replica sets for resiliency or geolocation in Microsoft Entra Domain Services", + "potentialBenefits": "The replica sets provide geographical resiliency.", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationResourceType": "Microsoft.AAD/domainServices", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Azure NetApp Files volumes without cross-zone replication.\r\nresources\r\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| extend remoteVolumeRegion = properties.dataProtection.replication.remoteVolumeRegion\r\n| extend volumeType = 
properties.volumeType\r\n| extend replicationType = iff((remoteVolumeRegion == location), \"CZR\", iff((remoteVolumeRegion == \"\"),\"n/a\",\"CRR\"))\r\n| where replicationType != \"CZR\" and volumeType != \"DataProtection\"\r\n| project recommendationId = \"e3d742e1-dacd-9b48-b6b1-510ec9f87c96\", name, id, tags\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Gets Entra Domain Services that are using only one replicaSet\nresources\n| where type == \"microsoft.aad/domainservices\"\n| extend replicaSets = properties.replicaSets\n| where array_length(replicaSets) < 2\n| project recommendationId='a3058909-fcf8-4450-88b5-499f57449178', name=name, id=id, tags=tags, param1=strcat('replicaSetLocation:', replicaSets[0].location)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2f579fc9-e599-0d44-8b97-254f50ae04d8", + "aprlGuid": "4f63619f-5001-439c-bacb-8de891287727", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/monitor-azure-netapp-files", - "name": "Ways to monitor Azure NetApp Files | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/aks/availability-zones", + "name": "AKS Availability Zones" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones#zone-balancing", + "name": "Zone Balancing" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Azure NetApp Files offers metrics like allocated storage, actual usage, volume IOPS, and latency, enabling a better understanding of usage patterns and volume performance for NetApp accounts.\n", + "recommendationControl": "High Availability", + "longDescription": "Azure Availability Zones ensure high availability by offering independent locations within regions, equipped with their own power, cooling, and networking to ensure applications and data are protected from datacenter-level 
failures.\n", "pgVerified": true, - "description": "Monitor Azure NetApp Files metrics to better understand usage pattern and performance", - "potentialBenefits": "Optimize usage and performance", - "publishedToLearn": false, + "description": "Deploy AKS cluster across availability zones", + "potentialBenefits": "Enhanced fault tolerance for AKS", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns AKS clusters that do not have any availability zones enabled or only use a single zone\nresources\n| where type =~ \"Microsoft.ContainerService/managedClusters\"\n| project id, name, tags, location, pools = properties.agentPoolProfiles\n| mv-expand pool = pools\n| extend\n numOfAvailabilityZones = iif(isnull(pool.availabilityZones), 0, array_length(pool.availabilityZones))\n| where numOfAvailabilityZones < 2\n| project\n recommendationId = \"4f63619f-5001-439c-bacb-8de891287727\",\n id,\n name,\n tags,\n param1 = strcat(\"NodePoolName: \", pool.name),\n param2 = strcat(\"Mode: \", pool.mode),\n param3 = strcat(\"AvailabilityZones: \", iif(numOfAvailabilityZones == 0, \"None\", strcat(\"Zone \", strcat_array(pool.availabilityZones, \", \")))),\n param4 = strcat(\"Location: \", location)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "687ae58f-517f-ca43-90fe-922497e61283", + "aprlGuid": "5ee083cd-6ac3-4a83-8913-9549dd36cf56", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-policy-definitions", - "name": "Azure Policy definitions for Azure NetApp Files | Microsoft Learn" - }, - { - "url": 
"https://learn.microsoft.com/azure/governance/policy/tutorials/create-custom-policy-definition", - "name": "Creating custom policy definitions | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/aks/use-system-pools?tabs=azure-cli#system-and-user-node-pools", + "name": "System and user node pools" } ], - "recommendationControl": "Governance", - "longDescription": "Azure NetApp Files supports Azure policy integration using either built-in policy definitions or by creating custom ones to maintain organizational standards and compliance.\n", - "pgVerified": true, - "description": "Enforce standards and assess compliance in Azure NetApp Files with Azure policy", - "potentialBenefits": "Enforce standards and assess compliance", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "AKS assigns the kubernetes.azure.com/mode: system label to nodes in system node pools signaling the preference for system pods should be scheduled there. 
The CriticalAddonsOnly=true:NoSchedule taint can be added to your system nodes to prohibit application pods from being scheduled on them.\n", + "pgVerified": false, + "description": "Isolate system and application pods", + "potentialBenefits": "Enhanced reliability via pod isolation", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns each AKS cluster with nodepools that do not have system pods labelled with CriticalAddonsOnly\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\n| where agentPoolProfile.mode =~ 'System' // system node pools\n| extend taint = tostring(parse_json(agentPoolProfile.nodeTaints))\n| extend hasCriticalAddonsTaint = agentPoolProfile.kubeletConfig has 'CriticalAddonsOnly'\n| extend hasNodeLabel = agentPoolProfile.customNodeLabels has 'CriticalAddonsOnly'\n| extend hasCriticalAddonsOnly = hasCriticalAddonsTaint or hasNodeLabel or isempty(taint)\n| extend nodePool = tostring(parse_json(agentPoolProfile.name))\n| where hasCriticalAddonsOnly\n| project\n recommendationId=\"5ee083cd-6ac3-4a83-8913-9549dd36cf56\",\n id,\n name,\n tags,\n param1=strcat(\"nodepoolName: \", nodePool)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "cfa2244b-5436-47de-8287-b217875d3b0a", + "aprlGuid": "ca324d71-54b0-4a3e-b9e4-10e767daa9fc", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-network-features", - "name": "Configure network features for an Azure NetApp Files volume" - }, - { - "url": 
"https://learn.microsoft.com/azure/azure-netapp-files/manage-smb-share-access-control-lists", - "name": "Manage SMB share ACLs in Azure NetApp Files" - }, - { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-configure-export-policy", - "name": "Configure export policy for NFS or dual-protocol volumes" + "url": "https://learn.microsoft.com/en-us/azure/aks/concepts-identity#azure-ad-integration", + "name": "Entra integration" }, { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-access-control-lists", - "name": "Configure access control lists on NFSv4.1 volumes for Azure NetApp Files" + "url": "https://learn.microsoft.com/en-us/azure/aks/manage-azure-rbac?source=recommendations", + "name": "Use Azure role-based access control for AKS" }, { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-unix-permissions-change-ownership-mode", - "name": "Configure Unix permissions and change ownership mode for NFS and dual-protocol volumes" + "url": "https://learn.microsoft.com/en-us/azure/aks/manage-local-accounts-managed-azure-ad?source=recommendations", + "name": "Manage AKS local accounts" } ], "recommendationControl": "Security", - "longDescription": "Access to the delegated subnet should be limited to specific Azure Virtual Networks. SMB-enabled volumes' share permissions should move away from 'Everyone/Full control'. NFS-enabled volumes' access needs to be controlled via export policies and/or NFSv4.1 ACLs.\n", - "pgVerified": true, - "description": "Restrict default access to Azure NetApp Files volumes", - "potentialBenefits": "Enhanced security, Reduced data breach risk", - "publishedToLearn": false, + "longDescription": "Local Kubernetes accounts in AKS, being non-auditable and legacy, are discouraged. 
Microsoft Entra's integration offers centralized management, multi-factor authentication, RBAC for detailed access, and a secure, scalable authentication system compatible with Azure and external identity providers.\n", + "pgVerified": false, + "description": "Disable local accounts", + "potentialBenefits": "Enhanced security and access control", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns a list of AKS clusters not using AAD enabled\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend aadProfile = tostring (parse_json(properties.aadProfile))\n| extend disablelocalAdmin = tostring(parse_json(properties.disableLocalAccounts))\n| extend RBAC = tostring(parse_json(properties.enableRBAC))\n| where RBAC == \"false\"\n| project recommendationId=\"ca324d71-54b0-4a3e-b9e4-10e767daa9fc\", name, id, tags, param1=strcat(\"aadProfile: \", aadProfile), param2=strcat(\"disablelocalAdmin: \",disablelocalAdmin), param3=strcat(\"RBAC: \", RBAC)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d1e7ccc3-e6c1-40e9-a36e-fd134711c808", - "recommendationTypeId": "e4bebd74-387a-4a74-b757-475d2d1b4e3e", + "aprlGuid": "c22db132-399b-4e7c-995d-577a60881be8", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/faq-application-resilience#do-i-need-to-take-special-precautions-for-smb-based-applications", - "name": "Do I need to take special precautions for SMB-based applications? 
| Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/aks/configure-azure-cni-dynamic-ip-allocation", + "name": "Configure Azure CNI networking" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/aks/azure-cni-overlay", + "name": "Configure Azure CNI Overlay networking" } ], - "recommendationControl": "High Availability", - "longDescription": "Certain SMB applications need SMB Transparent Failover for maintenance without interrupting server connectivity. Azure NetApp Files provides this through SMB Continuous Availability for applications like Citrix App Layering, FSLogix user/profile containers, Microsoft SQL Server, MSIX app attach.\n", - "pgVerified": true, - "description": "Make use of SMB continuous availability for supported applications", - "potentialBenefits": "Zero downtime for SMB apps", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Azure CNI enhances cluster IP and network management, allowing dynamic IP allocation, scalable subnets, direct pod-VNET connectivity, and supports diverse network policies for pods and nodes with Azure Network Policies and Calico, optimizing network efficiency and security\n", + "pgVerified": false, + "description": "Configure Azure CNI networking for dynamic allocation of IPs", + "potentialBenefits": "Dynamic IP allocation, scalable subnets, direct VNET access", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Check AKS Clusters using kubenet network profile\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend networkProfile = tostring 
(parse_json(properties.networkProfile.networkPlugin))\n| where networkProfile ==\"kubenet\"\n| project recommendationId=\"c22db132-399b-4e7c-995d-577a60881be8\", name, id, tags, param1=strcat(\"networkProfile :\",networkProfile)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "60f36f9b-fac9-4160-bbf5-57af04da4f53", + "aprlGuid": "902c82ff-4910-4b61-942d-0d6ef7f39b67", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-netapp-files/faq-application-resilience#what-do-you-recommend-for-handling-potential-application-disruptions-due-to-storage-service-maintenance-events", - "name": "What do you recommend for handling potential application disruptions due to storage service maintenance events? | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/aks/cluster-autoscaler?tabs=azure-cli", + "name": "Use the Cluster Autoscaler on AKS" + }, + { + "url": "https://learn.microsoft.com/azure/aks/operator-best-practices-advanced-scheduler", + "name": "Best practices for advanced scheduler features" + }, + { + "url": "https://learn.microsoft.com/azure/aks/best-practices-performance-scale-large#node-pool-scaling", + "name": "Node pool scaling considerations and best practices" + }, + { + "url": "https://learn.microsoft.com/azure/aks/operator-best-practices-scheduler", + "name": "Best practices for basic scheduler features" } ], - "recommendationControl": "High Availability", - "longDescription": "Azure NetApp Files might undergo occasional planned maintenance such as platform updates or service and software upgrades. 
It's important to be aware of the application's resiliency settings to cope with these storage service maintenance events.\n", + "recommendationControl": "Scalability", + "longDescription": "The cluster auto-scaler in AKS adjusts node counts based on pod resource needs and available capacity, enabling scaling as per demand to prevent outages.\n", "pgVerified": true, - "description": "Ensure application resilience for service maintenance events", - "potentialBenefits": "Minimizes downtime during maintenance", - "publishedToLearn": false, + "description": "Enable the cluster auto-scaler on an existing cluster", + "potentialBenefits": "Optimizes scaling and prevents outages", "tags": null, - "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find AKS clusters with auto-scaling disabled\nResources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend autoScaling = tostring (parse_json(properties.agentPoolProfiles.[0].enableAutoScaling))\n| where autoScaling == \"false\"\n| project recommendationId=\"902c82ff-4910-4b61-942d-0d6ef7f39b67\", name, id, tags, param1=strcat(\"autoScaling :\", autoScaling)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "823b0cff-05c0-2e4e-a1e7-9965e1cfa16f", - "recommendationTypeId": "c9c9750b-9ddb-436f-b19a-9c725539a0b5", + "aprlGuid": "269a9f1a-6675-460a-831e-b05a887a8c4b", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-autoscaling-zone-redundant#autoscaling-and-high-availability", - "name": "Application Gateway Autoscaling Zone-Redundant" + "url": 
"https://learn.microsoft.com/en-us/azure/backup/azure-kubernetes-service-cluster-backup", + "name": "AKS Backups" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/aks/operator-best-practices-storage", + "name": "Best Practices for AKS Backups" } ], - "recommendationControl": "Scalability", - "longDescription": "Azure Application Gateways v2 are always deployed in a highly available fashion with multiple instances by default. Enabling autoscale ensures the service is not reliant on manual intervention for scaling.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "AKS, popular for stateful apps needing backups, can now use Azure Backup to secure clusters and attached volumes through an installed Backup Extension, enabling backup and restore operations via a Backup Vault.\n", "pgVerified": true, - "description": "Ensure Autoscale feature has been enabled", - "potentialBenefits": "Enhances uptime and enables autoscaling", - "publishedToLearn": false, + "description": "Back up Azure Kubernetes Service", + "potentialBenefits": "Ensures data safety for AKS", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all Application Gateways that do not have autoscale enabled or have a min capacity of 1\r\nresources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| where isnull(properties.autoscaleConfiguration) or properties.autoscaleConfiguration.minCapacity <= 1\r\n| project recommendationId = \"823b0cff-05c0-2e4e-a1e7-9965e1cfa16f\", name, id, tags, param1 = \"autoScaleConfiguration: isNull or MinCapacity <= 1\"\r\n| order by id asc\r\n\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find AKS clusters that do not have backup 
enabled\n\nresources\n| where type =~ 'Microsoft.ContainerService/managedClusters'\n| extend lname = tolower(name)\n| join kind=leftouter(recoveryservicesresources\n | where type =~ 'microsoft.dataprotection/backupvaults/backupinstances'\n | extend lname = tolower(tostring(split(properties.dataSourceInfo.resourceID, '/')[8]))\n | extend protectionState = properties.currentProtectionState\n | project lname, protectionState) on lname\n| where protectionState != 'ProtectionConfigured'\n| extend param1 = iif(isnull(protectionState), 'Protection Not Configured', strcat('Protection State: ', protectionState))\n| project recommendationId = \"269a9f1a-6675-460a-831e-b05a887a8c4b\", name, id, tags, param1\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "233a7008-71e9-e745-923e-1a1c7a0b92f3", + "aprlGuid": "d3111036-355d-431b-ab49-8ddad042800b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway#security", - "name": "Application Gateway Security" + "url": "https://learn.microsoft.com/azure/reliability/availability-zones-overview?tabs=azure-cli", + "name": "Availability zones overview" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/ssl-overview", - "name": "Application Gateway SSL Overview" + "url": "https://learn.microsoft.com/azure/storage/common/storage-redundancy#zone-redundant-storage", + "name": "Zone-redundant storage" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-ssl-policy-overview", - "name": "Application Gateway SSL Policy Overview" + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-redundancy#zone-redundant-storage-for-managed-disks", + "name": "ZRS disks" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/key-vault-certs", - "name": "Application Gateway KeyVault Certs" + "url": 
"https://learn.microsoft.com/azure/virtual-machines/disks-migrate-lrs-zrs", + "name": "Convert a disk from LRS to ZRS" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/ssl-certificate-management", - "name": "Application Gateway SSL Cert Management" + "url": "https://learn.microsoft.com/azure/storage/container-storage/enable-multi-zone-redundancy", + "name": "Enable multi-zone storage redundancy in Azure Container Storage" } ], - "recommendationControl": "Security", - "longDescription": "Secure all incoming connections using HTTPS for production services with end-to-end SSL/TLS or SSL/TLS termination at the Application Gateway to protect against attacks and ensure data remains private and encrypted between the web server and browsers.\n", + "recommendationControl": "High Availability", + "longDescription": "ZRS ensures data replication across three zones, protecting against zonal outages. It's available for Azure Disks, Container Storage, Files, and Blob by setting the SKU to ZRS in storage classes, enhancing multi-zone AKS clusters from v1.29.\n", "pgVerified": true, - "description": "Secure all incoming connections with SSL", - "potentialBenefits": "Enhanced security and privacy", - "publishedToLearn": false, + "description": "Use zone-redundant storage for persistent volumes when running multi-zone AKS", + "potentialBenefits": "Increases data durability and availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// You can use the following Azure Resource Graph query to check if an HTTP rule is using an SSL certificate or is using Azure Key Vault to store the certificates\r\nresources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| mv-expand frontendPorts = properties.frontendPorts\r\n| mv-expand httpListeners = properties.httpListeners\r\n| where 
isnull(parse_json(httpListeners.properties.sslCertificate))\r\n| project recommendationId=\"233a7008-71e9-e745-923e-1a1c7a0b92f3\", name, id, tags, param1=strcat(\"frontendPort: \", frontendPorts.properties.port), param2=\"tls: false\"\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8d9223c4-730d-ca47-af88-a9a024c37270", - "recommendationTypeId": "efe75f01-6fff-5d9d-08e6-092b98d3fb3f", + "aprlGuid": "b002c030-72e6-4a37-8217-1cb276c43169", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway", - "name": "Well-Architected Framework Application Gateway Overview" + "url": "https://learn.microsoft.com/azure/aks/csi-storage-drivers", + "name": "CSI Storage Drivers" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/features#web-application-firewall", - "name": "Application Gateway - Web Application Firewall" + "url": "https://learn.microsoft.com/azure/aks/csi-migrate-in-tree-volumes", + "name": "CSI Migrate in Tree Volumes" } ], - "recommendationControl": "Security", - "longDescription": "Use Application Gateway with Web Application Firewall (WAF) in an application virtual network to safeguard inbound HTTP/S internet traffic. WAF offers centralized defense against potential exploits through OWASP core rule sets-based rules.\n", + "recommendationControl": "Governance", + "longDescription": "From Kubernetes 1.26, Azure Disk and Azure File in-tree drivers are deprecated in favor of CSI drivers. 
Existing deployments remain operational but untested; users should switch to CSI drivers for new features and SKUs.\n", "pgVerified": true, - "description": "Enable Web Application Firewall policies", - "potentialBenefits": "Enhanced security for HTTP/S traffic", - "publishedToLearn": false, + "description": "Upgrade Persistent Volumes using in-tree drivers to Azure CSI drivers", + "potentialBenefits": "Ensures future compatibility", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all Application Gateways that do not have WAF enabled\r\nResources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| where properties.firewallpolicy != \"\"\r\n| project recommendationId = \"8d9223c4-730d-ca47-af88-a9a024c37270\", name, id, tags, param1 = \"webApplicationFirewallConfiguration: isNull\"\r\n| order by id asc\r\n\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7893f0b3-8622-1d47-beed-4b50a19f7895", - "recommendationTypeId": "0e19257e-dcef-4d00-8de1-5fe1ae0fd948", + "aprlGuid": "9a1c17e5-c9a0-43db-b920-adaf54d1bcb7", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-gateway/overview-v2", - "name": "Application Gateway Overview V2" - }, - { - "url": "https://learn.microsoft.com/azure/application-gateway/overview-v2#feature-comparison-between-v1-sku-and-v2-sku", - "name": "Application Gateway Feature Comparison Between V1 and V2" - }, - { - "url": "https://azure.microsoft.com/updates/application-gateway-v1-will-be-retired-on-28-april-2026-transition-to-application-gateway-v2/", - "name": 
"Application Gateway V1 Retirement" + "url": "https://kubernetes.io/docs/concepts/policy/resource-quotas/", + "name": "Resource Quotas" } ], "recommendationControl": "Scalability", - "longDescription": "Use Application Gateway v2 for built-in features like autoscaling, static VIPs, Azure KeyVault integration for better traffic management and performance, unless v1 is necessary.\n", - "pgVerified": true, - "description": "Migrate to Application Gateway v2", - "potentialBenefits": "Better performance, autoscaling, more features", - "publishedToLearn": false, + "longDescription": "A ResourceQuota object sets limits on resource use per namespace, controlling the number and type of objects created, and the total compute resources available.\n", + "pgVerified": false, + "description": "Implement Resource Quota to ensure that Kubernetes resources do not exceed hard resource limits", + "potentialBenefits": "Limits AKS resource usage per namespace", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Get all Application Gateways, which are using the deprecated V1 SKU\r\nresources\r\n| where type =~ 'microsoft.network/applicationgateways'\r\n| extend tier = properties.sku.tier\r\n| where tier == 'Standard' or tier == 'WAF'\r\n| project recommendationId = \"7893f0b3-8622-1d47-beed-4b50a19f7895\", name, id, tags\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5d035919-898d-a047-8d5d-454e199692e5", + "aprlGuid": "b4639ca7-6308-429a-8b98-92f0bf9bf813", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-metrics", - 
"name": "Application Gateway Metrics" + "url": "https://learn.microsoft.com/azure/aks/virtual-nodes", + "name": "Virtual Nodes" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-diagnostics", - "name": "Application Gateway Diagnostics" + "url": "https://learn.microsoft.com/azure/container-instances/container-instances-overview", + "name": "Azure Container Instances" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Enable logging in storage accounts, Log Analytics, and monitoring services for auditing and insights. If using NSGs, enable NSG flow logs to be stored, providing in-depth traffic analysis into Azure Cloud.\n", - "pgVerified": true, - "description": "Monitor and Log the configurations and traffic", - "potentialBenefits": "Enhanced traffic insight and audit", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "To rapidly scale AKS workloads, utilize virtual nodes for quick pod provisioning, unlike Kubernetes auto-scaler. 
For clusters with availability zones, ensure one nodepool per AZ due to persistent volumes not working across AZs, preventing auto-scaler pod creation failures if lacking access.\n", + "pgVerified": false, + "description": "Attach Virtual Nodes (ACI) to the AKS cluster", + "potentialBenefits": "Faster scaling with virtual nodes", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "847a8d88-21c4-bc48-a94e-562206edd767", + "aprlGuid": "0611251f-e70f-4243-8ddd-cfe894bec2e7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-gateway/application-gateway-probe-overview", - "name": "Application Gateway Probe Overview" + "url": "https://learn.microsoft.com/en-us/azure/aks/free-standard-pricing-tiers", + "name": "Pricing Tiers" }, { - "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway", - "name": "Well-Architected Framework Application Gateway Overview" + "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Faks%2Ftoc.json&bc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Fbread%2Ftoc.json#kubernetes-api-server-sla", + "name": "AKS Baseline Architecture" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Using custom health probes enhances understanding of backend availability and facilitates monitoring of backend services for any impact.\n", + "recommendationControl": "High Availability", + "longDescription": "Production AKS 
clusters require the Standard tier for a financially backed SLA and enhanced node scalability, as the free service lacks these features.\n", "pgVerified": true, - "description": "Use Health Probes to detect backend availability", - "potentialBenefits": "Ensures backend uptime monitoring.", - "publishedToLearn": false, + "description": "Update AKS tier to Standard", + "potentialBenefits": "SLA guarantee and better scalability", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Application Gateways are not using health probes to monitor the availability of the backend systems\r\nresources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| where array_length(properties.probes) == 0\r\n| project recommendationId=\"847a8d88-21c4-bc48-a94e-562206edd767\", name, id, tags, param1=\"customHealthProbeUsed: false\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Returns all AKS clusters not running on the Standard tier\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| where sku.tier != \"Standard\"\n| project recommendationId=\"0611251f-e70f-4243-8ddd-cfe894bec2e7\", id, name, tags, param1=strcat(\"skuName: \", sku.name), param2=strcat(\"skuTier: \", sku.tier)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c9c00f2a-3888-714b-a72b-b4c9e8fcffb2", - "recommendationTypeId": "5c488377-be3e-4365-92e8-09d1e8d9038c", + "aprlGuid": "dcaf8128-94bd-4d53-9235-3a0371df6b74", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-application-gateway#reliability", - "name": "Well-Architected Framework Application Gateway Reliability" - }, - { - "url": 
"https://learn.microsoft.com/azure/application-gateway/overview-v2", - "name": "Application Gateway V2 Overview" + "url": "https://learn.microsoft.com/azure/aks/monitor-aks", + "name": "Monitor AKS" } ], - "recommendationControl": "High Availability", - "longDescription": "Deploying Application Gateway in a zone-aware configuration ensures continued customer access to services even if a specific zone goes down, as services in other zones remain available.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Azure Monitor enables real-time health and performance insights for AKS by collecting events, capturing container logs, and gathering CPU/Memory data from the Metrics API. It allows data visualization using Azure Monitor Container Insights, Prometheus, Grafana, or others.\n", "pgVerified": true, - "description": "Deploy Application Gateway in a zone-redundant configuration", - "potentialBenefits": "Enhanced uptime and customer access", - "publishedToLearn": false, + "description": "Enable AKS Monitoring", + "potentialBenefits": "Real-time AKS health/performance insights", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// list Application Gateways that are not configured to use at least 2 Availability Zones\r\nresources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", 
\"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where isnull(zones) or array_length(zones) < 2\r\n| extend zoneValue = iff((isnull(zones)), \"null\", zones)\r\n| project recommendationId = \"c9c00f2a-3888-714b-a72b-b4c9e8fcffb2\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Zones value: \", zoneValue )\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Returns AKS clusters where either Azure Monitor is not enabled and/or Container Insights is not enabled\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend azureMonitor = tostring(parse_json(properties.azureMonitorProfile.metrics.enabled))\n| extend insights = tostring(parse_json(properties.addonProfiles.omsagent.enabled))\n| where isempty(azureMonitor) or isempty(insights)\n| project recommendationId=\"dcaf8128-94bd-4d53-9235-3a0371df6b74\",id, name, tags, param1=strcat(\"azureMonitorProfileEnabled: \", iff(isempty(azureMonitor), \"false\", azureMonitor)), param2=strcat(\"containerInsightsEnabled: \", iff(isempty(insights), \"false\", insights))\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "10f02bc6-e2e7-004d-a2c2-f9bf9f16b915", + "aprlGuid": "a7bfcc18-b0d8-4d37-81f3-8131ed8bead5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-gateway/features#connection-draining", - "name": "Application Gateway Connection Draining" + "url": "https://learn.microsoft.com/azure/aks/concepts-storage#ephemeral-os-disk", + "name": "Ephemeral OS disk" + }, + { + "url": "https://learn.microsoft.com/azure/aks/cluster-configuration", + "name": "Configure an AKS cluster" }, { - "url": "https://learn.microsoft.com/azure/application-gateway/configuration-http-settings#connection-draining", - "name": "Application Gateway Connection Draining 
HTTP Settings" + "url": "https://learn.microsoft.com/samples/azure-samples/aks-ephemeral-os-disk/aks-ephemeral-os-disk/", + "name": "Everything you want to know about ephemeral OS disks and AKS" } ], - "recommendationControl": "High Availability", - "longDescription": "Using connection draining for backend maintenance ensures graceful removal of backend pool members during updates or health issues. It's enabled via Backend Setting and applies to all members during rule creation.\n", + "recommendationControl": "Scalability", + "longDescription": "Ephemeral OS disks on AKS offer lower read/write latency due to local attachment, eliminating the need for replication seen with managed disks. This enhances performance and speeds up cluster operations such as scaling or upgrading due to quicker re-imaging and boot times.\n", "pgVerified": true, - "description": "Plan for backend maintenance by using connection draining", - "potentialBenefits": "Smooth updates, no dropped users", - "publishedToLearn": false, + "description": "Use Ephemeral OS disks on AKS clusters", + "potentialBenefits": "Lower latency, faster re-imaging and booting", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will check if connection draining is enabled\r\nresources\r\n| where type =~ \"microsoft.network/applicationGateways\"\r\n| mv-expand backendHttpSettings = properties.backendHttpSettingsCollection\r\n| extend connectionDrainingEnabled = backendHttpSettings.properties.connectionDraining.enabled\r\n| where connectionDrainingEnabled != true\r\n| extend backendPoolName = backendHttpSettings.name\r\n| project recommendationId = \"10f02bc6-e2e7-004d-a2c2-f9bf9f16b915\", name, id, tags, param1 = \"connectionDraining: Disabled\", param2 = strcat(\"backendSettingsName: 
\", backendPoolName)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Returns any AKS cluster nodepools that do not have Ephemeral Disks\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\n| extend type = tostring(agentPoolProfile.osDiskType)\n| where type != 'Ephemeral'\n| project recommendationId=\"a7bfcc18-b0d8-4d37-81f3-8131ed8bead5\", name, id, param1=strcat(\"osDiskType: \", type)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8364fd0a-7c0e-e240-9d95-4bf965aec243", - "recommendationTypeId": "ef4da732-f541-4109-bc0e-465c68b6c7eb", + "aprlGuid": "26ebaf1f-c70d-4ebd-8641-4b60a0ce0094", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/application-gateway/configuration-infrastructure#size-of-the-subnet", - "name": "Azure Application Gateway infrastructure configuration | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/containers/aks/baseline-aks?toc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Faks%2Ftoc.json&bc=https%3A%2F%2Flearn.microsoft.com%2Fen-us%2Fazure%2Fbread%2Ftoc.json#policy-management", + "name": "AKS Baseline - Policy Management" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/aks/policy-reference", + "name": "Built-in Policy Definitions for AKS" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Application Gateway v2 (Standard_v2 or WAF_v2 SKU) can support up to 125 instances. 
A /24 subnet isn't mandatory for deployment but is advised to provide enough space for autoscaling and maintenance upgrades.\n", - "pgVerified": true, - "description": "Ensure Application Gateway Subnet is using a /24 subnet mask", - "potentialBenefits": "Allows autoscaling and maintenance", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "Azure Policies in AKS clusters help enforce governance best practices concerning security, authentication, provisioning, networking, and more, ensuring a robust and secure environment for operations.\n", + "pgVerified": false, + "description": "Enable and remediate Azure Policies configured for AKS", + "potentialBenefits": "Enhanced AKS governance and security", "tags": null, - "recommendationResourceType": "Microsoft.Network/applicationGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will validate the subnet id for an appGW ends with a /24\r\n\r\nresources\r\n| where type =~ 'Microsoft.Network/applicationGateways'\r\n| extend subnetid = tostring(properties.gatewayIPConfigurations[0].properties.subnet.id)\r\n| join kind=leftouter(resources\r\n | where type == \"microsoft.network/virtualnetworks\"\r\n | mv-expand properties.subnets\r\n | extend subnetid = tostring(properties_subnets.id)\r\n | extend addressprefix = tostring(properties_subnets.properties.addressPrefix)\r\n | project subnetid, addressprefix) on subnetid\r\n| where addressprefix !endswith '/24'\r\n| project recommendationId = \"8364fd0a-7c0e-e240-9d95-4bf965aec243\", name, id, tags, param1 = strcat('AppGW subnet prefix: ', addressprefix)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Returns a count of non-compliant policy items per AKS cluster\nPolicyResources\n| where type =~ 'Microsoft.PolicyInsights/PolicyStates'\n| 
extend complianceState = tostring(properties.complianceState)\n| where complianceState == 'NonCompliant'\n| where properties.resourceType =~ 'Microsoft.ContainerService/managedClusters'\n| extend\n id = tostring(properties.resourceId)\n| summarize count() by id\n| join kind=inner (\n resources\n | where type =~ 'Microsoft.ContainerService/managedClusters'\n | project id, name\n) on id\n| project recommendationId=\"26ebaf1f-c70d-4ebd-8641-4b60a0ce0094\", id, name, param1=strcat(\"numNonCompliantAlerts: \", count_)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c72b7fee-1fa0-5b4b-98e5-54bcae95bb74", + "aprlGuid": "5f3cbd68-692a-4121-988c-9770914859a9", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-firewall", - "name": "Azure Well Architected Framework - Azure Firewall" + "url": "https://learn.microsoft.com/en-us/azure/architecture/guide/aks/aks-cicd-github-actions-and-gitops", + "name": "GitOps with AKS" }, { - "url": "https://learn.microsoft.com/azure/firewall/deploy-availability-zone-powershell", - "name": "Deploy Azure Firewall across multiple availability zones" + "url": "https://learn.microsoft.com/en-us/azure/architecture/example-scenario/gitops-aks/gitops-blueprint-aks", + "name": "GitOps for AKS - Reference Architecture" } ], - "recommendationControl": "High Availability", - "longDescription": "Azure Firewall offers different SLAs depending on its deployment; in a single availability zone or across multiple, potentially improving reliability and performance.\n", - "pgVerified": true, - "description": "Deploy Azure Firewall across multiple availability zones", - "potentialBenefits": "Enhanced SLA and reliability", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "GitOps, an operating model for cloud-native apps, uses Git for storing application and infrastructure code 
as a source of truth for continuous delivery.\n", + "pgVerified": false, + "description": "Enable GitOps when using DevOps frameworks", + "potentialBenefits": "Ensures AKS config consistency", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// List all Azure Firewalls that are not configured with multiple availability zones or deployed without a zone\r\nresources\r\n| where type == 'microsoft.network/azurefirewalls'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where array_length(zones) <= 1 or isnull(zones)\r\n| where isempty(properties.virtualHub.id) or isnull(properties.virtualHub.id)\r\n| project recommendationId = \"c72b7fee-1fa0-5b4b-98e5-54bcae95bb74\", name, id, tags, param1=\"multipleZones:false\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Returns AKS clusters where GitOps is not enabled\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend gitops = tostring (parse_json(properties.addOnProfiles.gitops.enabled))\n| where isempty(gitops)\n| project recommendationId=\"5f3cbd68-692a-4121-988c-9770914859a9\", id, name, tags, param1=strcat(\"gitopsEnabled: \", \"false\")\n\n" }, { "publishedToAdvisor": null, - 
"aprlGuid": "3c8fa7c6-6b78-a24a-a63f-348a7c71acb9", + "aprlGuid": "928fcc6f-5e9a-42d9-9bd4-260af42de2e5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/metrics-supported#microsoftnetworkazurefirewalls", - "name": "Azure Firewall metrics supported in Azure Monitor" + "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/", + "name": "Topology Spread Constraints" }, { - "url": "https://learn.microsoft.com/azure/firewall/firewall-performance", - "name": "Azure Firewall performance" + "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/", + "name": "Assign Pod Node" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitor Azure Firewall for overall health, processed throughput, and outbound SNAT port usage. Get alerted before limits impact services. Consider NAT gateway integration with zonal deployments; note limitations with zone redundant firewalls and secure virtual hub networks.\n", + "recommendationControl": "High Availability", + "longDescription": "Enhance availability and reliability by using pod topology spread constraints to control pod distribution based on node or zone topology, ensuring pods are spread across your cluster.\n", "pgVerified": true, - "description": "Monitor Azure Firewall metrics", - "potentialBenefits": "Improve health and performance monitoring", - "publishedToLearn": false, + "description": "Use pod topology spread constraints to ensure that pods are spread across different nodes or zones", + "potentialBenefits": "Ensures high availability and efficient use", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// 
List all Azure Firewalls resources in-scope, along with any metrics associated to Azure Monitor alert rules, that are not fully configured.\r\nresources\r\n| where type == \"microsoft.network/azurefirewalls\"\r\n| project firewallId = tolower(id), name, tags\r\n| join kind = leftouter (\r\n resources\r\n | where type == \"microsoft.insights/metricalerts\"\r\n | mv-expand properties.scopes\r\n | mv-expand properties.criteria.allOf\r\n | where properties_scopes contains \"azureFirewalls\"\r\n | project metricId = tolower(properties_scopes), monitoredMetric = properties_criteria_allOf.metricName, tags\r\n | summarize monitoredMetrics = make_list(monitoredMetric) by tostring(metricId)\r\n | project\r\n metricId,\r\n monitoredMetrics,\r\n allAlertsConfigured = monitoredMetrics contains(\"FirewallHealth\") and monitoredMetrics contains (\"Throughput\") and monitoredMetrics contains (\"SNATPortUtilization\")\r\n) on $left.firewallId == $right.metricId\r\n| extend alertsNotFullyConfigured = isnull(allAlertsConfigured) or not(allAlertsConfigured)\r\n| where alertsNotFullyConfigured\r\n| project recommendationId = \"c8fa7c6-6b78-a24a-a63f-348a7c71acb9\", name, id = firewallId, tags, param1 = strcat(\"MetricsAlerts:\", monitoredMetrics)\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1b2dbf4a-8a0b-5e4b-8f4e-3f758188910d", + "aprlGuid": "cd6791b1-c60e-4b37-ac98-9897b1e6f4b8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/ddos-protection/ddos-protection-overview", - "name": "Azure DDoS Protection overview" + "url": "https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/", + "name": "Configure probes" + }, + { + "url": "https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/", + "name": "Assign Pod Node" } ], - "recommendationControl": 
"Security", - "longDescription": "Associate a DDoS protection plan with the virtual network hosting Azure Firewall to provide enhanced mitigation against DDoS attacks. Azure Firewall Manager integrates the creation of firewall infrastructure and DDoS protection plans.\n", + "recommendationControl": "High Availability", + "longDescription": "AKS kubelet controller uses liveness probes to validate containers and applications health, ensuring the system knows when to restart a container based on its health status.\n", "pgVerified": true, - "description": "Configure DDoS Protection on the Azure Firewall VNet", - "potentialBenefits": "Enhanced DDoS attack defense", - "publishedToLearn": false, + "description": "Configures Pods Liveness, Readiness, and Startup Probes", + "potentialBenefits": "Enhances container health monitoring", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// List all in-scope Azure Firewall resources, where the VNet is not associated to a DDoS Protection Plan\r\nresources\r\n| where type =~ \"Microsoft.Network/azureFirewalls\"\r\n| where isempty(properties.virtualHub.id) or isnull(properties.virtualHub.id)\r\n| mv-expand ipConfig = properties.ipConfigurations\r\n| project\r\n name,\r\n firewallId = id,\r\n tags,\r\n vNetName = split(ipConfig.properties.subnet.id, \"/\", 8)[0],\r\n vNetId = tolower(substring(ipConfig.properties.subnet.id, 0, indexof(ipConfig.properties.subnet.id, \"/subnet\")))\r\n| join kind=fullouter (\r\n resources\r\n | where type =~ \"Microsoft.Network/ddosProtectionPlans\"\r\n | mv-expand vNet = properties.virtualNetworks\r\n | project ddosProtectionPlanId = id, vNetId = tolower(vNet.id)\r\n )\r\n on vNetId\r\n| where isempty(ddosProtectionPlanId)\r\n| project recommendationId = 
\"1b2dbf4a-8a0b-5e4b-8f4e-3f758188910d\", name, id = firewallId, tags, param1 = strcat(\"vNet: \", vNetName), param2 = \"ddosProtection: Disabled\"\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3a63560a-1ed3-6140-acd1-d1d23f9a2e12", + "aprlGuid": "bcfe71f1-ebed-49e5-a84a-193b81ad5d27", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/firewall-manager/rule-hierarchy", - "name": "Azure Firewall Policy hierarchy" + "url": "https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/", + "name": "Replica Sets" } ], - "recommendationControl": "Governance", - "longDescription": "Azure Firewall policy supports rule hierarchies for compliance enforcement, using a central base policy with higher priority over child policies, and employs Azure custom roles to safeguard base policy and manage access within subscriptions or groups.\n", + "recommendationControl": "High Availability", + "longDescription": "Configuring multiple replicas in Pod or Deployment manifests stabilizes the number of replica Pods, ensuring that a specified number of identical Pods are always available, thereby guaranteeing their availability.\n", "pgVerified": true, - "description": "Leverage Azure Firewall policy inheritance model", - "potentialBenefits": "Enhanced compliance and rule hierarchy", - "publishedToLearn": false, + "description": "Use deployments with multiple replicas in production applications to guarantee availability", + "potentialBenefits": "Ensures stable pod availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// 
cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d2e4a38e-2307-4299-a217-4c0cebc9a7f6", + "aprlGuid": "7f7ae535-a5ba-4665-b7e0-c451dbdda01f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/service-guides/azure-firewall#recommendations", - "name": "Azure Well-Architected Framework review - Azure Firewall" + "url": "https://learn.microsoft.com/azure/aks/use-system-pools?tabs=azure-cli", + "name": "System nodepools" } ], "recommendationControl": "High Availability", - "longDescription": "Configure a minimum of two to four public IP addresses per Azure Firewall to avoid SNAT exhaustion. Azure Firewall offers SNAT for all outbound traffic to public IPs, providing 2,496 SNAT ports for each additional PIP.\n", - "pgVerified": false, - "description": "Configure 2-4 PIPs for SNAT Port utilization", - "potentialBenefits": "Avoids SNAT exhaustion.", - "publishedToLearn": false, + "longDescription": "The system node pool should be configured with a minimum node count of two to ensure critical system pods are resilient to node outages.\n", + "pgVerified": true, + "description": "Configure system nodepool count", + "potentialBenefits": "Ensures pod resilience", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under development\r\n\r\n" + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns each AKS cluster with nodepools that have system nodepools with less than 2 nodes\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\n| extend taints = tostring(parse_json(agentPoolProfile.nodeTaints))\n| extend 
nodePool = tostring(parse_json(agentPoolProfile.name))\n| where taints has \"CriticalAddonsOnly=true:NoSchedule\" and agentPoolProfile.minCount < 2\n| project recommendationId=\"7f7ae535-a5ba-4665-b7e0-c451dbdda01f\", id, name, param1=strcat(\"nodePoolName: \", nodePool), param2=strcat(\"nodePoolMinNodeCount: \", agentPoolProfile.minCount)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8faace2d-a36e-425c-aa58-2ad99e3e0b7a", + "aprlGuid": "005ccbbd-aeab-46ef-80bd-9bd4479412ec", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/service-guides/azure-firewall#recommendations", - "name": "Azure Well-Architected Framework review - Azure Firewall" - }, - { - "url": "https://learn.microsoft.com/azure/firewall/metrics", - "name": "Azure Firewall metrics overview" + "url": "https://learn.microsoft.com/azure/well-architected/service-guides/azure-kubernetes-service#design-checklist", + "name": "Azure Well-Architected Framework review for Azure Kubernetes Service (AKS)" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Creating a metric to monitor latency probes over 20ms for periods longer than 30ms helps identify when firewall instance CPUs are stressed, potentially indicating issues.\n", - "pgVerified": false, - "description": "Monitor \"AZFW Latency Probe\" metric", - "potentialBenefits": "Improved CPU stress detection", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Configuring the user node pool with at least two nodes is essential for applications needing high availability, ensuring they remain operational and accessible without interruption.\n", + "pgVerified": true, + "description": "Configure user nodepool count", + "potentialBenefits": "Ensures high app availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/azureFirewalls", + 
"recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns each AKS cluster with nodepools that have user nodepools with less than 2 nodes\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\n| extend taints = tostring(parse_json(agentPoolProfile.nodeTaints))\n| extend nodePool = tostring(parse_json(agentPoolProfile.name))\n| where taints !has \"CriticalAddonsOnly=true:NoSchedule\" and agentPoolProfile.minCount < 2\n| project recommendationId=\"005ccbbd-aeab-46ef-80bd-9bd4479412ec\", id, name, param1=strcat(\"nodePoolName: \", nodePool), param2=strcat(\"nodePoolMinNodeCount: \", agentPoolProfile.minCount)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f6a14b32-a727-4ace-b5fa-7b1c6bdff402", + "aprlGuid": "a08a06a0-e41a-4b99-83bb-69ce8bca54cb", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/about-fastpath", - "name": "About ExpressRoute FastPath" + "url": "https://kubernetes.io/docs/tasks/run-application/configure-pdb/", + "name": "Configure PDBs" + }, + { + "url": "https://learn.microsoft.com/azure/aks/operator-best-practices-scheduler#plan-for-availability-using-pod-disruption-budgets", + "name": "Plan availability using PDBs" } ], - "recommendationControl": "Scalability", - "longDescription": "ExpressRoute gateways facilitate network traffic and route exchanges. 
FastPath enhances on-premises to virtual network data path performance by directing traffic straight to virtual machines, bypassing the gateway for improved resiliency through reduced gateway utilization.\n", + "recommendationControl": "High Availability", + "longDescription": "A Pod Disruption Budget is a Kubernetes resource configuring the minimum number or percentage of pods that should remain available during disruptions like maintenance or scaling, ensuring a minimum number of pods are always available in the cluster.\n", "pgVerified": true, - "description": "For better data path performance enable FastPath on ExpressRoute Connections", - "potentialBenefits": "Enhances speed and resiliency", - "publishedToLearn": false, + "description": "Configure pod disruption budgets (PDBs)", + "potentialBenefits": "Ensures cluster resiliency during disruptions", "tags": null, - "recommendationResourceType": "Microsoft.Network/connections", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// Azure Resource Graph Query\r\n// Find all ExpressRoute Connections that are connected to ErGw3AZ or UltraPerformance gateway sku that don't have\r\n// FastPath enabled for both the Gateway Bypass or Private Endpoint/Link service.\r\nresources\r\n| where type == \"microsoft.network/connections\"\r\n| where properties.connectionType =~ 'expressroute'\r\n| extend gatewayId = tostring(properties.virtualNetworkGateway1.id)\r\n| join kind=inner (\r\n resources\r\n | where type =~ \"Microsoft.Network/virtualNetworkGateways\"\r\n | where properties.sku.name in~ (\"ErGw3AZ\", \"UltraPerformance\")\r\n | extend gatewayId = tostring(id)\r\n) on gatewayId\r\n| extend erGatewayBypass = tobool(properties.expressRouteGatewayBypass)\r\n| extend privateLinkFastPath = tobool(properties.enablePrivateLinkFastPath)\r\n| where not(erGatewayBypass) or not(privateLinkFastPath)\r\n| project recommendationId 
= \"f6a14b32-a727-4ace-b5fa-7b1c6bdff402\", id, name, tags,\r\n param1 = iff(erGatewayBypass, \"Enabled: Gateway Bypass\", \"Disabled: Gateway Bypass\"),\r\n param2 = iff(privateLinkFastPath, \"Enabled: PE FastPath\", \"Disabled: PE FastPath\"),\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a5f3a4bd-4cf1-4196-a3cb-f5a0876198b2", + "aprlGuid": "e620fa98-7a40-41a0-bfc9-b4407297fb58", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?tabs=json", - "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" + "url": "https://learn.microsoft.com/azure/aks/configure-azure-cni-dynamic-ip-allocation", + "name": "Azure CNI Dynamic IP Allocation" } ], "recommendationControl": "High Availability", - "longDescription": "Configure an Azure Resource lock for Gateway Connection resources to prevent accidental deletion and maintain connectivity between on-premises networks and Azure workloads.\n", - "pgVerified": true, - "description": "Configure an Azure Resource Lock on connections to prevent accidental deletion", - "potentialBenefits": "Prevents accidental deletion of connections", - "publishedToLearn": false, + "longDescription": "Nodepool subnets sized for max auto-scale settings enable AKS to efficiently scale out nodes, meeting increased demand while reducing resource constraints and potential service disruptions.\n", + "pgVerified": false, + "description": "Nodepool subnet size needs to accommodate maximum auto-scale settings", + "potentialBenefits": "Efficient scaling, reduced disruptions", "tags": null, - "recommendationResourceType": "Microsoft.Network/connections", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// 
under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns each AKS cluster with nodepools that have user nodepools with a subnetmask that does not match autoscale configured max-nodes\n// Subtracting the network address, broadcast address, and default 3 addresses Azure reserves within each subnet\n\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| extend nodePools = properties['agentPoolProfiles']\n| mv-expand nodePools = properties.agentPoolProfiles\n| where nodePools.enableAutoScaling == true\n| extend nodePoolName=nodePools.name, maxNodes = nodePools.maxCount, subnetId = tostring(nodePools.vnetSubnetID)\n| project clusterId = id, clusterName=name, nodePoolName=nodePools.name, toint(maxNodes), subnetId\n| join kind = leftouter (\n resources\n | where type == 'microsoft.network/virtualnetworks'\n | extend subnets = properties.subnets\n | mv-expand subnets\n | project id = tostring(subnets.id), addressPrefix = tostring(subnets.properties['addressPrefix'])\n | extend subnetmask = toint(substring(addressPrefix, indexof(addressPrefix, '/')+1, string_size(addressPrefix)))\n | extend possibleMaxNodeCount = toint(exp2(32-subnetmask) - 5)\n) on $left.subnetId == $right.id\n| project-away id, subnetmask\n| where possibleMaxNodeCount <= maxNodes\n| extend param1 = strcat(nodePoolName, \" autoscaler upper limit: \", maxNodes)\n| extend param2 = strcat(\"ip addresses on subnet: \", possibleMaxNodeCount)\n| project recommendationId=\"e620fa98-7a40-41a0-bfc9-b4407297fb58\", name=clusterName, id=clusterId, param1, param2\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ae054bf2-aefa-cf4a-8282-741194cef8da", + "aprlGuid": "a01afc4c-7439-4919-b2da-3565992ea2a7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/ddos-protection/monitor-ddos-protection-reference", - "name": "Monitoring Azure DDoS Protection" 
+ "url": "https://learn.microsoft.com/azure/quotas/quotas-overview", + "name": "Azure Quotas" } ], - "recommendationControl": "Security", - "longDescription": "Azure DDoS Plan metrics differentiate packets and bytes by tags: Dropped (packets scrubbed by DDoS), Forwarded (packets to VIP not filtered), and No tag (total packets, sum of dropped and forwarded).\n", - "pgVerified": true, - "description": "Monitor Azure DDoS Protection Plan metrics", - "potentialBenefits": "Enhanced security and traffic insight", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Node pool settings should not exceed the subscription core quota to ensure AKS can scale out nodes efficiently, meeting increased demand while reducing resource constraints and potential service disruptions.\n", + "pgVerified": false, + "description": "Node pool auto-scale settings should not exceed subscription core quota", + "potentialBenefits": "Reduced disruptions", "tags": null, - "recommendationResourceType": "Microsoft.Network/ddosProtectionPlans", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7d09523b-b3c0-403e-b104-d5d46240d683", + "aprlGuid": "f46b0d1d-56ef-4795-b98a-f6ee00cb341a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/reliability/reliability-dns", - "name": "Reliability in Azure DNS" + "url": "https://learn.microsoft.com/azure/aks/use-azure-linux", + "name": "Azure Linux" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Azure DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. 
You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets.\n", + "recommendationControl": "High Availability", + "longDescription": "Azure Linux on AKS boosts resiliency with a native image using validated, source-built components. It's lightweight, reducing the attack surface and maintenance. A Microsoft-hardened kernel, optimized for Azure, enhances stability and security for container workloads.\n", "pgVerified": false, - "description": "Ensure Time-To-Live (TTL) is set appropriately to ensure RPOs can be met", - "potentialBenefits": "Ensures that no cached DNS records exist past RPO targets", - "publishedToLearn": false, + "description": "Use Azure Linux for Linux nodepools", + "potentialBenefits": "Reduced disruptions", "tags": null, - "recommendationResourceType": "Microsoft.Network/dnsZones", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Returns each AKS cluster with nodepools that have Linux nodepools not using Azure Linux\nresources\n| where type == \"microsoft.containerservice/managedclusters\"\n| mv-expand agentPoolProfile = properties.agentPoolProfiles\n| where agentPoolProfile.osType == 'Linux' and agentPoolProfile.osSKU != 'AzureLinux'\n| project recommendationid=\"f46b0d1d-56ef-4795-b98a-f6ee00cb341a\", name, id, param1=strcat(\"nodePoolName: \", agentPoolProfile.name)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4d703025-dafc-f840-a183-5dc440456134", + "aprlGuid": "9200aca6-0e83-4749-a5eb-e3939367bdc2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", - "name": "Designing for disaster recovery with 
ExpressRoute private peering" + "url": "https://learn.microsoft.com/azure/aks/best-practices-app-cluster-reliability#multi-replica-applications", + "name": "Multi-replica apps" } ], - "recommendationControl": "High Availability", - "longDescription": "Connecting each ExpressRoute Gateway to a minimum of two circuits in different peering locations enhances redundancy and reliability by ensuring alternate pathways for data in case one circuit fails.\n", - "pgVerified": true, - "description": "Connect on-prem networks to Azure critical workloads via multiple ExpressRoutes", - "potentialBenefits": "Enhanced reliability and redundancy", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Deploying at least two replicas of your application ensures that your application is highly available and can tolerate node failures.\n", + "pgVerified": false, + "description": "Deploy at least two replicas of your application", + "potentialBenefits": "Ensures high app availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationResourceType": "Microsoft.ContainerService/managedClusters", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0e19cc41-8274-1342-b0db-0e4146eacef8", + "aprlGuid": "19b6df57-f6b5-3e4f-843a-273daa087cb0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/designing-for-high-availability-with-expressroute", - "name": "Designing for high availability with ExpressRoute" - }, - { - "url": "https://learn.microsoft.com/azure/well-architected/services/networking/azure-expressroute#recommendations", - "name": "Azure Well-Architected Framework review - Azure ExpressRoute - Design Checklist" + "url": 
"https://learn.microsoft.com/en-us/azure/virtual-machines/generation-2#features-and-capabilities", + "name": "Generation 1 vs generation 2 virtual machines" } ], "recommendationControl": "High Availability", - "longDescription": "Microsoft or the ExpressRoute provider always ensures physical redundancy in their services. It's essential to maintain this level of physical redundancy (two devices, two links) from the ExpressRoute peering location to your network for optimal performance and reliability.\n", + "longDescription": "When building Image Templates, use sources for gen 2 VMs. Gen 2 offers more memory, supports >2TB disks, uses UEFI for faster boot/installation, has Intel SGX, and virtualized persistent memory (vPMEM), unlike gen 1's BIOS-based architecture.\n", "pgVerified": true, - "description": "Ensure ExpressRoute's physical links connect to distinct network edge devices", - "potentialBenefits": "Enhanced reliability and fault tolerance", - "publishedToLearn": false, + "description": "Use Generation 2 virtual machine source image", + "potentialBenefits": "More memory, supports >2TB disks, faster boot", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.VirtualMachineImages/imageTemplates", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f06a2bbe-5839-d447-9f39-fc3d20562d88", + "aprlGuid": "21fb841b-ba70-1f4e-a460-1f72fb41aa51", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/designing-for-high-availability-with-expressroute#active-active-connections", - "name": "Designing for high availability with ExpressRoute - Active-active connections" + "url": 
"https://learn.microsoft.com/en-us/azure/reliability/reliability-image-builder?toc=%2Fazure%2Fvirtual-machines%2Ftoc.json&bc=%2Fazure%2Fvirtual-machines%2Fbreadcrumb%2Ftoc.json#capacity-and-proactive-disaster-recovery-resiliency", + "name": "Image Template resiliency" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/image-builder-overview?tabs=azure-powershell#regions", + "name": "Azure Image Builder Supported Regions" } ], - "recommendationControl": "High Availability", - "longDescription": "Operating both connections of an ExpressRoute circuit in active-active mode enhances high availability as the Microsoft network will load balance the traffic across the connections on a per-flow basis.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "The Azure Image Builder service, used for deploying Image Templates, lacks availability zones support. By replicating Image Templates to a secondary, preferably paired, region, quick recovery from a region failure is enabled, ensuring continuous virtual machine deployment from these templates.\n", "pgVerified": true, - "description": "Ensure both connections of an ExpressRoute circuit are configured in active-active mode", - "potentialBenefits": "Improved high availability and load balancing", - "publishedToLearn": false, + "description": "Replicate your Image Templates to a secondary region", + "potentialBenefits": "Enhances disaster recovery capability", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.VirtualMachineImages/imageTemplates", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// List all Image Templates that are not replicated to another region\nresources\n| where type =~ 
\"microsoft.virtualmachineimages/imagetemplates\"\n| mv-expand distribution=properties.distribute\n| where array_length(parse_json(distribution).replicationRegions) == 1\n| project recommendationId = \"21fb841b-ba70-1f4e-a460-1f72fb41aa51\", name, id, param1=strcat(\"replicationRegions:\",parse_json(distribution).replicationRegions)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2a5bf650-586d-db4c-a292-d922be7d3e0e", + "aprlGuid": "67205887-0733-466e-b50e-b1cd7316c514", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/expressroute-bfd", - "name": "Configure BFD over ExpressRoute" + "url": "https://learn.microsoft.com/en-us/azure/automation/automation-disaster-recovery?tabs=win-hrw%2Cps-script%2Coption-one", + "name": "Disaster recovery for Automation accounts" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/automation/automation-disaster-recovery?tabs=win-hrw%2Cps-script%2Coption-one#scenarios-for-cloud-and-hybrid-jobs", + "name": "Disaster recovery scenarios for cloud and hybrid jobs" } ], "recommendationControl": "High Availability", - "longDescription": "Enabling BFD over ExpressRoute speeds up link failure detection between MSEE devices and routers configured for ExpressRoute (CE/PE), applicable over both customer and Partner Edge routing devices with managed Layer 3 service.\n", - "pgVerified": true, - "description": "Activate Bidirectional Forwarding Detection on edge devices for faster failover", - "potentialBenefits": "Faster link failure detection", - "publishedToLearn": false, + "longDescription": "Set up disaster recovery for Automation accounts and resources like Modules, Connections, Credentials, Certificates, Variables, and Schedules to deal with region or zone failures. 
A replica Automation account should be ready in a secondary region for failover.\n", + "pgVerified": false, + "description": "Set up disaster recovery of Automation accounts and their dependent resources", + "potentialBenefits": "Ensures continuity during outages", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationResourceType": "Microsoft.Automation/automationAccounts", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9771a435-d031-814e-9827-9b5fdafc0f87", + "aprlGuid": "e6c7e1cc-2f47-264d-aa50-1da421314472", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/expressRouteCircuits/", - "name": "Azure Monitor Baseline Alerts - expressRouteCircuits" + "url": "https://learn.microsoft.com/azure/storage/common/storage-redundancy", + "name": "Azure Storage redundancy" + }, + { + "url": "https://learn.microsoft.com/azure/storage/common/redundancy-migration", + "name": "Change the redundancy configuration for a storage account" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Network Insights for monitoring ExpressRoute circuit availability, QoS, and throughput. Set alerts based on Azure Monitor Baseline Alerts for availability, QoS metrics, and throughput metrics exceeding specific thresholds.\n", + "recommendationControl": "High Availability", + "longDescription": "Redundancy ensures storage accounts meet availability and durability targets amidst failures, weighing lower costs against higher availability. 
Locally redundant storage offers the least durability at the lowest cost.\n", "pgVerified": true, - "description": "Configure monitoring and alerting for ExpressRoute circuits", - "potentialBenefits": "Enhanced network performance and health", - "publishedToLearn": false, + "description": "Ensure that storage accounts are zone or region redundant", + "potentialBenefits": "High availability and durability for storage", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationResourceType": "Microsoft.Storage/storageAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This query will return all storage accounts that are not using Zone or Region replication\nResources\n| where type =~ \"Microsoft.Storage/storageAccounts\"\n| where sku.name in~ (\"Standard_LRS\", \"Premium_LRS\")\n| project recommendationId = \"e6c7e1cc-2f47-264d-aa50-1da421314472\", name, id, tags, param1 = strcat(\"sku: \", sku.name)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "26cb547f-aabc-dc40-be02-d0a9b6b04b1a", - "recommendationTypeId": null, + "aprlGuid": "63ad027e-611c-294b-acc5-8e3234db9a40", + "recommendationTypeId": "47bb383c-8e25-95f0-c2aa-437add1d87d3", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/maintenance-alerts", - "name": "How to view and configure alerts for Azure ExpressRoute circuit maintenance" + "url": "https://azure.microsoft.com/updates/classic-azure-storage-accounts-will-be-retired-on-31-august-2024/", + "name": "Azure classic storage accounts retirement announcement" + }, + { + "url": "https://learn.microsoft.com/azure/storage/common/classic-account-migration-overview", + "name": "Migrate your classic storage accounts to Azure Resource Manager" } ], - "recommendationControl": "Monitoring and Alerting", - 
"longDescription": "ExpressRoute leverages service health for notifications on both planned and unplanned maintenance, ensuring users are informed about any changes to their ExpressRoute circuits.\n", + "recommendationControl": "Service Upgrade and Retirement", + "longDescription": "Classic storage accounts will be fully retired on August 31, 2024. If you have classic storage accounts, start planning your migration now.\n", "pgVerified": true, - "description": "Configure service health to receive ExpressRoute circuit maintenance notification", - "potentialBenefits": "Stay informed on circuit updates", - "publishedToLearn": false, + "description": "Classic Storage Accounts must be migrated to new Azure Resource Manager resources", + "potentialBenefits": "Avoids service retirement issues", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationResourceType": "Microsoft.Storage/storageAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Azure classic Storage Account\nresources\n| where type =~ 'microsoft.classicstorage/storageaccounts'\n| project recommendationId = '63ad027e-611c-294b-acc5-8e3234db9a40', name, id, tags, param1=type\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d40c769d-2f08-4980-8d8f-a386946276e6", - "recommendationTypeId": null, + "aprlGuid": "5587ef77-7a05-a74d-9c6e-449547a12f27", + "recommendationTypeId": "c6b94711-f1f5-4e7e-9c89-c17ed4190969", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/rate-limit", - "name": "Rate limiting for ExpressRoute Direct circuits (Preview)" + "url": "https://learn.microsoft.com/azure/storage/common/storage-account-overview#types-of-storage-accounts", + "name": "Types of storage accounts" + }, + { + "url": 
"https://learn.microsoft.com/azure/storage/common/scalability-targets-standard-account", + "name": "Scalability and performance targets for standard storage accounts" + }, + { + "url": "https://learn.microsoft.com/azure/storage/blobs/storage-performance-checklist", + "name": "Performance and scalability checklist for Blob storage" + }, + { + "url": "https://learn.microsoft.com/azure/storage/blobs/scalability-targets", + "name": "Scalability and performance targets for Blob storage" + }, + { + "url": "https://learn.microsoft.com/azure/storage/blobs/storage-blob-block-blob-premium", + "name": "Premium block blob storage accounts" } ], "recommendationControl": "Scalability", - "longDescription": "Rate limiting controls traffic volume between on-premises networks and Azure via ExpressRoute Direct, applying to private or Microsoft peering. It distributes port bandwidth, ensures stability, and prevents congestion, with steps outlined for enabling on circuits.\n", + "longDescription": "Use premium performance block blob storage instead of standard performance storage for workloads that require fast storage response times and/or high transaction rates.\n", "pgVerified": true, - "description": "Implement rate-limiting across ExpressRoute Direct Circuits to optimize network flow", - "potentialBenefits": "Optimizes network, prevents congestion", - "publishedToLearn": false, + "description": "Use premium performance block blob storage for high performance workloads", + "potentialBenefits": "Optimized cost and performance", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteCircuits", + "recommendationResourceType": "Microsoft.Storage/storageAccounts", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all the ExpressRoute circuits (Direct Based) that have Direct Port Rate Limiting disabled\r\nresources\r\n| where type =~ \"microsoft.network/expressroutecircuits\"\r\n| 
where properties.expressRoutePort != \"\" or isnotnull(properties.expressRoutePort)\r\n| where properties.enableDirectPortRateLimit == false\r\n| project recommendationId = \"d40c769d-2f08-4980-8d8f-a386946276e6\", name, id, tags, param1=strcat(\"enableDirectPortRateLimit: \",properties.enableDirectPortRateLimit)\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9987c813-d687-4163-a511-95f31bc5e536", - "recommendationTypeId": null, + "aprlGuid": "03263c57-c869-3841-9e0a-3dbb9ef3e28d", + "recommendationTypeId": "42dbf883-9e4b-4f84-9da4-232b87c4b5e9", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", - "name": "Designing for disaster recovery with ExpressRoute private peering" + "url": "https://learn.microsoft.com/azure/storage/blobs/soft-delete-blob-enable?tabs=azure-portal", + "name": "Soft delete detail docs" } ], - "recommendationControl": "High Availability", - "longDescription": "To increase reliability, it's advised that each v-Hub's ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience.", - "pgVerified": false, - "description": "Connect v-Hub's ExpressRoute gateway to circuits from diverse peering locations for resilience", - "potentialBenefits": "Enhance resiliency for Azure Service", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "The soft delete option enables data recovery if mistakenly deleted, while the Lock feature prevents the accidental deletion of the storage account itself, ensuring additional security and data integrity measures.\n", + "pgVerified": true, + "description": "Enable Soft Delete to protect your data", + "potentialBenefits": 
"Prevents accidental data/account loss", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "17e8d380-e4b4-41a1-9b37-2e4df9fd5125", + "aprlGuid": "8ebda7c0-e0e1-ed45-af59-2d7ea9a1c05d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#expressroute-gateway", - "name": "Virtual WAN Monitoring Best Practices" + "url": "https://learn.microsoft.com/azure/storage/blobs/versioning-overview ", + "name": "Blob versioning" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set up monitoring and alerts for Virtual WAN ExpressRoute Gateway. Create alert rule for ensuring promptly response to critical events such as exceeding packets per second, exceeding BGP routes prefixes, Gateway overutilization and high frequency in route changes.", - "pgVerified": false, - "description": "Monitor health for v-Hub's ExpressRoute gateway", - "potentialBenefits": "Detection and mitigation to avoid disruptions.", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Consider enabling versioning for Azure Storage Accounts to recover from accidental modifications or deletions and manage blob operation latency. Microsoft advises maintaining fewer than 1000 versions per blob to optimize performance. 
Lifecycle management can help delete old versions automatically.\n", + "pgVerified": true, + "description": "Enable versioning for accidental modification and keep the number of versions below 1000", + "potentialBenefits": "Recover data, manage latency", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRouteGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "60077378-7cb1-4b35-89bb-393884d9921d", + "aprlGuid": "1b965cb9-7629-214e-b682-6bf6e450a100", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-howto-erdirect#state", - "name": "How to configure ExpressRoute Direct Change Admin State of links" + "url": "https://learn.microsoft.com/azure/storage/blobs/point-in-time-restore-overview", + "name": "Point-in-time restore for block blobs" + }, + { + "url": "https://learn.microsoft.com/azure/storage/blobs/point-in-time-restore-manage?tabs=portal", + "name": "Perform a point-in-time restore on block blob data" } ], - "recommendationControl": "High Availability", - "longDescription": "In Azure ExpressRoute Direct, the \"Admin State\" indicates the administrative status of layer 1 links, showing if a link is enabled or disabled, effectively turning the physical port on or off.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Consider enabling point-in-time restore for standard general purpose v2 accounts with flat namespace to protect against accidental deletion or corruption by restoring block blob data to an earlier state.\n", "pgVerified": true, - "description": "The Admin State of both Links of an ExpressRoute Direct should be in Enabled state", - 
"potentialBenefits": "Ensures optimal connectivity.", - "publishedToLearn": false, + "description": "Enable point-in-time restore for GPv2 accounts to safeguard against data loss", + "potentialBenefits": "Protects data from loss/corruption", "tags": null, - "recommendationResourceType": "Microsoft.Network/ExpressRoutePorts", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Express Route Directs that do not have Admin State of both Links Enabled\r\nresources\r\n| where type == \"microsoft.network/expressrouteports\"\r\n| where properties['links'][0]['properties']['adminState'] == \"Disabled\" or properties['links'][1]['properties']['adminState'] == \"Disabled\"\r\n| project recommendationId = \"60077378-7cb1-4b35-89bb-393884d9921d\", name, id, tags, param1 = strcat(\"Link1AdminState: \", properties['links'][0]['properties']['adminState']), param2 = strcat(\"Link2AdminState: \", properties['links'][1]['properties']['adminState'])\r\n\r\n" + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0bee356b-7348-4799-8cab-0c71ffe13018", + "aprlGuid": "96cb8331-6b06-8242-8ce8-4e2f665dc679", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-erdirect-about?source=recommendations#circuit-sizes", - "name": "About ExpressRoute Direct Circuit Sizes" + "url": "https://learn.microsoft.com/azure/storage/blobs/monitor-blob-storage", + "name": "Monitor Azure Blob Storage" + }, + { + "url": "https://learn.microsoft.com/azure/storage/blobs/blob-storage-monitoring-scenarios", + "name": "Best practices for monitoring Azure Blob Storage" } ], - "recommendationControl": "Scalability", - "longDescription": "Provisioning ExpressRoute circuits 
on a 10-Gbps or 100-Gbps ExpressRoute Direct resource up to 20-Gbps or 200-Gbps is possible but not recommended for resiliency. If an ExpressRoute Direct port fails, and circuits are using full capacity, the remaining port won't handle the extra load.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "For critical applications and business processes relying on Azure, monitoring and alerts are crucial. Resource logs are only stored after creating a diagnostic setting to route logs to specified locations, requiring selection of log categories to collect.\n", "pgVerified": true, - "description": "Ensure you do not over-subscribe an ExpressRoute Direct", - "potentialBenefits": "Improves resilience during port failures", - "publishedToLearn": false, + "description": "Monitor all blob storage accounts", + "potentialBenefits": "Enhanced alerting and log analysis", "tags": null, - "recommendationResourceType": "Microsoft.Network/ExpressRoutePorts", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Express Route Directs that are over subscribed\r\nresources\r\n| where type == \"microsoft.network/expressrouteports\"\r\n| where toint(properties['provisionedBandwidthInGbps']) > toint(properties['bandwidthInGbps'])\r\n| project recommendationId = \"0bee356b-7348-4799-8cab-0c71ffe13018\", name, id, tags, param1 = strcat(\"provisionedBandwidthInGbps: \", properties['provisionedBandwidthInGbps']), param2 = strcat(\"bandwidthInGbps: \", properties['bandwidthInGbps'])\r\n\r\n" + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "55815823-d588-4cb7-a5b8-ae581837356e", + "aprlGuid": "2ad78dec-5a4d-4a30-8fd1-8584335ad781", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://azure.github.io/azure-monitor-baseline-alerts/services/Network/expressRoutePorts/", - "name": "Azure Monitor Baseline Alerts - expressRoutePorts" + "url": "https://learn.microsoft.com/azure/storage/common/storage-account-overview#legacy-storage-account-types", + "name": "Legacy storage account types" + }, + { + "url": "https://learn.microsoft.com/azure/storage/common/storage-account-upgrade", + "name": "Upgrade to a general-purpose v2 storage account" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Network Insights for monitoring ExpressRoute Port light levels, bits per second in/out, and line protocol. Set alerts based on Azure Monitor Baseline Alerts for light levels, bits per second in/out, and line protocol exceeding specific thresholds.\n", - "pgVerified": false, - "description": "Configure monitoring and alerting for ExpressRoute Ports", - "potentialBenefits": "Enhanced network performance and health", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "General-purpose v2 accounts are recommended for most storage scenarios offering the latest features or the lowest per-gigabyte pricing. 
Legacy accounts like Standard general-purpose v1 and Blob Storage aren't advised by Microsoft but may fit specific scenarios.\n", + "pgVerified": true, + "description": "Consider upgrading legacy storage accounts to v2 storage accounts", + "potentialBenefits": "Latest features, lowest cost", "tags": null, - "recommendationResourceType": "Microsoft.Network/expressRoutePorts", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n" + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Azure Storage Accounts that are upgradeable to General purpose v2.\nResources\n| where type =~ \"Microsoft.Storage/storageAccounts\" and kind in~ (\"Storage\", \"BlobStorage\")\n| extend\n param1 = strcat(\"AccountKind: \", case(kind =~ \"Storage\", \"Storage (general purpose v1)\", kind =~ \"BlobStorage\", \"BlobStorage\", kind)),\n param2 = strcat(\"Performance: \", sku.tier),\n param3 = strcat(\"Replication: \", sku.name)\n| project recommendationId = \"2ad78dec-5a4d-4a30-8fd1-8584335ad781\", name, id, tags, param1, param2, param3\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d0cfe47f-686b-5043-bf83-5a3868acb80a", + "aprlGuid": "dc55be60-6f8c-461e-a9d5-a3c7686ed94e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-monitor?pivots=front-door-standard-premium#access-logs", - "name": "Azure Web Application Firewall monitoring and logging - Access Log" - }, - { - "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-tuning?pivots=front-door-standard-premium#understanding-waf-logs", - "name": "Understanding WAF logs" - }, - { - "url": 
"https://learn.microsoft.com/azure/web-application-firewall/ag/application-gateway-waf-configuration?tabs=portal", - "name": "Web Application Firewall exclusion lists" + "url": "https://learn.microsoft.com/en-us/azure/architecture/example-scenario/wvd/windows-virtual-desktop#azure-virtual-desktop-limitations", + "name": "Learn More" }, { - "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/web-application-firewall-troubleshoot#fixing-false-positives", - "name": "Fixing a false positive" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-virtual-desktop/networking#private-endpoints-private-link", + "name": "Private Link" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "WAF may mistakenly block legitimate requests (false positives). These can be identified by examining the last 24 hours of blocked requests in Log Analytics.\n", + "recommendationControl": "Security", + "longDescription": "Leverage Azure Private Link Service for secure access to Azure Storage and services via Private Endpoint in your VNet. Eliminate the need for public IPs, ensuring data privacy. 
Enjoy granular access control for enhanced security.\n", "pgVerified": true, - "description": "Inspect Azure Front Door WAF logs for wrongfully blocked legitimate requests", - "potentialBenefits": "Reduces false positives, improves access", - "publishedToLearn": false, + "description": "Enable Azure Private Link service for storage accounts", + "potentialBenefits": "Secure, private access to storage with no public IPs", "tags": null, - "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This resource graph query will return all storage accounts that do not have a Private Endpoint Connection or where a private endpoint exists but public access is enabled\nresources\n| where type =~ \"Microsoft.Storage/StorageAccounts\"\n| where isnull(properties.privateEndpointConnections) or properties.privateEndpointConnections[0].properties.provisioningState != (\"Succeeded\") or (isnull(properties.networkAcls) and properties.publicNetworkAccess == 'Enabled')\n| extend param1 = strcat('Private Endpoint: ', iif(isnotnull(properties.privateEndpointConnections),split(properties.privateEndpointConnections[0].properties.privateEndpoint.id,'/')[8],'No Private Endpoint'))\n| extend param2 = strcat('Access: ', iif(properties.publicNetworkAccess == 'Disabled', 'Public Access Disabled', iif(isnotnull(properties.networkAcls), 'NetworkACLs in place','Public Access Enabled')))\n| project recommendationId = \"dc55be60-6f8c-461e-a9d5-a3c7686ed94e\", name, id, tags, param1, param2\n" }, { "publishedToAdvisor": null, - "aprlGuid": "537b4d94-edd1-4041-b13d-8217dfa485f0", + "aprlGuid": "493f6079-3bb6-4a56-96ba-ab3248474cb1", "recommendationTypeId": null, 
"recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/application-gateway-waf-metrics#logs-and-diagnostics", - "name": "Azure Web Application Firewall Monitoring and Logging" - }, - { - "url": "https://learn.microsoft.com/azure/web-application-firewall/ag/web-application-firewall-logs#diagnostic-logs", - "name": "Diagnostic logs" + "url": "https://learn.microsoft.com/azure/app-service/troubleshoot-diagnostic-logs", + "name": "Enable diagnostics logging for apps in Azure App Service" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "WAF may block legitimate requests as false positives. Identifying blocked requests within the last 24 hours through Log Analytics can help manage and mitigate these incorrect blockages efficiently.\n", - "pgVerified": true, - "description": "Check Azure Application Gateway WAF logs for mistakenly blocked valid requests", - "potentialBenefits": "Improve false positive identification", - "publishedToLearn": false, + "longDescription": "Enabling diagnostics logging for your Azure App Service is crucial for monitoring and diagnostics, including both application logging and web server logging.\n", + "pgVerified": false, + "description": "Enable diagnostics logging", + "potentialBenefits": "Monitoring and Alerting", "tags": null, - "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5357ae22-0f52-1a49-9fd4-1f00ace6add0", + "aprlGuid": "a7e8bb3d-8ceb-442d-b26f-007cd63f9ffc", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/web-application-firewall/ag/ag-overview#waf-monitoring", - "name": "WAF monitoring" + "url": "https://learn.microsoft.com/azure/application-insights/app-insights-overview", + "name": "Application Insights" }, { - "url": "https://github.com/Azure/Azure-Network-Security/tree/master/Azure%20WAF/Workbook%20-%20WAF%20Monitor%20Workbook", - "name": "Azure Monitor Workbook for WAF" + "url": "https://learn.microsoft.com/azure/azure-monitor/app/azure-web-apps", + "name": "Application monitoring for Azure App Service" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring the health of your Web Application Firewall and the applications it protects is crucial. This can be achieved through integration with Microsoft Defender for Cloud, Azure Monitor, and Azure Monitor logs, ensuring optimal performance and security.\n", + "longDescription": "Use Application Insights to monitor app performance and load behavior, offering real-time insights, issue diagnosis, and root-cause analysis. 
It supports ASP.NET, ASP.NET Core, Java, and Node.js on Azure App Service, now with built-in monitoring.\n", "pgVerified": false, - "description": "Monitor Web Application Firewall", - "potentialBenefits": "Enhanced security and health insight", - "publishedToLearn": false, + "description": "Monitor Performance", + "potentialBenefits": "Real-time insights and issue diagnosis", "tags": null, - "recommendationResourceType": "Microsoft.Network/frontdoorWebApplicationFirewallPolicies", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "38c3bca1-97a1-eb42-8cd3-838b243f35ba", + "aprlGuid": "78a5c033-ff51-4332-8a71-83464c34494b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-load-balancer/reliability", - "name": "Reliability and Azure Load Balancer" - }, - { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", - "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#app-service", + "name": "Resiliency checklist for specific Azure services" } ], - "recommendationControl": "High Availability", - "longDescription": "Selecting Standard SKU Load Balancer enhances reliability through availability zones and zone resiliency, ensuring deployments withstand zone and region failures. 
Unlike Basic, it supports global load balancing and offers an SLA.\n", - "pgVerified": true, - "description": "Use Standard Load Balancer SKU", - "potentialBenefits": "Enhanced reliability and SLA support", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "If your solution includes both a web front end and a web API, decomposing them into separate App Service apps facilitates solution decomposition by workload, allowing for independent scaling. Initially, you can deploy both in the same plan and separate them for independent scaling when necessary.\n", + "pgVerified": false, + "description": "Separate web apps from web APIs", + "potentialBenefits": "Independent scaling, easier management", "tags": null, - "recommendationResourceType": "Microsoft.Network/loadBalancers", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all LoadBalancers using Basic SKU\r\nresources\r\n| where type =~ 'Microsoft.Network/loadBalancers'\r\n| where sku.name == 'Basic'\r\n| project recommendationId = \"38c3bca1-97a1-eb42-8cd3-838b243f35ba\", name, id, tags, Param1=strcat(\"sku-tier: basic\")\r\n\r\n" + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6d82d042-6d61-ad49-86f0-6a5455398081", + "aprlGuid": "3f9ddb59-0bb3-4acb-9c9b-99aa1776f0ab", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", - "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#app-service", + "name": "Resiliency checklist" } ], - "recommendationControl": "High Availability", - 
"longDescription": "Deploying Azure Load Balancers with at least two instances in the backend prevents a single point of failure and supports scalability. Pairing with Virtual Machine Scale Sets is advised for optimal scale building.\n", - "pgVerified": true, - "description": "Ensure the Backend Pool contains at least two instances", - "potentialBenefits": "Enhances reliability and scalability", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Creating a separate storage account for logs and not using the same one for application data prevents logging activities from reducing application performance by ensuring that the resources dedicated to handling application data are not burdened by logging processes.\n", + "pgVerified": false, + "description": "Create a separate storage account for logs", + "potentialBenefits": "Improves app performance", "tags": null, - "recommendationResourceType": "Microsoft.Network/loadBalancers", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all LoadBalancers which only have 1 backend pool defined or only 1 VM in the backend pool\r\nresources\r\n| where type =~ 'Microsoft.Network/loadBalancers'\r\n| extend bep = properties.backendAddressPools\r\n| extend BackEndPools = array_length(bep)\r\n| where BackEndPools == 0\r\n| project recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", name, id, Param1=\"backendPools\", Param2=toint(0), tags\r\n| union (resources\r\n | where type =~ 'Microsoft.Network/loadBalancers'\r\n | where sku.name == \"Standard\"\r\n | extend bep = properties.backendAddressPools\r\n | extend BackEndPools = toint(array_length(bep))\r\n | mv-expand bip = properties.backendAddressPools\r\n | extend BackendAddresses = array_length(bip.properties.loadBalancerBackendAddresses)\r\n | where toint(BackendAddresses) <= 1\r\n | project recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", name, id, 
tags, Param1=\"backendAddresses\", Param2=toint(BackendAddresses))\r\n| union (\r\n resources\r\n | where type =~ 'Microsoft.Network/loadBalancers'\r\n | where sku.name == \"Basic\"\r\n | mv-expand properties.backendAddressPools\r\n | extend backendPoolId = properties_backendAddressPools.id\r\n | project id, name, tags, tostring(backendPoolId), recommendationId = \"6d82d042-6d61-ad49-86f0-6a5455398081\", Param1=\"BackEndPools\"\r\n | join kind = leftouter (\r\n resources\r\n | where type =~ \"Microsoft.Network/networkInterfaces\"\r\n | mv-expand properties.ipConfigurations\r\n | mv-expand properties_ipConfigurations.properties.loadBalancerBackendAddressPools\r\n | extend backendPoolId = tostring(properties_ipConfigurations_properties_loadBalancerBackendAddressPools.id)\r\n | summarize poolMembers = count() by backendPoolId\r\n | project tostring(backendPoolId), poolMembers ) on backendPoolId\r\n | where toint(poolMembers) <= 1\r\n | extend BackendAddresses = poolMembers\r\n | project id, name, tags, recommendationId, Param1=\"backendAddresses\", Param2=toint(BackendAddresses))\r\n" + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8d319a05-677b-944f-b9b4-ca0fb42e883c", - "recommendationTypeId": null, + "aprlGuid": "a1d91661-32d4-430b-b3b6-5adeb0975df7", + "recommendationTypeId": "1d3b5a51-62d4-4b77-96f6-40ed0a3aa21f", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#azure-load-balancer", - "name": "Resiliency checklist for specific Azure services- Azure Load Balancer" + "url": "https://learn.microsoft.com/azure/app-service-web/web-sites-staged-publishing", + "name": "Set up staging environments in Azure App Service" } ], - "recommendationControl": "High Availability", - "longDescription": 
"Outbound rules for Standard Public Load Balancer involve manual port allocation for backend pools, limiting scalability and risk of SNAT port exhaustion. NAT Gateway is recommended for its dynamic scaling and secure internet connectivity.\n", - "pgVerified": true, - "description": "Use NAT Gateway instead of Outbound Rules for Production Workloads", - "potentialBenefits": "Enhanced scalability and reliability", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "Create a deployment slot for staging to deploy updates, verify them, and ensure all instances are warmed up before production swap, reducing bad update chances. An LKG slot allows easy rollback to a previous good deployment if issues arise later, enhancing reliability.\n", + "pgVerified": false, + "description": "Deploy to a staging slot", + "potentialBenefits": "Safer updates and easy rollback", "tags": null, - "recommendationResourceType": "Microsoft.Network/loadBalancers", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all LoadBalancers with Outbound rules configured\r\nresources\r\n| where type =~ 'Microsoft.Network/loadBalancers'\r\n| extend outboundRules = array_length(properties.outboundRules)\r\n| where outboundRules > 0\r\n| project recommendationId = \"8d319a05-677b-944f-b9b4-ca0fb42e883c\", name, id, tags, Param1 = \"outboundRules: >=1\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Display App Service with the count of deployment slots for Apps under eligible App service plans and it shows if deployment slot is enabled or not\n\nresources\n| where type =~ 'microsoft.web/sites' or type =~ 'microsoft.web/sites/slots'\n| extend isSlot = iff(type =~ 'microsoft.web/sites/slots', 1, 0)\n| extend AspName = iff(isSlot == 1, split(name, '/')[0], name)\n| extend Sku = tostring(properties.sku)\n| 
where tolower(Sku) contains \"standard\" or tolower(Sku) contains \"premium\" or tolower(Sku) contains \"isolatedv2\"\n| project id, name, AspName, isSlot, Sku\n| summarize Slots = countif(isSlot == 1) by id, name, AspName, Sku\n| extend DeploymentSlotEnabled = iff(Slots > 1, true, false)\n| where DeploymentSlotEnabled = false\n| project recommendationId=\"a1d91661-32d4-430b-b3b6-5adeb0975df7\", name, id, tags=\"\", param1=Sku, param2=Slots, param3=\"DeploymentSlotEnabled=false\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "621dbc78-3745-4d32-8eac-9e65b27b7512", + "aprlGuid": "0b80b67c-afbe-4988-ad58-a85a146b681e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-standard-availability-zones#zone-redundant", - "name": "Load Balancer and Availability Zones" + "url": "https://learn.microsoft.com/azure/app-service-web/web-sites-configure", + "name": "Configure web apps in Azure App Service" } ], - "recommendationControl": "High Availability", - "longDescription": "In regions with Availability Zones, assigning a zone-redundant frontend IP to a Standard Load Balancer ensures continuous traffic distribution even if one availability zone fails, provided other healthy zones and backend instances are available to receive the traffic.\n", - "pgVerified": true, - "description": "Ensure Standard Load Balancer is zone-redundant", - "potentialBenefits": "Enhances uptime and resilience", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "Use app settings for configuration and define them in Resource Manager templates or via PowerShell to facilitate part of an automated deployment/update process for improved reliability.\n", + "pgVerified": false, + "description": "Store configuration as app settings", + "potentialBenefits": "Enhanced reliability via automation", "tags": null, - 
"recommendationResourceType": "Microsoft.Network/loadBalancers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all LoadBalancers with with regional or zonal public IP Addresses\r\nresources\r\n| where type == \"microsoft.network/loadbalancers\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where tolower(sku.name) != 'basic'\r\n| mv-expand feIPconfigs = properties.frontendIPConfigurations\r\n| extend\r\n feConfigName = (feIPconfigs.name),\r\n PrivateSubnetId = toupper(feIPconfigs.properties.subnet.id),\r\n PrivateIPZones = feIPconfigs.zones,\r\n PIPid = toupper(feIPconfigs.properties.publicIPAddress.id),\r\n JoinID = toupper(id)\r\n| where isnotempty(PrivateSubnetId)\r\n| where isnull(PrivateIPZones) or array_length(PrivateIPZones) < 2\r\n| project name, feConfigName, id\r\n| union (resources\r\n | where type == \"microsoft.network/loadbalancers\"\r\n | where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", 
\"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n | where tolower(sku.name) != 'basic'\r\n | mv-expand feIPconfigs = properties.frontendIPConfigurations\r\n | extend\r\n feConfigName = (feIPconfigs.name),\r\n PIPid = toupper(feIPconfigs.properties.publicIPAddress.id),\r\n JoinID = toupper(id)\r\n | where isnotempty(PIPid)\r\n | join kind=innerunique (\r\n resources\r\n | where type == \"microsoft.network/publicipaddresses\"\r\n | where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n | where isnull(zones) or array_length(zones) < 2\r\n | extend\r\n LBid = toupper(substring(properties.ipConfiguration.id, 0, indexof(properties.ipConfiguration.id, '/frontendIPConfigurations'))),\r\n InnerID = toupper(id)\r\n ) on $left.PIPid == $right.InnerID)\r\n| project recommendationId = \"621dbc78-3745-4d32-8eac-9e65b27b7512\", name, id, tags, param1=\"Zones: No Zone or Zonal\", param2=strcat(\"Frontend IP Configuration:\", \" \", feConfigName)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Provides a list of Azure App Service resources that don't have App Settings configured\n\nappserviceresources\n| where type == \"microsoft.web/sites/config\"\n| extend AppSettings = iif(isempty(properties.AppSettings), true, false)\n| where AppSettings == false\n| 
project recommendationId=\"0b80b67c-afbe-4988-ad58-a85a146b681e\", id, name, tags=\"\", param1=\"AppSettings is not configured\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e5f5fcea-f925-4578-8599-9a391e888a60", + "aprlGuid": "fd049c28-ae6d-48f0-a641-cc3ba1a3fe1d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/load-balancer/load-balancer-custom-probe-overview", - "name": "Load Balancer Health Probe Overview" + "url": "https://learn.microsoft.com/en-us/azure/app-service/monitor-instances-health-check?tabs=dotnet#enable-health-check", + "name": "Monitor the health of App Service instances" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Health probes are used by Azure Load Balancers to determine the status of backend endpoints. Using custom health probes that are aligned with vendor recommendations enhances understanding of backend availability and facilitates monitoring of backend services for any impact.\n", - "pgVerified": true, - "description": "Use Health Probes to detect backend instances availability", - "potentialBenefits": "Ensures backend uptime monitoring.", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "Use Health Check for production workloads. Health check increases your application's availability by rerouting requests away from unhealthy instances, and replacing instances if they remain unhealthy. 
The Health check path should check critical components of your application.\n", + "pgVerified": false, + "description": "Enable Health check for App Services", + "potentialBenefits": "Enhanced reliability via automation", "tags": null, - "recommendationResourceType": "Microsoft.Network/loadBalancers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// List the load balancers which don't have health probe configured\r\nresources\r\n| where type =~ \"microsoft.network/loadbalancers\"\r\n| where array_length(properties.probes) == 0\r\n| project recommendationId=\"e5f5fcea-f925-4578-8599-9a391e888a60\", name, id, tags, param1=\"customHealthProbeUsed: false\"\r\n" + "query": "// Azure Resource Graph Query\n// Check if Health Check is enabled for App Service\n\nresources\n| where type =~ 'microsoft.web/sites'\n| where properties.kind has 'app'\n| join kind = inner\n (\n appserviceresources\n | where isnull(properties.HealthCheckPath) == true\n | project name\n ) on name\n| project recommendationId = \"fd049c28-ae6d-48f0-a641-cc3ba1a3fe1d\", name, id, tags, param1 = \"Healthcheckpath = not set\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4281631c-3d19-4994-8d96-084c2a51a534", + "aprlGuid": "aab6b4a4-9981-43a4-8728-35c7ecbb746d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-gateway-design#scale-a-nat-gateway-to-meet-the-demand-of-a-dynamic-workload", - "name": "Scale a NAT gateway to meet the demand of a dynamic workload" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-metrics#total-snat-connection-count", - "name": "Total SNAT Connection Count" + "url": "https://learn.microsoft.com/en-us/azure/app-service/app-service-ip-restrictions?tabs=azurecli", + "name": "Set up Azure App 
Service access restrictions" } ], - "recommendationControl": "Scalability", - "longDescription": "NAT Gateway provides 64,512 SNAT ports per public IP address and supports up to 16 public IP addresses. Monitor \"Total SNAT connection count\" metric to determine if you're nearing the connection limit of NAT gateway. You can scale the NAT gateway by adding more public IP addresses.\n", + "recommendationControl": "Governance", + "longDescription": "Use network access restrictions to define a priority-ordered allow/deny list that controls network access to your app. Web application firewalls, such as the one available in Application Gateway, are recommended for protection of public-facing web applications.\n", "pgVerified": false, - "description": "Scale a NAT gateway to meet the demand of a dynamic workload", - "potentialBenefits": "Enhances reliability and scalability", - "publishedToLearn": false, + "description": "Configure network access restrictions", + "potentialBenefits": "Enhanced security", "tags": null, - "recommendationResourceType": "Microsoft.Network/natGateways", + "recommendationResourceType": "Microsoft.Web/sites", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Check if Network access restrictions defined for App service\n\nresources\n| where type =~ 'microsoft.web/sites'\n| where properties.kind has 'app'\n| join kind = inner\n (\n appserviceresources\n | mv-expand IpSecurityRestrictions = properties.IpSecurityRestrictions\n | where isnotnull(IpSecurityRestrictions) == true\n | project name\n ) on name\n| project recommendationId = \"aab6b4a4-9981-43a4-8728-35c7ecbb746d\", name, id, tags, param1 = \"No network restrictions set\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "babf75d6-6407-4d90-b01e-5a1768e621f5", + "aprlGuid": "9e6682ac-31bc-4635-9959-ab74b52454e6", "recommendationTypeId": null, 
"recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-metrics", - "name": "What is Azure NAT Gateway metrics and alerts?" - }, - { - "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/natGateways/", - "name": "AMBA - NAT Gateway" + "url": "https://azure.github.io/AppService/2020/05/15/Robust-Apps-for-the-cloud.html", + "name": "Ultimate guide to running healthy apps in the cloud" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Network Insights for monitoring and alerting on your NAT gateway.Use Total SNAT connection count metric to determine if you're nearing the connection limit of NAT gateway. Set alerts based on Azure Monitor Baseline Alerts (AMBA) thresholds for NAT Gateway\n", + "recommendationControl": "Scalability", + "longDescription": "App Service should be configured with a minimum of two instances for production workloads. If apps have a longer warmup time a minimum of three instances should be used.\n", "pgVerified": false, - "description": "Configure monitoring and alerting for NAT gateway", - "potentialBenefits": "Enhanced network performance and health", - "publishedToLearn": false, + "description": "Set minimum instance count to 2 for app service", + "potentialBenefits": "Improves app performance", "tags": null, - "recommendationResourceType": "Microsoft.Network/natGateways", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of App services that do not have minimum instance count of 2\n\nresources\n| where type =~ 'microsoft.web/sites'\n| where properties.kind has 'app'\n| join kind = inner\n (\n appserviceresources\n | where properties.PreWarmedInstanceCount < 2\n | project 
name\n ) on name\n| project recommendationId = \"9e6682ac-31bc-4635-9959-ab74b52454e6\", name, id, tags, param1 = \"PreWarmedInstanceCount is less than 2\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "419df1ea-336b-460a-b6b2-fefe2588fcef", + "aprlGuid": "c6c4b962-5af4-447a-9d74-7b9c53a5dff5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/nat-gateway/nat-availability-zones#zonal-nat-gateway-resource-for-each-zone-in-a-region-to-create-zone-resiliency", - "name": "Zonal NAT gateway resource for each zone in a region to create zone-resiliency" + "url": "https://azure.github.io/AppService/2018/09/10/Announcing-the-New-Auto-Healing-Experience-in-App-Service-Diagnostics.html", + "name": "Announcing the New Auto Healing Experience in App Service Diagnostics - Azure App Service" } ], "recommendationControl": "High Availability", - "longDescription": "A zonal promise for zone isolation scenarios exists when a virtual machine instance using a NAT gateway resource is in the same zone as the NAT gateway resource and its public IP addresses. The pattern you want to use for zone isolation is creating a \"zonal stack\" per availability zone.\n", + "longDescription": "Auto Heal allows you to mitigate your apps when it runs into unexpected situations like HTTP server errors, resource exhaustion, etc. 
You can configure different triggers based on your need and choose to recycle the app to recover it from a bad state.\n", "pgVerified": false, - "description": "Consider zonal NAT gateway deployment for zone isolation scenarios", - "potentialBenefits": "Enhances reliability and scalability", - "publishedToLearn": false, + "description": "Enable auto heal for Functions App", + "potentialBenefits": "Improved app availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/natGateways", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure Function App resources that do not have auto heal enabled\n\nResources\n| where type =~ 'microsoft.web/sites'\n| where properties.kind contains 'functionapp'\n| join kind=inner\n (appserviceresources\n | where type == \"microsoft.web/sites/config\"\n | where properties.AutoHealEnabled == 'false'\n | project id, name, tenantId, location, resourceGroup, properties.AutoHealEnabled\n ) on name\n| project recommendationID = \"c6c4b962-5af4-447a-9d74-7b9c53a5dff5\", name, id, type, kind, param1=\"AutoHealEnabled =false\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d2976d3e-294b-4b49-a1f0-c42566a3758f", + "aprlGuid": "52f368ee-1d77-4b34-92db-64be269642d0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/diagnostic-settings", - "name": "Diagnostic settings in Azure Monitor" + "url": "https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-warmup?tabs=in-process%2Cnodejs-v4&pivots=programming-language-csharp#trigger", + "name": "Azure Functions Warmup Trigger" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": 
"Resource Logs are not collected and stored until you create a diagnostic setting and route them to one or more locations.\n", - "pgVerified": true, - "description": "Configure Diagnostic Settings for all network security groups", - "potentialBenefits": "Enhanced monitoring and security insights", - "publishedToLearn": false, + "longDescription": "Add a warmup trigger to pre-load custom dependencies during the pre-warming process so that your functions are ready to start processing requests immediately.\n", + "pgVerified": false, + "description": "No warmup trigger added to Function App", + "potentialBenefits": "Improved app availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationResourceType": "Microsoft.Web/sites", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8bb4a57b-55e4-d24e-9c19-2679d8bc779f", + "aprlGuid": "0b06a688-0dd6-4d73-9f72-6666ff853ca9", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/activity-log?tabs=powershell", - "name": "Azure Monitor activity log" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules", + "name": "Resource naming restrictions - Azure Resource Manager" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Create Alerts with Azure Monitor for operations like creating or updating Network Security Group rules to catch unauthorized/undesired changes to resources and spot attempts to bypass firewalls or access resources from the outside.\n", - "pgVerified": true, - "description": "Monitor changes in Network Security Groups with Azure Monitor", - "potentialBenefits": "Enhanced security and change monitoring", - "publishedToLearn": 
false, + "recommendationControl": "Governance", + "longDescription": "A host ID must be between 1 and 32 characters, contain only lowercase letters, numbers, and dashes, not start or end with a dash, and not contain consecutive dashes. The host ID value should be unique for all apps/slots you're running.\n", + "pgVerified": false, + "description": "Ensure unique hostid set for Function App", + "potentialBenefits": "Easier management", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationResourceType": "Microsoft.Web/sites", "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Network Security Groups without alerts for modification configured.\r\nresources\r\n| where type =~ \"Microsoft.Network/networkSecurityGroups\"\r\n| project name, id, tags, lowerCaseNsgId = tolower(id)\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ \"Microsoft.Insights/activityLogAlerts\" and properties.enabled == true\r\n | mv-expand scope = properties.scopes\r\n | where scope has \"Microsoft.Network/networkSecurityGroups\"\r\n | project alertName = name, conditionJson = dynamic_to_json(properties.condition.allOf), scope\r\n | where conditionJson has '\"Administrative\"' and (\r\n // Create or Update Network Security Group\r\n (conditionJson has '\"Microsoft.Network/networkSecurityGroups/write\"') or\r\n // All administrative operations\r\n (conditionJson !has '\"Microsoft.Network/networkSecurityGroups/write\"' and conditionJson !has '\"Microsoft.Network/networkSecurityGroups/delete\"' and conditionJson !has '\"Microsoft.Network/networkSecurityGroups/join/action\"')\r\n )\r\n | project lowerCaseNsgIdOfScope = tolower(scope)\r\n )\r\n on $left.lowerCaseNsgId == $right.lowerCaseNsgIdOfScope\r\n| where isempty(lowerCaseNsgIdOfScope)\r\n| project recommendationId = \"8bb4a57b-55e4-d24e-9c19-2679d8bc779f\", name, id, tags, param1 = \"ModificationAlert: Not 
configured/Disabled\"\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "52ac35e8-9c3e-f84d-8ce8-2fab955333d3", + "aprlGuid": "c9a278b7-024b-454b-bd54-41587c512b74", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-resource-manager/management/lock-resources?toc=%2Fazure%2Fvirtual-network%2Ftoc.json&tabs=json", - "name": "Lock your resources to protect your infrastructure" + "url": "https://learn.microsoft.com/en-us/azure/azure-functions/migrate-version-3-version-4?tabs=net6-in-proc%2Cazure-cli%2Cwindows&pivots=programming-language-csharp", + "name": "Migrate version 3.x to 4.x" } ], "recommendationControl": "Governance", - "longDescription": "As an administrator, you can lock an Azure subscription, resource group, or resource to protect them from accidental deletions and modifications. The lock overrides user permissions. Locks can prevent either deletions or modifications and are known as Delete and Read-only in the portal.\n", - "pgVerified": true, - "description": "Configure locks for Network Security Groups to avoid accidental changes and/or deletion", - "potentialBenefits": "Prevents accidental edits/deletions", - "publishedToLearn": false, + "longDescription": "Beginning on December 13, 2022, function apps running on versions 2.x and 3.x of the Azure Functions runtime have reached the end of life (EOL) of extended support. 
We highly recommend migrating your function apps to version 4.x of the Functions runtime.\n", + "pgVerified": false, + "description": "Ensure Function App runs a supported version", + "potentialBenefits": "Better governance", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Web/sites", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "da1a3c06-d1d5-a940-9a99-fcc05966fe7c", + "aprlGuid": "7c608f46-46b2-4cc0-bbd6-1d457c16671c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/network-watcher/network-watcher-nsg-flow-logging-overview", - "name": "Flow logging for network security groups" + "url": "https://learn.microsoft.com/en-us/azure/azure-functions/functions-app-settings#functions_worker_runtime", + "name": "FUNCTIONS_WORKER_RUNTIME" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring, managing, and understanding your network is crucial for protection and optimization. Knowing the current state, who and from where connections are made, open internet ports, expected and irregular behavior, and traffic spikes is essential.\n", - "pgVerified": true, - "description": "Configure NSG Flow Logs", - "potentialBenefits": "Enhances security and optimizes network", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "The FUNCTIONS_WORKER_RUNTIME setting in the Function App configuration should be set to the appropriate value based on the language you are using. 
This setting is used to determine the language worker that will be used to execute your functions.\n", + "pgVerified": false, + "description": "Ensure FUNCTIONS_WORKER_RUNTIME is set properly", + "potentialBenefits": "Better governance", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", + "recommendationResourceType": "Microsoft.Web/sites", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Network Security Groups without NSG Flow logs configured or disabled.\r\nresources\r\n| where type =~ \"Microsoft.Network/networkSecurityGroups\"\r\n| project name, id, tags, lowerCaseNsgId = tolower(id)\r\n| join kind = leftouter (\r\n resources\r\n | where type == \"microsoft.network/networkwatchers/flowlogs\" and properties.enabled == true\r\n | project flowLogName = name, lowerCaseTargetNsgId = tolower(properties.targetResourceId)\r\n )\r\n on $left.lowerCaseNsgId == $right.lowerCaseTargetNsgId\r\n| where isempty(lowerCaseTargetNsgId)\r\n| project recommendationId = \"da1a3c06-d1d5-a940-9a99-fcc05966fe7c\", name, id, tags, param1 = \"NSGFlowLog: Not configured/Disabled\"\r\n\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8291c1fa-650c-b44b-b008-4deb7465919d", + "aprlGuid": "88cb90c2-3b99-814b-9820-821a63f600dd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-network/network-security-groups-overview#security-rules", - "name": "Security rules" + "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-app-service", + "name": "Migrate App Service to availability zone support" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/enterprise-integration/ase-high-availability-deployment", + "name": "High availability enterprise deployment 
using App Service Environment" } ], - "recommendationControl": "Security", - "longDescription": "Azure network security groups filter network traffic between resources in a virtual network, using security rules to allow or deny inbound or outbound traffic based on source, destination, port, and protocol.\n", - "pgVerified": true, - "description": "The NSG only has Default Security Rules, make sure to configure the necessary rules", - "potentialBenefits": "Enhanced traffic control and security", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure's feature of deploying App Service plans across availability zones enhances resiliency and reliability by ensuring operation during datacenter failures, providing redundancy without needing different regions, thus minimizing downtime and maintaining uninterrupted services.\n", + "pgVerified": false, + "description": "Migrate App Service to availability Zone Support", + "potentialBenefits": "Enhances app resiliency and reliability", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkSecurityGroups", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Web/serverFarms", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all NSGs that have NO security rules\r\nresources\r\n| where type =~ \"microsoft.network/networksecuritygroups\"\r\n| extend sr = string_size(properties.securityRules)\r\n| where sr <=2 or isnull(properties.securityRules)\r\n| project recommendationId = \"8291c1fa-650c-b44b-b008-4deb7465919d\", name, id\r\n\r\n" + "query": "// Azure Resource Graph Query\n// The query filters the qualified App Service Plans that do not have Zone Redundancy enabled.\n// Its important to check regions that support availability zones for Azure App Services running on multi-tenant and App Service Environments 
https://learn.microsoft.com/en-us/azure/reliability/reliability-app-service?tabs=graph%2Ccli#:~:text=The%20following%20regions%20support%20Azure%20App%20Services%20running%20on%20multi%2Dtenant%20environments%3A\n\nresources\n| where type =~ 'microsoft.web/serverfarms'\n| extend zoneRedundant = tobool(properties.zoneRedundant)\n| extend sku_tier = tostring(sku.tier)\n| where (tolower(sku_tier) contains \"isolated\" or tolower(sku_tier) contains \"premium\") and zoneRedundant == false\n| project recommendationId=\"88cb90c2-3b99-814b-9820-821a63f600dd\", name, id, tags, param1=sku_tier, param2=\"Not Zone Redundant\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4e133bd0-8762-bc40-a95b-b29142427d73", + "aprlGuid": "b2113023-a553-2e41-9789-597e2fb54c31", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/network-watcher/network-watcher-overview", - "name": "What is Azure Network Watcher?" + "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", + "name": "Resiliency checklist for specific Azure services" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Azure Network Watcher offers tools for monitoring, diagnosing, viewing metrics, and managing logs for IaaS resources. 
It helps maintain the health of VMs, VNets, application gateways, load balancers, but not for PaaS or Web analytics.\n", - "pgVerified": true, - "description": "Deploy Network Watcher in all regions where you have networking services", - "potentialBenefits": "Enhanced monitoring and diagnostics for Azure IaaS", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Choose Standard/Premium Azure App Service Plan for robust apps with advanced scaling, high availability, better performance, and multiple slots, ensuring resilience and continuous operation.\n", + "pgVerified": false, + "description": "Use Standard or Premium tier", + "potentialBenefits": "Enhanced scaling and reliability", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkWatchers", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Web/serverFarms", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all locations that do not have a Network Watcher deployed\r\nresources\r\n| where location != \"global\"\r\n| union (Resources\r\n | where type =~ \"microsoft.network/networkwatchers\")\r\n| summarize NetworkWatcherCount = countif(type =~ 'Microsoft.Network/networkWatchers') by location\r\n| where NetworkWatcherCount == 0\r\n| project recommendationId = \"4e133bd0-8762-bc40-a95b-b29142427d73\", name=location, id=\"n/a\", param1 = strcat(\"LocationMisingNetworkWatcher:\", location)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Provides a list of Azure App Service Plans that are not in the \"Standard\", \"Premium\", or \"IsolatedV2\" SKU tiers.\n\nresources\n| where type =~ 'microsoft.web/serverfarms'\n| extend sku_tier = tostring(sku.tier)\n| where tolower(sku_tier) !contains \"standard\" and\n tolower(sku_tier) !contains \"premium\" and\n tolower(sku_tier) !contains \"isolatedv2\"\n| project 
recommendationId=\"b2113023-a553-2e41-9789-597e2fb54c31\", name, id, tags, param1= strcat(\"SKU=\",sku_tier)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "22a769ed-0ecb-8b49-bafe-8f52e6373d9c", + "aprlGuid": "07243659-4643-d44c-a1c6-07ac21635072", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/network-watcher/nsg-flow-logging", - "name": "Manage NSG flow logs using the Azure portal" + "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", + "name": "Resiliency checklist for specific Azure services" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Network security group flow logging is a feature of Azure Network Watcher that logs IP traffic info through a network security group. If in Failed state, monitoring data from the associated resource is not collected.\n", - "pgVerified": true, - "description": "Fix Flow Log configurations in Failed state or Disabled Status", - "potentialBenefits": "Ensures IP traffic logging", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Avoid frequent scaling up/down of Azure App Service instances to prevent service disruptions. 
Choose the right tier and size for the workload and scale out for traffic changes, as scaling adjustments can trigger application restarts.\n", + "pgVerified": false, + "description": "Avoid scaling up or down", + "potentialBenefits": "Minimizes restarts, enhances stability", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkWatchers", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Web/serverFarms", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all Network Watcher Flow Logs that are not enabled or in a succeeded state\r\nresources\r\n| where type =~ \"microsoft.network/networkwatchers/flowlogs\" and isnotnull(properties)\r\n| extend targetResourceId = tostring(properties.targetResourceId)\r\n| extend status = iff(properties.enabled =~ 'true', \"Enabled\", \"Disabled\")\r\n| extend provisioningState = tostring(properties.provisioningState)\r\n| extend flowLogType = iff(properties.targetResourceId contains \"Microsoft.Network/virtualNetworks\", 'Virtual network', 'Network security group')\r\n| where provisioningState != \"Succeeded\" or status != \"Enabled\"\r\n| project recommendationId = \"22a769ed-0ecb-8b49-bafe-8f52e6373d9c\", name, id, tags, param1 = strcat(\"provisioningState:\", provisioningState), param2=strcat(\"Status:\", status), param3=strcat(\"targetResourceId:\",targetResourceId), param4=strcat(\"flowLogType:\",flowLogType)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Provides a list of Azure App Service Plans and the number of changes that was made to the pricing tier, if the count is higher that 3 it means you need to avoid scaling up and down that often\n\nresourcechanges\n| extend changeTime = todatetime(properties.changeAttributes.timestamp), targetResourceId = tostring(properties.targetResourceId),\nchangeType = tostring(properties.changeType), correlationId = 
properties.changeAttributes.correlationId,\nchangedProperties = properties.changes, changeCount = properties.changeAttributes.changesCount\n| where changeTime > ago(14d)\n| join kind=inner (resources | project resources_Name = name, resources_Type = type, resources_Subscription= subscriptionId, resources_ResourceGroup= resourceGroup, id) on $left.targetResourceId == $right.id\n| where resources_Type contains \"microsoft.web/serverfarms\"\n| where changedProperties['sku.name'].propertyChangeType == 'Update' or changedProperties['sku.tier'].propertyChangeType == 'Update'\n| summarize count() by targetResourceId, resources_Name ,tostring(changedProperties['sku.name'].previousValue), tostring(changedProperties['sku.tier'].newValue)\n| project recommendationId=\"07243659-4643-d44c-a1c6-07ac21635072\", name=resources_Name, id=targetResourceId, tags=\"\", param1=['changedProperties_sku.name_previousValue'], param2=['changedProperties_sku.tier_newValue'], param3=count_\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1e28bbc1-1eb7-486f-8d7f-93943f40219c", + "aprlGuid": "dbe3fd66-fb2a-9d46-b162-1791e21da236", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/network-watcher/connection-monitor-overview", - "name": "Connection monitor overview" + "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", + "name": "Resiliency checklist for specific Azure services" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Improves monitoring for Azure and Hybrid connectivity\n", - "pgVerified": true, - "description": "Configure Network Watcher Connection monitor", - "potentialBenefits": "Improves monitoring for Azure and Hybrid connectivity", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "It is strongly recommended to create separate App Service plans for production 
and test environments to avoid using slots within your production deployment for testing purposes.\n", + "pgVerified": false, + "description": "Create separate App Service plans for production and test", + "potentialBenefits": "Protects prod performance; avoids test impact", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkWatchers", + "recommendationResourceType": "Microsoft.Web/serverFarms", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// under-development\r\n" + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a1317a0b-402d-4604-be40-a25a004ba171", + "aprlGuid": "6320abf6-f917-1843-b2ae-4779c35985ae", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/network-watcher/nsg-flow-logs-overview", - "name": "Flow logging for network security groups" + "url": "https://learn.microsoft.com/en-us/azure/app-service/manage-automatic-scaling?tabs=azure-portal", + "name": "Automatic scaling in Azure App Service" }, { - "url": "https://learn.microsoft.com/en-us/azure/network-watcher/vnet-flow-logs-overview", - "name": "Virtual network flow logs" + "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/autoscale/autoscale-get-started", + "name": "Auto Scale Web Apps" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Improves monitoring and security for Azure and Hybrid connectivity\n", - "pgVerified": true, - "description": "Enable Network Security Group and Virtual Network Flow Logs", - "potentialBenefits": "Improves monitoring and security for Azure connectivity", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Enabling Autoscale/Automatic Scaling for your Azure App Service ensures sufficient resources for incoming requests. 
Autoscaling is rule-based, whereas Automatic Scaling, a newer feature, automatically adjusts resources based on HTTP traffic.\n", + "pgVerified": false, + "description": "Enable Autoscale/Automatic scaling to ensure adequate resources are available to service requests", + "potentialBenefits": "Optimizes resources for traffic", "tags": null, - "recommendationResourceType": "Microsoft.Network/networkWatchers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Web/serverFarms", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "bf0b7dbd-016d-458c-af99-70fcb03ad451", + "aprlGuid": "5a44bd30-ae6a-4b81-9b68-dc3a8ffca4d8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/network-watcher/traffic-analytics", - "name": "Network Watcher traffic analytics" + "url": "https://learn.microsoft.com/azure/azure-cache-for-redis/cache-how-to-zone-redundancy", + "name": "Enable zone redundancy for Azure Cache for Redis" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Improves monitoring, security and troubleshooting for Azure and Hybrid connectivity\n", - "pgVerified": true, - "description": "Enable traffic analytics in Network Security Group and Virtual Network Flow Logs configuration.", - "potentialBenefits": "Improves monitoring, security and troubleshooting.", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure Cache for Redis offers zone redundancy in Premium and Enterprise tiers, using VMs across multiple Availability Zones to ensure greater resilience and availability.\n", + "pgVerified": false, + "description": "Enable zone redundancy for Azure Cache for Redis", + "potentialBenefits": "Higher resilience and availability", "tags": null, - 
"recommendationResourceType": "Microsoft.Network/networkWatchers", + "recommendationResourceType": "Microsoft.Cache/Redis", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Cache for Redis instances with one or no Zones selected\nresources\n| where type =~ \"microsoft.cache/redis\"\n| where array_length(zones) <= 1 or isnull(zones)\n| project recommendationId = \"5a44bd30-ae6a-4b81-9b68-dc3a8ffca4d8\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fd43ea32-2ccf-49a8-ada4-9a78794e3ff1", + "aprlGuid": "cabc1f98-c8a7-44f7-ab24-977982ef3f70", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#point-to-site-vpn-gateway", - "name": "Virtual WAN Monitoring Best Practices" + "url": "https://learn.microsoft.com/en-us/azure/azure-cache-for-redis/cache-administration#update-channel-and-schedule-updates", + "name": "Schedule Redis Updates" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set up monitoring and alerts for Point-to-Site VPN gateways. Create alert rule for ensuring promptly response to critical events such as Gateway overutilization, connection count limits and User VPN route limits.", + "recommendationControl": "High Availability", + "longDescription": "Azure Cache for Redis allows for specifying maintenance windows. 
A maintenance window allows you to control the days and times of a week during which the VMs hosting your cache can be updated.\n", "pgVerified": false, - "description": "Monitor health for v-Hub's Point-to-Site VPN gateways", - "potentialBenefits": "Detection and mitigation to avoid disruptions.", - "publishedToLearn": false, + "description": "Schedule updates by setting a maintenance window", + "potentialBenefits": "Higher resilience and availability", "tags": null, - "recommendationResourceType": "Microsoft.Network/p2sVpnGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Cache/redis", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2820f6d6-a23c-7a40-aec5-506f3bd1aeb6", + "aprlGuid": "c474fc96-4e6a-4fb0-95d0-a26b3f35933c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/dns/dns-protect-private-zones-recordsets", - "name": "Protecting private DNS Zones and Records - Azure DNS" + "url": "https://learn.microsoft.com/azure/azure-cache-for-redis/cache-network-isolation", + "name": "Configure private endpoints for Azure Redis Cache" } ], "recommendationControl": "Security", - "longDescription": "Assign the built-in Private DNS Zone Contributor role to specific authorized users, groups, and entities to protect against unauthorized or accidental changes to Private DNS Zones and records. 
Restrict access by granting Private DNS Zone Contributor permission to all zones.\n", - "pgVerified": true, - "description": "Protect private DNS zones and records", - "potentialBenefits": "Prevents DNS outages", - "publishedToLearn": false, + "longDescription": "Use private endpoints for secure connection to cache via a private link, avoiding the public internet.\n", + "pgVerified": false, + "description": "Configure Private Endpoints", + "potentialBenefits": "Secure, private VNet ingress, efficient data transfer", "tags": null, - "recommendationResourceType": "Microsoft.Network/privateDnsZones", + "recommendationResourceType": "Microsoft.Cache/redis", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Azure Redis cache services not protected by private endpoints.\nResources\n| where type =~ \"microsoft.cache/redis\"\n| where properties['publicNetworkAccess'] == \"Enabled\"\n| project recommendationId = \"c474fc96-4e6a-4fb0-95d0-a26b3f35933c\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ab896e8c-49b9-2c44-adec-98339aff7821", + "aprlGuid": "3464854d-6f75-4922-95e4-a2a308b53ce6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.github.io/azure-monitor-baseline-alerts/services/Network/privateDnsZones/", - "name": "Azure Monitor Baseline Alerts - privateDnsZones" + "url": "https://learn.microsoft.com/azure/reliability/reliability-batch#cross-region-disaster-recovery-and-business-continuity", + "name": "Learn More" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Azure Monitor to monitor Private DNS Zone query volume, record set count, and capacity metrics for Record Set, Virtual Network Link, and Virtual Network Link with auto-registration. 
Create alerts based on Azure Monitor Baseline Alerts for these metrics that exceed specific thresholds.\n", - "pgVerified": true, - "description": "Monitor Private DNS Zones health and set up alerts", - "potentialBenefits": "Enhanced DNS reliability and alerting", - "publishedToLearn": false, + "longDescription": "To ensure cross-region disaster recovery and business continuity, set the right quotas for all Batch accounts to allocate necessary core numbers upfront, preventing execution interruptions from reaching quota limits.\n", + "pgVerified": false, + "description": "Monitor Batch Account quota", + "potentialBenefits": "Ensures business continuity", "tags": null, - "recommendationResourceType": "Microsoft.Network/privateDnsZones", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Batch/batchAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1e02335c-1f90-fd4e-a5a5-d359c7b22d70", + "aprlGuid": "71cfab8f-d588-4742-b175-b6e07ae48dbd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/ready/azure-best-practices/private-link-and-dns-integration-at-scale", - "name": "Private Link and DNS integration at scale" + "url": "https://learn.microsoft.com/azure/batch/create-pool-availability-zones", + "name": "Learn More" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "For business continuity scenarios with a low recovery time objective (RTO), ensure that distinct regional production and disaster recovery (DR) Private DNS Zones are configured and have identical workload and resource DNS entries. 
This keeps DNS resolution consistent across both zones.\n", - "pgVerified": true, - "description": "Use regional Private DNS Zones when there is a low recovery time objective (RTO) requirement", - "potentialBenefits": "Ensures seamless failover for DNS during a regional outage", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "When using Virtual Machine Configuration for Azure Batch pools, opting to distribute your pool across Availability Zones bolsters your compute nodes against Azure datacenter failures.\n", + "pgVerified": false, + "description": "Create an Azure Batch pool across Availability Zones", + "potentialBenefits": "Enhanced reliability and failure protection", "tags": null, - "recommendationResourceType": "Microsoft.Network/privateDnsZones", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Batch/batchAccounts", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3538aa48-c40b-455b-a93b-269fe6e65be2", + "aprlGuid": "be448849-0d7d-49ba-9c94-9573ee533d5d", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/reliability/reliability-dns", - "name": "Reliability in Azure DNS" + "url": "https://learn.microsoft.com/en-us/azure/service-health/resource-health-overview", + "name": "Resource Health" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/service-health/resource-health-alert-monitor-guide#create-a-resource-health-alert-rule-in-the-azure-portal", + "name": "Configure Resource Health alerts in the Azure portal" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/service-health/alerts-activity-log-service-notifications-portal", + "name": "Alerts Health" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Azure Private 
DNS allows the Time-To-Live (TTL) for record sets in the zone to be set to a value between 1 and 2147483647 seconds. You should ensure that the TTL for the DNS record sets in your DNS Zones are set appropriately to meet your RPO targets.\n", - "pgVerified": false, - "description": "Ensure Time-To-Live (TTL) is set appropriately to ensure RPOs can be met", - "potentialBenefits": "Ensures that no cached DNS records exist past RPO targets", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Configure Resource Health Alerts for all applicable resources to stay informed about the current and historical health status of your Azure resources. They notify you when these resources have a change in their health status.\n", + "pgVerified": true, + "description": "Configure Resource Health Alerts", + "potentialBenefits": "Stay informed on resource status", "tags": null, - "recommendationResourceType": "Microsoft.Network/privateDnsZones", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Insights/activityLogAlerts", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b89c9acc-0aba-fb44-9ff2-3dbfcf97dce7", + "aprlGuid": "9729c89d-8118-41b4-a39b-e12468fa872b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/private-link/manage-private-endpoint?tabs=manage-private-link-powershell#private-endpoint-connections", - "name": "Private endpoint connections" + "url": "https://learn.microsoft.com/azure/service-health/overview", + "name": "What is Azure Service Health?" 
+ }, + { + "url": "https://learn.microsoft.com/azure/service-health/alerts-activity-log-service-notifications-portal", + "name": "Configure alerts for service health events" } ], - "recommendationControl": "High Availability", - "longDescription": "A private endpoint has two custom properties, static IP address and the network interface name, which must be set at creation. If not in Succeeded state, there may be issues with the endpoint or associated resource.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Service health gives a personalized health view of Azure services and regions used, offering the best place for notifications on outages, planned maintenance, and health advisories by knowing the services used.\n", "pgVerified": true, - "description": "Resolve issues with Private Endpoints in non Succeeded connection state", - "potentialBenefits": "Ensure connection availability", - "publishedToLearn": false, + "description": "Configure Service Health Alerts", + "potentialBenefits": "Proactive outage and maintenance alerts", "tags": null, - "recommendationResourceType": "Microsoft.Network/privateEndpoints", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Insights/activityLogAlerts", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all Private Endpoints that are not in a Succeeded state\r\nresources\r\n| where type =~ \"microsoft.network/privateendpoints\"\r\n| where (properties.provisioningState =~ \"Succeeded\" and (properties.privateLinkServiceConnections[0].properties.provisioningState =~ \"Succeeded\" or properties.manualPrivateLinkServiceConnections[0].properties.provisioningState =~ \"Succeeded\")) == false\r\n| project recommendationId = \"b89c9acc-0aba-fb44-9ff2-3dbfcf97dce7\", name, id, tags, param1 = strcat(\"provisioningState: \", tostring(properties.provisioningState)), param2 = 
strcat(\"provisioningState: \", tostring(properties.privateLinkServiceConnections[0].properties.provisioningState)), param3 = strcat(\"manualProvisioningState: \", tostring(properties.manualPrivateLinkServiceConnections[0].properties.provisioningState))\r\n" + "query": "// Azure Resource Graph Query\n// This resource graph query will return all subscriptions without Service Health alerts configured.\n\nresourcecontainers\n| where type == 'microsoft.resources/subscriptions'\n| project subscriptionAlerts=tostring(id),name,tags\n| join kind=leftouter (\n resources\n | where type == 'microsoft.insights/activitylogalerts' and properties.condition contains \"ServiceHealth\"\n | extend subscriptions = properties.scopes\n | project subscriptions\n | mv-expand subscriptions\n | project subscriptionAlerts = tostring(subscriptions)\n) on subscriptionAlerts\n| where isempty(subscriptionAlerts1)\n| project-away subscriptionAlerts1\n| project recommendationId = \"9729c89d-8118-41b4-a39b-e12468fa872b\",id=subscriptionAlerts,name,tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c63b81fb-7afc-894c-a840-91bb8a8dcfaf", + "aprlGuid": "dac421ec-2832-4c37-839e-b6dc5a38f2fa", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-network/ip-services/public-ip-addresses#availability-zone", - "name": "Public IP addresses - Availability Zones" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/public-ip-basic-upgrade-guidance#steps-to-complete-the-upgrade", - "name": "Upgrading a basic public IP address to Standard SKU" + "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/app/convert-classic-resource", + "name": "Migrate an Application Insights classic resource to a workspace-based resource" } ], - "recommendationControl": "High Availability", - "longDescription": "Public IP addresses in Azure can be of standard SKU, available as non-zonal, zonal, 
or zone-redundant. Zone-redundant IPs are accessible across all zones, resisting any single zone failure, thereby providing higher resilience.\n", - "pgVerified": true, - "description": "Use Standard SKU and Zone-Redundant IPs when applicable", - "potentialBenefits": "Enhanced resilience with zone redundancy", - "publishedToLearn": false, + "recommendationControl": "Service Upgrade and Retirement", + "longDescription": "Classic Application Insights retires in February 2024. To minimize disruption to existing application monitoring scenarios, transition to workspace-based Application Insights before 29 February 2024.\n", + "pgVerified": false, + "description": "Convert Classic Deployments", + "potentialBenefits": "Avoid service disruption post-Feb 2024", "tags": null, - "recommendationResourceType": "Microsoft.Network/publicIPAddresses", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Insights/components", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// List public IP addresses that are not Zone-Redundant\r\nResources\r\n| where type =~ \"Microsoft.Network/publicIPAddresses\" and sku.tier =~ \"Regional\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where isempty(zones) or array_length(zones) <= 1\r\n| extend az = case(isempty(zones), \"Non-zonal\", array_length(zones) <= 1, strcat(\"Zonal (\", 
strcat_array(zones, \",\"), \")\"), zones)\r\n| project recommendationId = \"c63b81fb-7afc-894c-a840-91bb8a8dcfaf\", name, id, tags, param1 = strcat(\"sku: \", sku.name), param2 = strcat(\"availabilityZone: \", az)\r\n\r\n" + "query": "// Azure Resource Graph query\n// Filters Application Insights resources with ‘Classic’ deployment type\nresources\n| where type =~ \"microsoft.insights/components\"\n| extend IngestionMode = properties.IngestionMode\n| where IngestionMode =~ 'ApplicationInsights'\n| project recommendationId= \"dac421ec-2832-4c37-839e-b6dc5a38f2fa\", name, id, tags, param1=\"ApplicationInsightsDeploymentType: Classic\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1adba190-5c4c-e646-8527-dd1b2a6d8b15", + "aprlGuid": "0e835cc2-2551-a247-b1f1-3c5f25c9cb70", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#use-nat-gateway-for-outbound-connectivity", - "name": "Use NAT GW for outbound connectivity" - }, - { - "url": "https://learn.microsoft.com/azure/architecture/framework/services/compute/azure-app-service/reliability#tcp-and-snat-ports", - "name": "TCP and SNAT Ports" + "url": "https://learn.microsoft.com/en-us/azure/databricks/release-notes/runtime/databricks-runtime-ver", + "name": "Databricks runtime support lifecycles" } ], - "recommendationControl": "High Availability", - "longDescription": "Prevent connectivity failures due to SNAT port exhaustion by employing NAT gateway for outbound traffic from virtual networks, ensuring dynamic scaling and secure internet connections.\n", + "recommendationControl": "Governance", + "longDescription": "Databricks recommends migrating workloads to the latest or LTS version of its runtime for enhanced stability and support. If on Runtime 11.3 LTS or above, move directly to the latest 12.x version. 
If below, first migrate to 11.3 LTS, then to the latest 12.x version as per the migration guide.\n", "pgVerified": true, - "description": "Use NAT gateway for outbound connectivity to avoid SNAT Exhaustion", - "potentialBenefits": "Avoids SNAT port exhaustion risks", - "publishedToLearn": false, + "description": "Databricks runtime version is not latest or is not LTS version", + "potentialBenefits": "Enhanced stability and support", "tags": null, - "recommendationResourceType": "Microsoft.Network/publicIPAddresses", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// Lists VMs with PIPs\r\nresources\r\n| where type =~ 'Microsoft.Network/publicIPAddresses'\r\n| where tostring(properties.ipConfiguration.id) contains \"microsoft.network/networkinterfaces\"\r\n| project recommendationId=\"1adba190-5c4c-e646-8527-dd1b2a6d8b15\", name, id, tags, param1=strcat(\"Migrate from instance IP to NAT Gateway\")\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5cea1501-6fe4-4ec4-ac8f-f72320eb18d3", + "aprlGuid": "c166602e-0804-e34b-be8f-09b4d56e1fcd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-network/ip-services/public-ip-basic-upgrade-guidance", - "name": "Upgrading a basic public IP address to Standard SKU - Guidance" - }, - { - "url": "https://azure.microsoft.com/en-us/updates/upgrade-to-standard-sku-public-ip-addresses-in-azure-by-30-september-2025-basic-sku-will-be-retired/", - "name": "Upgrade to Standard SKU public IP addresses in Azure by 30 September 2025 as Basic SKU will be retired" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - 
"recommendationControl": "High Availability", - "longDescription": "Basic SKU public IP addresses will be retired on September 30, 2025. Users are advised to upgrade to Standard SKU public IP addresses before this date to avoid service disruptions.\n", + "recommendationControl": "Scalability", + "longDescription": "Databricks pools pre-provision VMs, reducing risks of provisioning errors during cluster start or scale, enhancing reliability.\n", "pgVerified": true, - "description": "Upgrade Basic SKU public IP addresses to Standard SKU", - "potentialBenefits": "Avoids service disruption", - "publishedToLearn": false, + "description": "Use Databricks Pools", + "potentialBenefits": "Reduces provisioning errors", "tags": null, - "recommendationResourceType": "Microsoft.Network/publicIPAddresses", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// List Basic SKU public IP addresses\r\nResources\r\n| where type =~ \"Microsoft.Network/publicIPAddresses\"\r\n| where sku.name =~ \"Basic\"\r\n| project recommendationId = \"5cea1501-6fe4-4ec4-ac8f-f72320eb18d3\", name, id, tags, param1 = strcat(\"sku: \", sku.name)\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c4254c66-b8a5-47aa-82f6-e7d7fb418f47", + "aprlGuid": "5877a510-8444-7a4c-8412-a8dab8662f7e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/ddos-protection/ddos-protection-overview", - "name": "Azure DDoS Protection" + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-types#premium-ssd", + "name": "Azure managed disk types" } ], - "recommendationControl": "Security", - "longDescription": "DDoS attacks can be targeted at any endpoint that is publicly reachable through the 
internet.\n", + "recommendationControl": "Scalability", + "longDescription": "Upgrade HDDs in premium VMs to SSDs for better speed and reliability. Premium SSDs boost IO-heavy apps; Standard SSDs balance cost and performance. Ideal for critical workloads, upgrading improves connectivity with brief reboot. Consider for vital VMs\n", "pgVerified": true, - "description": "Public IP addresses should have DDoS protection enabled", - "potentialBenefits": "Avoids service disruption", - "publishedToLearn": false, + "description": "Use SSD backed VMs for Worker VM Type and Driver type", + "potentialBenefits": "Faster, reliable VM performance", "tags": null, - "recommendationResourceType": "Microsoft.Network/publicIPAddresses", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph query\r\n// Public IP addresses should have DDoS protection enabled\r\nresources\r\n| where type =~ 'Microsoft.Network/publicIPAddresses'\r\n| where properties.ddosSettings.protectionMode !in~ (\"Enabled\", \"VirtualNetworkInherited\")\r\n| project recommendationId=\"c4254c66-b8a5-47aa-82f6-e7d7fb418f47\", name, id, tags, param1=strcat(\"Apply either DDoS Network protection or DDoS IP Protrection to the public IP address.\")\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "23b2dfc7-7e5d-9443-9f62-980ca621b561", + "aprlGuid": "5c72f0d6-55ec-d941-be84-36c194fa78c0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/essentials/activity-log?tabs=powershell", - "name": "Azure activity log - Azure Monitor | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#enable-autoscaling-for-batch-workloadss", + "name": "Best practices for reliability" 
} ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Create Alerts with Azure Monitor for operations like Create or Update Route Table to spot unauthorized/undesired changes in production resources. This setup aids in identifying improper routing changes, including efforts to evade firewalls or access resources from outside.\n", + "recommendationControl": "Scalability", + "longDescription": "Autoscaling adjusts cluster sizes automatically based on workload demands, offering benefits for many use cases in terms of costs and performance. It includes guidance on when and how to best utilize Autoscaling. For streaming, Delta Live Tables with autoscaling is advised.\n", "pgVerified": true, - "description": "Monitor changes in Route Tables with Azure Monitor", - "potentialBenefits": "Enhanced security and change detection", - "publishedToLearn": false, + "description": "Enable autoscaling for batch workloads", + "potentialBenefits": "Cost and performance optimization", "tags": null, - "recommendationResourceType": "Microsoft.Network/routeTables", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Route Tables without alerts for modification configured.\r\nresources\r\n| where type =~ \"Microsoft.Network/routeTables\"\r\n| project name, id, tags, lowerCaseRouteTableId = tolower(id)\r\n| join kind = leftouter (\r\n resources\r\n | where type =~ \"Microsoft.Insights/activityLogAlerts\" and properties.enabled == true\r\n | mv-expand scope = properties.scopes\r\n | where scope has \"Microsoft.Network/routeTables\"\r\n | project alertName = name, conditionJson = dynamic_to_json(properties.condition.allOf), scope\r\n | where conditionJson has '\"Administrative\"' and (\r\n // Create or Update Route Table\r\n (conditionJson has '\"Microsoft.Network/routeTables/write\"') or\r\n // All Administrative operations\r\n 
(conditionJson !has '\"Microsoft.Network/routeTables/write\"' and conditionJson !has '\"Microsoft.Network/routeTables/delete\"' and conditionJson !has '\"Microsoft.Network/routeTables/join/action\"')\r\n )\r\n | project lowerCaseRouteTableIdOfScope = tolower(scope)\r\n )\r\n on $left.lowerCaseRouteTableId == $right.lowerCaseRouteTableIdOfScope\r\n| where isempty(lowerCaseRouteTableIdOfScope)\r\n| project recommendationId = \"23b2dfc7-7e5d-9443-9f62-980ca621b561\", name, id, tags, param1 = \"ModificationAlert: Not configured/Disabled\"\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "89d1166a-1a20-0f46-acc8-3194387bf127", + "aprlGuid": "362ad2b6-b92c-414f-980a-0cf69467ccce", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?toc=%2Fazure%2Fvirtual-network%2Ftoc.json&tabs=json", - "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#enable-autoscaling-for-sql-warehouse", + "name": "Best practices for reliability" } ], - "recommendationControl": "Governance", - "longDescription": "As an administrator, you can protect Azure subscriptions, resource groups, or resources from accidental deletions and modifications by setting locks.\n", + "recommendationControl": "Scalability", + "longDescription": "The scaling parameter of a SQL warehouse defines the min and max number of clusters for distributing queries. By default, it's set to one. 
Increasing the cluster count can accommodate more concurrent users effectively.\n", "pgVerified": true, - "description": "Configure locks for Route Tables to avoid accidental changes or deletion", - "potentialBenefits": "Prevents accidental edits/deletions", - "publishedToLearn": false, + "description": "Enable autoscaling for SQL warehouse", + "potentialBenefits": "Improves concurrency and efficiency", "tags": null, - "recommendationResourceType": "Microsoft.Network/routeTables", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f05a3e6d-49db-2740-88e2-2b13706c1f67", + "aprlGuid": "cd77db98-9b13-6e4b-bd2b-74c2cb538628", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-monitoring", - "name": "Azure Traffic Manager endpoint monitoring" - }, - { - "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-monitoring#enable-or-disable-health-checks-preview", - "name": "Enable or disable health checks" + "url": "https://learn.microsoft.com/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" }, { - "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-troubleshooting-degraded", - "name": "Troubleshooting degraded state on Azure Traffic Manager" + "url": "https://learn.microsoft.com/azure/databricks/delta-live-tables/settings#use-autoscaling-to-increase-efficiency-and-reduce-resource-usage", + "name": "Databricks enhanced autoscaling" } ], - "recommendationControl": "High Availability", - "longDescription": "Monitor status should be online to ensure failover for application workload. 
If Traffic Manager's health shows Degraded, one or more endpoints may also be Degraded.\n", + "recommendationControl": "Scalability", + "longDescription": "Databricks enhanced autoscaling optimizes cluster utilization by automatically allocating cluster resources based on workload volume, with minimal impact on the data processing latency of your pipelines.\n", "pgVerified": true, - "description": "Traffic Manager Monitor Status Should be Online", - "potentialBenefits": "Ensures failover functionality", - "publishedToLearn": false, + "description": "Use Delta Live Tables enhanced autoscaling", + "potentialBenefits": "Optimized resource use and minimal latency", "tags": null, - "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find traffic manager profiles that have an endpoint monitor status of not 'Online'\r\nresources\r\n| where type == \"microsoft.network/trafficmanagerprofiles\"\r\n| mv-expand properties.endpoints\r\n| where properties_endpoints.properties.endpointMonitorStatus != \"Online\"\r\n| project recommendationId = \"f05a3e6d-49db-2740-88e2-2b13706c1f67\", name, id, tags, param1 = strcat('Profile name: ',properties_endpoints.name), param2 = strcat('endpointMonitorStatus: ', properties_endpoints.properties.endpointMonitorStatus)\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5b422a7f-8caa-3d48-becb-511599e5bba9", - "recommendationTypeId": "6cd70072-c45c-4716-bf7b-b35c18e46e72", + "aprlGuid": "3d3e53b5-ebd1-db42-b43b-d4fad74824ec", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-endpoint-types", - "name": "Traffic Manager 
Endpoint Types" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], "recommendationControl": "High Availability", - "longDescription": "When configuring the Azure traffic manager, provision at least two endpoints to ensure workloads can fail-over to another instance, enhancing reliability and availability.\n", + "longDescription": "To conserve cluster resources, you can terminate a cluster to store its configuration for future reuse or autostart jobs. Clusters can auto-terminate after inactivity, but this only tracks Spark jobs, not local processes, which might still be running even after Spark jobs end.\n", "pgVerified": true, - "description": "Traffic manager profiles should have more than one endpoint", - "potentialBenefits": "Enhances failover capabilities", - "publishedToLearn": false, + "description": "Automatic Job Termination is enabled, ensure there are no user-defined local processes", + "potentialBenefits": "Saves cluster resources, avoids idle use", "tags": null, - "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find traffic manager profiles that have less than 2 endpoints\r\nresources\r\n| where type == \"microsoft.network/trafficmanagerprofiles\"\r\n| where array_length(properties.endpoints) < 2\r\n| project recommendationId = \"5b422a7f-8caa-3d48-becb-511599e5bba9\", name, id, tags, param1 = strcat('EndpointCount: ', array_length(properties.endpoints))\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1ad9d7b7-9692-1441-a8f4-93792efbe97a", - "recommendationTypeId": "0db76759-6d22-4262-93f0-2f989ba2b58e", + "aprlGuid": "7fb90127-5364-bb4d-86fa-30778ed713fb", + 
"recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#add-at-least-one-more-endpoint-to-the-profile-preferably-in-another-azure-region", - "name": "Reliability recommendations" + "url": "https://learn.microsoft.com/en-us/azure/databricks/clusters/configure#cluster-log-delivery", + "name": "Create a cluster" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Profiles should have multiple endpoints to ensure availability in case an endpoint fails. It's also advised to distribute these endpoints across different regions for enhanced reliability.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "When creating a Databricks cluster, you can set a log delivery location for the Spark driver, worker nodes, and events. Logs are delivered every 5 mins and archived hourly. Upon cluster termination, all generated logs until that point are guaranteed to be delivered.\n", "pgVerified": true, - "description": "Configure at least one endpoint within a another region", - "potentialBenefits": "Enhances availability across regions", - "publishedToLearn": false, + "description": "Enable Logging-Cluster log delivery", + "potentialBenefits": "Improved troubleshooting and audit", "tags": null, - "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c31f76a0-48cd-9f44-aa43-99ee904db9bc", - "recommendationTypeId": "0bbe0a49-3c63-49d3-ab4a-aa24198f03f7", + "aprlGuid": "da4ea916-4df3-8c4d-8060-17b49da45977", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/advisor/advisor-reference-reliability-recommendations#add-an-endpoint-configured-to-all-world", - "name": "Add an endpoint configured to \"All (World)\"" - }, - { - "url": "https://aka.ms/Rf7vc5", - "name": "Traffic Manager profile - GeographicProfile (Add an endpoint configured to \"\"All (World)\"\")." + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "For geographic routing, traffic is directed to endpoints based on specific regions. If a region fails, without a predefined failover, configuring an endpoint to \"All (World)\" for geographic profiles can prevent traffic black holes, ensuring service remains available.\n", + "recommendationControl": "High Availability", + "longDescription": "Delta Lake is an open source storage format enhancing data lakes' reliability with ACID transactions, schema enforcement, and scalable metadata handling.\n", "pgVerified": true, - "description": "Ensure endpoint configured to (All World) for geographic profiles", - "potentialBenefits": "Avoids traffic black holing, ensures availability", - "publishedToLearn": false, + "description": "Use Delta Lake for higher reliability", + "potentialBenefits": "Enhances data reliability and processing", "tags": null, - "recommendationResourceType": "Microsoft.Network/trafficManagerProfiles", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Traffic Manager resources that are not confirgured for all-World access\r\nResources\r\n| where type == 'microsoft.network/trafficmanagerprofiles'\r\n| where properties.trafficRoutingMethod =~ \"Geographic\"\r\n| extend endpoints = properties.endpoints\r\n| mv-expand endpoint = endpoints\r\n| where 
endpoint.properties.geoMapping !contains \"WORLD\"\r\n| extend endpointName = endpoint.name\r\n| project recommendationId=\"c31f76a0-48cd-9f44-aa43-99ee904db9bc\", name, id, tags, param1=strcat(\"endpointName:\",endpointName), param2=strcat(\"GeoMapping:\", tostring(endpoint.properties.geoMapping))\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "30ec8a5e-46de-4323-87e9-a7c56b72813b", + "aprlGuid": "892ca809-e2b5-9a47-924a-71132bf6f902", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#virtual-hub", - "name": "Virtual WAN Monitoring Best Practices" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#use-apache-spark-or-photon-for-distributed-compute", + "name": "Best practices for reliability" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set up monitoring and alerts for v-Hubs. 
Create alert rule for ensuring promptly response to changes in BGP status and Data processed by v-Hubs.", - "pgVerified": false, - "description": "Monitor health for v-Hubs", - "potentialBenefits": "Detection and mitigation to avoid disruptions.", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Apache Spark in Databricks Lakehouse ensures resilient distributed data processing by automatically rescheduling failed tasks, aiding in overcoming external issues like network problems or revoked VMs.\n", + "pgVerified": true, + "description": "Use Photon Acceleration", + "potentialBenefits": "Boosts speed and reliability for Spark tasks", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualHubs", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", "automationAvailable": false, - "query": null + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d37db635-157f-584d-9bce-4f6fc8c65ce5", - "recommendationTypeId": "8d61a7d4-5405-4f43-81e3-8c6239b844a6", + "aprlGuid": "7e52d64d-8cc0-8548-a593-eb49ab45630d", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering", - "name": "Designing for disaster recovery with ExpressRoute private peering" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "To increase reliability, it's advised that each ExpressRoute gateway connects to at least two circuits, with each circuit originating from a different peering location than the other, ensuring diverse connectivity paths for enhanced resilience.\n", + "recommendationControl": 
"Business Continuity", + "longDescription": "Invalid or nonconforming data can crash workloads dependent on specific data formats. Best practices recommend filtering such data at ingestion to improve end-to-end resilience, ensuring no data is lost or missed.\n", "pgVerified": true, - "description": "Connect ExpressRoute gateway with circuits from diverse peering locations for resilience", - "potentialBenefits": "Enhanced resiliency for Azure service", - "publishedToLearn": false, + "description": "Automatically rescue invalid or nonconforming data with Databricks Auto Loader or Delta Live Tables", + "potentialBenefits": "Enhanced data resilience and integrity", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of ExpressRoute Gateways that are not connected to two or more ExpressRoute Circuits. Baremetal circuits are excluded from consideration\r\n//This query assumes that the running entity has visibilty to the gateway, connection, and circuit scopes.\r\n//Start with a full list of gateways\r\n(resources\r\n| where type == \"microsoft.network/virtualnetworkgateways\"\r\n| where properties.gatewayType == \"ExpressRoute\"\r\n| extend exrGatewayId = tolower(tostring(id))\r\n| join kind=inner(\r\nresources\r\n| where type == \"microsoft.network/virtualnetworkgateways\"\r\n| where properties.gatewayType == \"ExpressRoute\"\r\n| extend exrGatewayId = tolower(tostring(id))\r\n| join kind=leftouter(\r\n//connections joined with circuit peer info\r\nresources\r\n| where type == \"microsoft.network/connections\"\r\n| extend connectionType = properties.connectionType\r\n| extend exrGatewayId = tolower(tostring(properties.virtualNetworkGateway1.id))\r\n| extend peerId = tolower(tostring(properties.peer.id))\r\n| extend connectionId = tolower(tostring(id))\r\n| where connectionType == \"ExpressRoute\"\r\n| join 
kind=leftouter(\r\n resources\r\n | where type == \"microsoft.network/expressroutecircuits\"\r\n //should this be location instead of peeringLocation\r\n | extend circuitId = tolower(tostring(id))\r\n | extend peeringLocation = tostring(properties.serviceProviderProperties.peeringLocation)\r\n | extend peerId = tolower(id)\r\n) on peerId ) on exrGatewayId\r\n//remove bare metal services connections/circuits\r\n| where not(isnotnull(connectionId) and isnull(sku1))\r\n//group by gateway ID's and peering locations\r\n| summarize by exrGatewayId, peeringLocation\r\n//summarize to connections with fewer than two unique connections\r\n| summarize connCount = count() by exrGatewayId\r\n| where connCount < 2) on exrGatewayId\r\n| project recommendationId = \"d37db635-157f-584d-9bce-4f6fc8c65ce5\", name, id, tags, param1 = \"twoOrMoreCircuitsConnectedFromDifferentPeeringLocations: false\")\r\n| union\r\n(\r\nresources\r\n| where type == \"microsoft.network/virtualnetworkgateways\"\r\n| where properties.gatewayType == \"ExpressRoute\"\r\n| extend exrGatewayId = tolower(tostring(id))\r\n| join kind=leftouter(\r\n//connections joined with circuit peer info\r\nresources\r\n| where type == \"microsoft.network/connections\"\r\n| extend connectionType = properties.connectionType\r\n| extend exrGatewayId = tolower(tostring(properties.virtualNetworkGateway1.id))\r\n| extend peerId = tolower(tostring(properties.peer.id))\r\n| extend connectionId = tolower(tostring(id))\r\n| where connectionType == \"ExpressRoute\") on exrGatewayId\r\n| where isnull(connectionType)\r\n| project recommendationId = \"d37db635-157f-584d-9bce-4f6fc8c65ce5\", name, id, tags, param1 = \"twoOrMoreCircuitsConnectedFromDifferentPeeringLocations: false\", param2 = \"noConnectionsOnGateway: true\"\r\n)\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - 
"aprlGuid": "bbe668b7-eb5c-c746-8b82-70afdedf0cae", - "recommendationTypeId": "c9af1ef6-55bc-48af-bfe4-2c80490159f8", + "aprlGuid": "84e44da6-8cd7-b349-b02c-c8bf72cf587c", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/expressroute-about-virtual-network-gateways#zrgw", - "name": "About ExpressRoute virtual network gateways - Zone-redundant gateway SKUs" - }, - { - "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", - "name": "About zone-redundant virtual network gateway in Azure availability zones" - }, - { - "url": "https://learn.microsoft.com/azure/vpn-gateway/create-zone-redundant-vnet-gateway", - "name": "Create a zone-redundant virtual network gateway in Azure Availability Zones" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], "recommendationControl": "High Availability", - "longDescription": "Azure ExpressRoute gateway offers variable SLAs based on deployment in single or multiple availability zones. To deploy virtual network gateways across zones automatically, use zone-redundant gateways for accessing critical, scalable services with increased resilience.\n", + "longDescription": "Use Databricks and MLflow for deploying models as Spark UDFs for job scheduling, retries, autoscaling. 
Model serving offers scalable infrastructure, processes models using MLflow, and serves them via REST API using serverless compute managed in Databricks cloud.\n", "pgVerified": true, - "description": "Use Zone-redundant ExpressRoute gateway SKUs", - "potentialBenefits": "Enhanced SLA and resilience", - "publishedToLearn": false, + "description": "Configure jobs for automatic retries and termination", + "potentialBenefits": "Enhanced reliability and autoscaling", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// For all VNGs of type ExpressRoute, show any that do not have AZ in the SKU tier\r\nresources\r\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.gatewayType == \"ExpressRoute\"\r\n| where properties.sku.tier !contains 'AZ'\r\n| project recommendationId = \"bbe668b7-eb5c-c746-8b82-70afdedf0cae\", name, id, tags, param1= strcat(\"sku-tier: \" , properties.sku.tier), param2=location\r\n| order by id asc\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c0f23a92-d322-4d4d-97e9-a238b5e3bbb8", + "aprlGuid": 
"4cbb7744-ff3d-0447-badb-baf068c95696", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources?tabs=json", - "name": "Protect your Azure resources with a lock - Azure Resource Manager | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "Configuring an Azure Resource lock for ExpressRoute gateway prevents accidental deletion by enabling administrators to lock an Azure subscription, resource group, or resource, thereby protecting them from unintended user deletions and modifications, with the lock overriding all user permissions.\n", + "recommendationControl": "Scalability", + "longDescription": "Use Databricks and MLflow for deploying models as Apache Spark UDFs, benefiting from job scheduling, retries, autoscaling, etc.\n", "pgVerified": true, - "description": "Configure an Azure Resource lock for ExpressRoute gateway to prevent accidental deletion", - "potentialBenefits": "Prevents accidental deletions", - "publishedToLearn": false, + "description": "Use a scalable and production-grade model serving infrastructure", + "potentialBenefits": "Enhances scalability and reliability", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1c34faa8-8b99-974c-adbf-71922eae943c", + "aprlGuid": "1b0d0893-bf0e-8f4c-9dc6-f18f145c1ecf", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { 
- "url": "https://learn.microsoft.com/azure/expressroute/expressroute-monitoring-metrics-alerts#expressroute-gateways", - "name": "ExpressRoute monitoring, metrics, and alerts | ExpressRoute gateways" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/expressroute-network-insights", - "name": "Azure ExpressRoute Insights using Network Insights" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Network Insights for monitoring ExpressRoute Gateway's health, including availability, performance, and scalability.\n", + "recommendationControl": "High Availability", + "longDescription": "Curate data by creating a layered architecture to increase data quality across layers. Start with a raw layer for ingested source data, continue with a curated layer for cleansed and refined data, and finish with a final layer catered to business needs, focusing on security and performance.\n", "pgVerified": true, - "description": "Monitor gateway health for ExpressRoute gateways", - "potentialBenefits": "Enhanced monitoring and alerting", - "publishedToLearn": false, + "description": "Use a layered storage architecture", + "potentialBenefits": "Enhances data quality and trust", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "194c14ac-0d7a-5a48-ae32-75fa450ee564", + "aprlGuid": "e93fe702-e385-d741-ba37-1f1656482ecd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/expressroute/expressroute-about-virtual-network-gateways#vnet-to-vnet-connectivity", - "name": "About ExpressRoute virtual network gateways - VNet-to-VNet connectivity" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "While multiple VNets can connect via the same ExpressRoute gateway, Microsoft recommends using alternatives like VNet peering, Azure Firewall, NVA, Azure Route Server, site-to-site VPN, virtual WAN, or SD-WAN for VNet-to-VNet communication to optimize network performance and management.\n", + "recommendationControl": "Business Continuity", + "longDescription": "Copying data leads to redundancy, lost integrity, lineage, and access issues, affecting lakehouse data quality. Temporary copies are useful for agility and innovation but can become problematic operational data silos, questioning data's master status and currency.\n", "pgVerified": true, - "description": "Avoid using ExpressRoute circuits for VNet to VNet communication", - "potentialBenefits": "Enhanced VNet integration efficiency", - "publishedToLearn": false, + "description": "Improve data integrity by reducing data redundancy", + "potentialBenefits": "Enhanced data integrity and quality", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3e115044-a3aa-433e-be01-ce17d67e50da", + "aprlGuid": "b7e1d13f-54c9-1648-8a52-34c0abe8ce16", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/expressroute/customer-controlled-gateway-maintenance#azure-portal-steps", - "name": "Configure customer-controlled maintenance for your virtual network gateway - ExpressRoute | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "ExpressRoute gateways are updated for improved functionality, reliability, performance, and security. Customer-controlled maintenance configuration and scheduling minimize update impact and align with your maintenance windows.\n", + "recommendationControl": "Other Best Practices", + "longDescription": "Uncontrolled schema changes can lead to invalid data and failing jobs. Databricks validates and enforces schema through Delta Lake, which prevents bad records during ingestion, and Auto Loader, which detects new columns and supports schema evolution to maintain data integrity.\n", "pgVerified": true, - "description": "Configure customer-controlled ExpressRoute gateway maintenance", - "potentialBenefits": "Minimizes update impact", - "publishedToLearn": false, + "description": "Actively manage schemas", + "potentialBenefits": "Prevents invalid data and job failures", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Virtual Network Gateways without Maintenance Configurations\r\n\r\nresources\r\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\r\n| extend resourceId = tolower(id)\r\n| join kind=leftouter (\r\n maintenanceresources\r\n | where type =~ \"Microsoft.Maintenance/configurationAssignments\"\r\n | project JsonData = parse_json(properties)\r\n | extend maintenanceConfigurationId = 
tolower(tostring(JsonData.maintenanceConfigurationId))\r\n | join kind=inner (\r\n resources\r\n | where type =~ \"Microsoft.Maintenance/maintenanceConfigurations\"\r\n | project maintenanceConfigurationId=tolower(id)\r\n ) on maintenanceConfigurationId\r\n | project maintenanceConfigurationId, resourceId=tolower(tostring(JsonData.resourceId))\r\n) on resourceId\r\n| where isempty(maintenanceConfigurationId)\r\n| project recommendationId = \"3e115044-a3aa-433e-be01-ce17d67e50da\", name, id, tags, param1= strcat(\"sku-tier: \" , properties.sku.tier), param2=location\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5b1933a6-90e4-f642-a01f-e58594e5aab2", + "aprlGuid": "a42297c4-7e4f-8b41-8d4b-114033263f0e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", - "name": "Zone redundant Virtual network gateway in availability zone" - }, - { - "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways#gwskus", - "name": "Gateway SKU" - }, - { - "url": "https://www.microsoft.com/licensing/docs/view/Service-Level-Agreements-SLA-for-Online-Services?lang=1", - "name": "SLA summary for Azure services" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#use-constraints-and-data-expectations", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "Azure VPN gateway offers variable SLAs based on deployment in one or two availability zones. 
Deploying zone-redundant virtual network gateways across availability zones ensures zone-resiliency, improving access to mission-critical, scalable services on Azure.\n", + "recommendationControl": "Business Continuity", + "longDescription": "Delta tables verify data quality automatically with SQL constraints, triggering an error for violations. Delta Live Tables enhance this by defining expectations for data quality, utilizing Python or SQL, to manage actions for record failures, ensuring data integrity and compliance.\n", "pgVerified": true, - "description": "Choose a Zone-redundant VPN gateway", - "potentialBenefits": "Enhanced reliability and scalability", - "publishedToLearn": false, + "description": "Use constraints and data expectations", + "potentialBenefits": "Ensures data quality and integrity", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// For all VNGs of type Vpn, show any that do not have AZ in the SKU tier\r\nresources\r\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.gatewayType == \"Vpn\"\r\n| where properties.sku.tier !contains 'AZ'\r\n| project recommendationId = \"5b1933a6-90e4-f642-a01f-e58594e5aab2\", name, id, tags, param1= strcat(\"sku-tier: \" , 
properties.sku.tier), param2=location\r\n| order by id asc\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "281a2713-c0e0-3c48-b596-19f590c46671", - "recommendationTypeId": "c249dc0e-9a17-423e-838a-d72719e8c5dd", + "aprlGuid": "932d45d6-b46d-e341-abfb-d97bce832f1f", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/vpn-gateway/active-active-portal#gateway", - "name": "Active-active VPN gateway" - }, - { - "url": "https://learn.microsoft.com/azure/vpn-gateway/vpn-gateway-about-vpn-gateway-settings#gwsku", - "name": "Gateway SKU" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#create-regular-backups", + "name": "Best practices for reliability" } ], - "recommendationControl": "High Availability", - "longDescription": "The active-active mode is available for all SKUs except Basic, allowing for two Gateway IP configurations and two public IP addresses, enhancing redundancy and traffic handling.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "To recover from a failure, regular backups are needed. The Databricks Labs project migrate lets admins create backups by exporting workspace assets using the Databricks CLI/API. 
These backups help in restoring or migrating workspaces.\n", "pgVerified": true, - "description": "Enable Active-Active VPN Gateways for redundancy", - "potentialBenefits": "Enhanced reliability and network capacity", - "publishedToLearn": false, + "description": "Create regular backups", + "potentialBenefits": "Ensures data recovery and migration", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Identifies non-active-active VPN type virtual network gateways\r\nresources\r\n| where type =~ 'Microsoft.Network/virtualNetworkGateways'\r\n| where properties.gatewayType =~ \"vpn\"\r\n| extend gatewayType = properties.gatewayType, vpnType = properties.vpnType, connections = properties.connections, activeactive=properties.activeActive\r\n| where activeactive == false\r\n| project recommendationId = \"281a2713-c0e0-3c48-b596-19f590c46671\", name, id, tags\r\n\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "af11fc4c-c06c-4f4c-b98d-6eee6d5c4c70", + "aprlGuid": "12e9d852-5cdc-2743-bffe-ee21f2ef7781", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/vpn-gateway/vpn-gateway-highlyavailable#dual-redundancy-active-active-vpn-gateways-for-both-azure-and-on-premises-networks", - "name": "Dual-redundancy active-active VPN gateways for both Azure and on-premises networks" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#recover-from-structured-streaming-query-failures", + "name": "Best practices for reliability" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Deploying 
active-active VPN concentrators and Azure VPN Gateways maximizes resilience and availability using a fully-meshed topology with four IPSec tunnels.\n", + "recommendationControl": "High Availability", + "longDescription": "Structured Streaming ensures fault-tolerance and data consistency in streaming queries. With Azure Databricks workflows, you can set up your queries to automatically restart after failure, picking up precisely where they left off.\n", "pgVerified": true, - "description": "Deploy active-active VPN concentrators on your premises for maximum resiliency with VPN gateways", - "potentialBenefits": "Maximizes resilience and availability", - "publishedToLearn": false, + "description": "Recover from Structured Streaming query failures", + "potentialBenefits": "Fault-tolerance and auto-restart for queries", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9eab120e-f6d3-ee49-ba0d-766562ce7df1", + "aprlGuid": "a18d60f8-c98c-ba4e-ad6e-2fac72879df1", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/vpn-gateway/monitor-vpn-gateway-reference", - "name": "VPN gateway data reference" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices#recover-etl-jobs-based-on-delta-time-travel", + "name": "Best practices for reliability" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set up monitoring and alerts for Virtual Network Gateway health to utilize a variety of metrics for ensuring operational efficiency and prompt response to any disruptions.\n", + "recommendationControl": "Disaster Recovery", + 
"longDescription": "Despite thorough testing, a production job can fail or yield unexpected data. Sometimes, repairs are done by adding jobs post-issue identification and pipeline correction.\n", "pgVerified": true, - "description": "Monitor VPN gateway connections and health", - "potentialBenefits": "Improved uptime and issue awareness", - "publishedToLearn": false, + "description": "Recover ETL jobs based on Delta time travel", + "potentialBenefits": "Easy rollback and fix for ETL jobs", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9186dae0-7ddc-8f4b-bea5-55538cea4893", + "aprlGuid": "c0e22580-3819-444d-8546-a80e4ed85c83", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/essentials/metrics-getting-started", - "name": "Getting started with Azure Metrics Explorer" - }, - { - "url": "https://learn.microsoft.com/azure/vpn-gateway/monitor-vpn-gateway-reference#metrics", - "name": "Monitor VPN gateway" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/reliability/best-practices", + "name": "Best practices for reliability" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "VPN gateway leverages service health to inform users about both planned and unplanned maintenance, ensuring they are notified about modifications to their VPN connectivity.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Databricks Workflows enable efficient error recovery in multi-task jobs by offering a matrix view for issue examination. 
Fixes can be applied to initiate repair runs targeting only failed and dependent tasks, preserving successful outcomes and thereby saving time and money.\n", "pgVerified": true, - "description": "Enable VPN gateway service health", - "potentialBenefits": "Improves VPN maintenance alerts", - "publishedToLearn": false, + "description": "Use Databricks Workflows and built-in recovery", + "potentialBenefits": "Saves time and money with smart recovery", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4bae5a28-5cf4-40d9-bcf1-623d28f6d917", + "aprlGuid": "4fdb7112-4531-6f48-b60e-c917a6068d9b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/vpn-gateway/about-zone-redundant-vnet-gateways", - "name": "About zone-redundant virtual network gateway in Azure availability zones" + "url": "https://github.com/Azure/AzureDatabricksBestPractices/tree/master", + "name": "Azure Databricks Best Practices" } ], - "recommendationControl": "High Availability", - "longDescription": "For zone-redundant VPN gateways, always use zone-redundant Standard SKU public IPs to avoid deploying all instances in one zone. 
This ensures the gateway's reliability, applying to both active-passive (single IP) and active-active (dual IP) setups.\n", - "pgVerified": true, - "description": "Deploy zone-redundant VPN gateways with zone-redundant Public IP(s)", - "potentialBenefits": "Enhanced reliability and disaster recovery", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Implementing a disaster recovery pattern is vital for Azure Databricks, ensuring data teams' access even during rare regional outages.\n\nIt is important to note that the Azure Databricks service is not entirely zone redundant and does support zonal failover.\n", + "pgVerified": false, + "description": "Configure a disaster recovery pattern", + "potentialBenefits": "Ensures service continuity during disasters", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworkGateways", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of zone-redundant Azure VPN gateways associated with non-zone-redundant Public IPs\r\nresources\r\n| where type =~ \"Microsoft.Network/virtualNetworkGateways\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where properties.gatewayType == \"Vpn\"\r\n| where properties.sku.tier contains 'AZ'\r\n| mv-expand ipconfig =
properties.ipConfigurations\r\n| extend pipId = tostring(ipconfig.properties.publicIPAddress.id)\r\n| join kind=inner (\r\n resources\r\n | where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n | where type == \"microsoft.network/publicipaddresses\"\r\n | where isnull(zones) or array_length(zones) < 3 )\r\n on $left.pipId == $right.id\r\n| project recommendationId = \"4bae5a28-5cf4-40d9-bcf1-623d28f6d917\", name, id, tags, param1 = strcat(\"PublicIpAddressName: \", name1), param2 = strcat (\"PublicIpAddressId: \",id1), param3 = strcat (\"PublicIpAddressTags: \",tags1)\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f0bf9ae6-25a5-974d-87d5-025abec73539", - "recommendationTypeId": "eade5b56-eefd-444f-95c8-23f29e5d93cb", + "aprlGuid": "42aedaa8-6151-424d-b782-b8666c779969", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-network/concepts-and-best-practices", - "name": "Azure Virtual Network - Concepts and best practices | Microsoft Learn" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/vpn-gateway/vpn-gateway-about-vpn-gateway-settings#gwsub", - "name": "GatewaySUbnet" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/route-server/route-server-faq#can-i-associate-a-network-security-group-nsg-to-the-routeserversubnet", - "name": 
"Can I associate a network security group (NSG) to the RouteServerSubnet?" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/firewall/firewall-faq#are-network-security-groups--nsgs--supported-on-the-azurefirewallsubnet", - "name": "Are Network Security Groups (NSGs) supported on the AzureFirewallSubnet?" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/operational-excellence/best-practices#2-automate-deployments-and-workloads", + "name": "Best practices for operational excellence" } ], - "recommendationControl": "Security", - "longDescription": "Network security groups and application security groups allow filtering of inbound and outbound traffic by IP, port, and protocol, adding a security layer at the Subnet level.\n", - "pgVerified": true, - "description": "All Subnets should have a Network Security Group associated", - "potentialBenefits": "Enhanced subnet security and traffic control", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "The Databricks Terraform provider manages Azure Databricks workspaces and cloud infrastructure flexibly and powerfully.\n", + "pgVerified": false, + "description": "Automate deployments and workloads", + "potentialBenefits": "Efficient, reliable automation", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworks", - "recommendationImpact": "Low", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Subnets without NSG associated\r\nresources\r\n| where type =~ 'Microsoft.Network/virtualnetworks'\r\n| mv-expand subnets = properties.subnets\r\n| extend sn = string_size(subnets.properties.networkSecurityGroup)\r\n| where sn == 0 and subnets.name !in (\"GatewaySubnet\", \"AzureFirewallSubnet\", \"AzureFirewallManagementSubnet\", \"RouteServerSubnet\")\r\n| project recommendationId = \"f0bf9ae6-25a5-974d-87d5-025abec73539\", name, id, tags, param1 = strcat(\"SubnetName: \", 
subnets.name), param2 = \"NSG: False\"\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "69ea1185-19b7-de40-9da1-9e8493547a5c", + "aprlGuid": "20193ff9-dbcd-a74e-b197-71d7d9d3c1e6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/azure-virtual-network/reliability", - "name": "Reliability and Azure Virtual Network - Microsoft Azure Well-Architected Framework | Microsoft Learn" + "url": "https://learn.microsoft.com/en-us/azure/databricks/lakehouse-architecture/operational-excellence/best-practices#system-monitoring", + "name": "Best practices for operational excellence" } ], - "recommendationControl": "Security", - "longDescription": "Azure DDoS Protection offers enhanced mitigation features against DDoS attacks and is auto-tuned to protect specific resources in a virtual network, combined with application design best practices.\n", - "pgVerified": true, - "description": "Shield public endpoints in Azure VNets with Azure DDoS Standard Protection Plans", - "potentialBenefits": "Enhanced DDoS attack mitigation", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "The Databricks Terraform provider is a flexible, powerful tool for managing Azure Databricks workspaces and cloud infrastructure.\n", + "pgVerified": false, + "description": "Set up monitoring, alerting, and logging", + "potentialBenefits": "Enhanced reliability and automation", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworks", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find virtual 
networks without DDoS Protection\r\nresources\r\n| where type =~ 'Microsoft.Network/virtualNetworks'\r\n| where isnull(properties.enableDdosProtection) or properties.enableDdosProtection contains \"false\"\r\n| project recommendationId = \"69ea1185-19b7-de40-9da1-9e8493547a5c\", name, id, tags, param1 = strcat(\"EnableDdosProtection: \", properties.enableDdosProtection)\r\n\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "24ae3773-cc2c-3649-88de-c9788e25b463", + "aprlGuid": "397cdebb-9d6e-ab4f-83a1-8c481de0a3a7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-network/virtual-networks-faq", - "name": "Azure Virtual Network FAQ | Microsoft Learn" - }, - { - "url": "https://learn.microsoft.com/azure/architecture/framework/services/networking/network-connectivity/reliability", - "name": "Reliability and Network connectivity - Microsoft Azure Well-Architected Framework | Microsoft LearnNetworking Reliability" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/private-link/availability", - "name": "Azure Private Link availability" + "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#deploy-workspaces-in-multiple-subscriptions-to-honor-azure-capacity-limits", + "name": "Azure Databricks Best Practices" } ], - "recommendationControl": "Security", - "longDescription": "Use VNet service endpoints only if Private Link isn't available and no data movement concerns. 
This feature restricts Azure service access to specified VNet and subnet, enhancing network security and isolating service traffic.\n", - "pgVerified": true, - "description": "When available, use Private Endpoints instead of Service Endpoints for PaaS Services", - "potentialBenefits": "Enhanced security and data isolation", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Customers often naturally divide workspaces by teams or departments. However, it's crucial to also consider Azure Subscription and Azure Databricks (ADB) Workspace limits when partitioning.\n", + "pgVerified": false, + "description": "Deploy workspaces in separate Subscriptions", + "potentialBenefits": "Enhanced limits management, team separation", "tags": null, - "recommendationResourceType": "Microsoft.Network/virtualNetworks", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find Subnets with Service Endpoint enabled for services that offer Private Link\r\nresources\r\n| where type =~ 'Microsoft.Network/virtualnetworks'\r\n| mv-expand subnets = properties.subnets\r\n| extend se = array_length(subnets.properties.serviceEndpoints)\r\n| where se >= 1\r\n| project name, id, tags, subnets, serviceEndpoints=todynamic(subnets.properties.serviceEndpoints)\r\n| mv-expand serviceEndpoints\r\n| project name, id, tags, subnetName=subnets.name, serviceName=tostring(serviceEndpoints.service)\r\n| where serviceName in (parse_json('[\"Microsoft.CognitiveServices\",\"Microsoft.AzureCosmosDB\",\"Microsoft.DBforMariaDB\",\"Microsoft.DBforMySQL\",\"Microsoft.DBforPostgreSQL\",\"Microsoft.EventHub\",\"Microsoft.KeyVault\",\"Microsoft.ServiceBus\",\"Microsoft.Sql\", \"Microsoft.Storage\",\"Microsoft.StorageSync\",\"Microsoft.Synapse\",\"Microsoft.Web\"]'))\r\n| project recommendationId = \"24ae3773-cc2c-3649-88de-c9788e25b463\", name, id, tags, param1 = strcat(\"subnet=\", subnetName), 
param2=strcat(\"serviceName=\",serviceName), param3=\"ServiceEndpoints=true\"\r\n\r\n" + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f0d4f766-ac19-48c4-b228-4601cc038baa", + "aprlGuid": "5e722c4f-415a-9b4c-bd4c-96b74dce29ad", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-wan/monitoring-best-practices#virtual-wan-gateways", - "name": "Virtual WAN Monitoring Best Practices" + "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#consider-isolating-each-workspace-in-its-own-vnet", + "name": "Azure Databricks Best Practices" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Set up monitoring and alerts for v-Hub's VPN Gateway. Create alert rule for ensuring promptly response to critical events such as packet drop counts, BGP status, Gateway overutilization.", + "recommendationControl": "Scalability", + "longDescription": "Deploying only one Databricks Workspace per VNet aligns with Azure Databricks' isolation model.\n", "pgVerified": false, - "description": "Monitor gateway for Site-to-site v-Hub's VPN gateway", - "potentialBenefits": "Detection and mitigation to avoid disruptions.", - "publishedToLearn": false, + "description": "Isolate each workspace in its own VNet", + "potentialBenefits": "Enhanced security and resource isolation", "tags": null, - "recommendationResourceType": "Microsoft.Network/vpnGateways", + "recommendationResourceType": "Microsoft.Databricks/workspaces", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1ceea4b5-1d8b-4be0-9bbe-9594557be51a", + "aprlGuid": 
"14310ba6-77ad-3641-a2db-57a2218b9bc7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/expressroute/traffic-collector", - "name": "Azure ExpressRoute Traffic Collector" + "url": "https://github.com/Azure/AzureDatabricksBestPractices/blob/master/toc.md#do-not-store-any-production-data-in-default-dbfs-folders", + "name": "Azure Databricks Best Practices" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "ExpressRoute Traffic Collector samples network flows over ExpressRoute Direct or Service-Provider based circuits, sending flow logs to a Log Analytics workspace for analysis or export to visualization tools/SIEM.\n", - "pgVerified": true, - "description": "Ensure ExpressRoute Traffic Collector is enabled and configured for Direct or Provider circuits", - "potentialBenefits": "Enhanced network flow analysis and DR readiness", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Driven by security and data availability concerns, each Azure Databricks Workspace comes with a default DBFS designed for system-level artifacts like libraries and Init scripts, not for production data.\n", + "pgVerified": false, + "description": "Do not Store any Production Data in Default DBFS Folders", + "potentialBenefits": "Enhanced security, data protection", "tags": null, - "recommendationResourceType": "Microsoft.NetworkFunction/azureTrafficCollectors", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b36fd2ac-dd83-664a-ab48-ff7b8d3b189d", + "aprlGuid": "b5af7e26-3939-1b48-8fba-f8d4a475c67a", "recommendationTypeId": null, "recommendationMetadataState": "Active", 
"learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/logs/logs-data-export", - "name": "Log Analytics workspace data export in Azure Monitor" - }, - { - "url": "https://learn.microsoft.com/azure/azure-monitor/best-practices-logs#configuration-recommendations", - "name": "Azure Monitor configuration recommendations" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/spot-vms", + "name": "Use Azure Spot Virtual Machines" } ], - "recommendationControl": "Governance", - "longDescription": "Data export in a Log Analytics workspace to an Azure Storage account enhances data protection against regional failures by using geo-redundant (GRS) or geo-zone-redundant storage (GZRS), mainly for compliance and integration with other Azure services and tools.\n", - "pgVerified": true, - "description": "Enable Log Analytics data export to GRS or GZRS", - "potentialBenefits": "Enhances compliance and regional fault tolerance", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure Spot VMs are not suitable for critical production workloads needing high availability and reliability. 
They are meant for fault-tolerant tasks and can be evicted with 30-seconds notice if Azure needs the capacity, with no SLA guarantees.\n", + "pgVerified": false, + "description": "Do not use Azure Spot VMs for critical Production workloads", + "potentialBenefits": "Ensures high reliability for production", "tags": null, - "recommendationResourceType": "Microsoft.OperationalInsights/workspaces", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4b77191c-cc3c-8c4e-844b-0f56d0927890", + "aprlGuid": "8aa63c34-dd9d-49bd-9582-21ec310dfbdd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-monitor/logs/log-analytics-workspace-health", - "name": "Monitor Log Analytics workspace health" + "url": "https://learn.microsoft.com/azure/databricks/resources/supported-regions#--azure-databricks-control-plane-addresses", + "name": "Azure Databricks control plane addresses" }, { - "url": "https://learn.microsoft.com/azure/azure-monitor/best-practices-logs#configuration-recommendations", - "name": "Azure Monitor configuration recommendations" + "url": "https://github.com/databrickslabs/migrate", + "name": "Migrate - maintained by Databricks Inc." + }, + { + "url": "https://registry.terraform.io/providers/databricks/databricks/latest/docs/guides/experimental-exporter", + "name": "Databricks Terraform Exporter - maintained by Databricks Inc. 
(Experimental)" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "A health status alert will proactively notify you if a workspace becomes unavailable because of a datacenter or regional failure.\n", - "pgVerified": true, - "description": "Create a health status alert rule for your Log Analytics workspace", - "potentialBenefits": "Early alert for workspace failure", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Move workspaces to in-region control plane for increased regional isolation. Identify current control plane region using the workspace URL and nslookup. When region from CNAME differs from workspace region and an in-region control is available, consider migration using tools provided below.\n", + "pgVerified": false, + "description": "Evaluate regional isolation for workspaces", + "potentialBenefits": "Improves resilience and data sovereignty", "tags": null, - "recommendationResourceType": "Microsoft.OperationalInsights/workspaces", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e93bb813-b356-48f3-9bdf-a06a0a6ba039", + "aprlGuid": "028593be-956e-4736-bccf-074cb10b92f4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/site-recovery/azure-to-azure-network-mapping#set-up-ip-addressing-for-target-vms", - "name": "Setup network mapping for site recovery" + "url": "https://learn.microsoft.com/azure/databricks/compute/cluster-config-best-practices", + "name": "Compute configuration best practices" + }, + { + "url": "https://learn.microsoft.com/azure/databricks/compute/gpu", + "name": "GPU-enabled compute" } ], - 
"recommendationControl": "Disaster Recovery", - "longDescription": "Ensure VM failover settings' static IP addresses are available in the failover subnet to maintain consistent IP assignment during failover, with the target VM receiving the same static IP if it's available or the next available IP otherwise. IP adjustments can be made in VM Network settings.\n", - "pgVerified": true, - "description": "Ensure static IP addresses in Site Recovery VM failover settings are available in failover subnet", - "potentialBenefits": "Smooth failover IP management", - "publishedToLearn": false, + "recommendationControl": "Personalized", + "longDescription": "Azure Databricks planning should include VM SKU swap strategies for capacity issues. VMs are regional, and allocation failures may occur, shown by a \"CLOUD PROVIDER\" error.\n", + "pgVerified": false, + "description": "Define alternate VM SKUs", + "potentialBenefits": "Ensures service availability", "tags": null, - "recommendationResourceType": "Microsoft.RecoveryServices/vaults", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Databricks/workspaces", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "17e877f7-3a89-4205-8a24-0670de54ddcd", + "aprlGuid": "e7495e1c-0c75-0946-b266-b429b5c7f3bf", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/site-recovery/azure-to-azure-tutorial-dr-drill#run-a-test-failover", - "name": "Run a test failover" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-design-overview#when-to-use-scale-sets-instead-of-virtual-machines", + "name": "When to use VMSS instead of VMs" + }, + { + "url": 
"https://learn.microsoft.com/azure/well-architected/services/compute/virtual-machines/virtual-machines-review", + "name": "Azure Well-Architected Framework review - Virtual Machines and Scale Sets" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Perform a test failover to validate your BCDR strategy and ensure that your applications are functioning correctly in the target region without impacting your production environment. Test your Disaster Recovery plan periodically without any data loss or downtime, using test failovers.\n", + "recommendationControl": "Scalability", + "longDescription": "Deploying even single instance VMs into a scale set with Flexible orchestration mode future-proofs applications for scaling and availability. This mode guarantees high availability (up to 1000 VMs) by distributing VMs across fault domains in a region or within an Availability Zone.\n", "pgVerified": true, - "description": "Validate VM functionality with a Site Recovery test failover to check performance at target", - "potentialBenefits": "Ensures BCDR plan accuracy and VM performance", - "publishedToLearn": false, + "description": "Deploy VMSS with Flex orchestration mode instead of Uniform", + "potentialBenefits": "Higher scalability and availability", "tags": null, - "recommendationResourceType": "Microsoft.RecoveryServices/vaults", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all VMs where replication has been enabled but Test Failover was never performed\r\nrecoveryservicesresources\r\n| where type == \"microsoft.recoveryservices/vaults/replicationfabrics/replicationprotectioncontainers/replicationprotecteditems\"\r\n| where properties.providerSpecificDetails.dataSourceInfo.datasourceType == 'AzureVm' and isnull(properties.lastSuccessfulTestFailoverTime)\r\n| 
project recommendationId=\"17e877f7-3a89-4205-8a24-0670de54ddcd\" , name = properties.providerSpecificDetails.recoveryAzureVMName, id=properties.providerSpecificDetails.dataSourceInfo.resourceId\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all zonal VMs that are NOT deployed with Flex orchestration mode\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| where properties.orchestrationMode != \"Flexible\"\n| project recommendationId = \"e7495e1c-0c75-0946-b266-b429b5c7f3bf\", name, id, tags, param1 = strcat(\"orchestrationMode: \", tostring(properties.orchestrationMode))\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2912472d-0198-4bdc-aa90-37f145790edc", - "recommendationTypeId": "06578866-1877-41e6-9d22-3ea5122e8048", + "aprlGuid": "94794d2a-eff0-2345-9b67-6f9349d0a627", + "recommendationTypeId": "3b587048-b04b-4f81-aaed-e43793652b0f", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/backup/move-to-azure-monitor-alerts", - "name": "Move to Azure monitor Alerts" - }, - { - "url": "https://azure.microsoft.com/updates/transition-to-builtin-azure-monitor-alerts-for-recovery-services-vaults-in-azure-backup-by-31-march-2026/", - "name": "Classic alerts retirement announcement" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-health-extension?tabs=rest-api", + "name": "Using Application Health extension with Virtual Machine Scale Sets" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Classic alerts for Recovery Services vaults in Azure Backup will be retired on 31 March 2026.\n", + "longDescription": "Monitoring application health in Azure Virtual Machine Scale Sets is crucial for deployment management. 
It supports rolling upgrades such as automatic OS-image upgrades and VM guest patching, leveraging health monitoring for upgrading.\n", "pgVerified": true, - "description": "Migrate from classic alerts to built-in Azure Monitor alerts for Azure Recovery Services Vaults", - "potentialBenefits": "Enhanced, scalable, and consistent alerting.", - "publishedToLearn": false, + "description": "Enable Azure Virtual Machine Scale Set Application Health Monitoring", + "potentialBenefits": "Enhances deployment management and upgrades", "tags": null, - "recommendationResourceType": "Microsoft.RecoveryServices/vaults", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This Resource Graph query will return all Recovery services vault with Classic alerts enabled.\r\nresources\r\n| where type in~ ('microsoft.recoveryservices/vaults')\r\n| extend monitoringSettings = parse_json(properties).monitoringSettings\r\n| extend isUsingClassicAlerts = case(isnull(monitoringSettings),'Enabled',monitoringSettings.classicAlertSettings.alertsForCriticalOperations)\r\n| extend isUsingJobsAlerts = case(isnull(monitoringSettings), 'Enabled', monitoringSettings.azureMonitorAlertSettings.alertsForAllJobFailures)\r\n| where isUsingClassicAlerts == 'Enabled'\r\n| project recommendationId = \"2912472d-0198-4bdc-aa90-37f145790edc\", name, id, tags, param1=strcat(\"isUsingClassicAlerts: \", isUsingClassicAlerts), param2=strcat(\"isUsingJobsAlerts: \", isUsingJobsAlerts)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs that do NOT have health monitoring enabled\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| join kind=leftouter (\n resources\n | where type == \"microsoft.compute/virtualmachinescalesets\"\n | mv-expand extension=properties.virtualMachineProfile.extensionProfile.extensions\n | where extension.properties.type 
in ( \"ApplicationHealthWindows\", \"ApplicationHealthLinux\" )\n | project id\n) on id\n| where id1 == \"\"\n| project recommendationId = \"94794d2a-eff0-2345-9b67-6f9349d0a627\", name, id, tags, param1 = \"extension: null\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1549b91f-2ea0-4d4f-ba2a-4596becbe3de", - "recommendationTypeId": "9b1308f1-4c25-4347-a061-7cc5cd6a44ab", + "aprlGuid": "820f4743-1f94-e946-ae0b-45efafd87962", + "recommendationTypeId": "b4d988a9-85e6-4179-b69c-549bdd8a55bb", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/backup/backup-create-recovery-services-vault#set-cross-region-restore", - "name": "Set Cross Region Restore" - }, - { - "url": "https://learn.microsoft.com/azure/backup/guidance-best-practices", - "name": "Azure Backup Best Practices" - }, - { - "url": "https://learn.microsoft.com/azure/backup/backup-rbac-rs-vault#minimum-role-requirements-for-azure-vm-backup", - "name": "Minimum Role Requirements for Cross Region Restore" - }, - { - "url": "https://learn.microsoft.com/azure/backup/backup-azure-arm-vms-prepare", - "name": "Recovery Services Vault" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-instance-repairs#requirements-for-using-automatic-instance-repairs", + "name": "Automatic instance repairs for Azure Virtual Machine Scale Sets" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Cross Region Restore enables the restoration of Azure VMs in a secondary, Azure paired region, facilitating drills for audit or compliance and allowing recovery of VMs or disks in the event of a primary region disaster. 
It is an opt-in feature available exclusively for GRS vaults.\n", + "recommendationControl": "High Availability", + "longDescription": "Enabling automatic instance repairs in Azure Virtual Machine Scale Sets enhances application availability through a continuous health check and maintenance process.\n", "pgVerified": true, - "description": "Enable Cross Region Restore for your GRS Recovery Services Vault", - "potentialBenefits": "Enhances disaster recovery capabilities", - "publishedToLearn": false, + "description": "Enable Automatic Repair Policy on Azure Virtual Machine Scale Sets", + "potentialBenefits": "Boosts app availability by auto-repair", "tags": null, - "recommendationResourceType": "Microsoft.RecoveryServices/vaults", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Displays all recovery services vaults that do not have cross region restore enabled\r\nresources\r\n| where type =~ \"Microsoft.RecoveryServices/vaults\" and\r\n properties.redundancySettings.standardTierStorageRedundancy =~ \"GeoRedundant\" and\r\n properties.redundancySettings.crossRegionRestore !~ \"Enabled\"\r\n| extend\r\n param1 = strcat(\"CrossRegionRestore: \", properties.redundancySettings.crossRegionRestore),\r\n param2 = strcat(\"StorageReplicationType: \", properties.redundancySettings.standardTierStorageRedundancy)\r\n| project recommendationId = \"1549b91f-2ea0-4d4f-ba2a-4596becbe3de\", name, id, tags, param1, param2\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs that do NOT have automatic repair policy enabled\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| where properties.automaticRepairsPolicy.enabled == false\n| project recommendationId = \"820f4743-1f94-e946-ae0b-45efafd87962\", name, id, tags, param1 = \"automaticRepairsPolicy: Disabled\"\n\n" }, { 
"publishedToAdvisor": null, - "aprlGuid": "9e39919b-78af-4a0b-b70f-c548dae97c25", + "aprlGuid": "ee66ff65-9aa3-2345-93c1-25827cf79f44", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/backup/backup-azure-security-feature-cloud?tabs=azure-portal", - "name": "Soft Delete for Azure Backup" + "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-get-started?WT.mc_id=Portal-Microsoft_Azure_Monitoring", + "name": "Get started with autoscale in Azure" + }, + { + "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-overview", + "name": "Overview of autoscale in Azure" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "With soft delete, if backup data is deleted, the backup data is retained for 14 additional days, allowing the recovery of that backup item with no data loss with no cost to you. Soft delete is enabled by default. Disabling this feature isn't recommended.\n", - "pgVerified": false, - "description": "Enable Soft Delete for Recovery Services Vaults in Azure Backup", - "potentialBenefits": "Enhances disaster recovery capabilities", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Use custom autoscale for VMSS based on metrics and schedules to improve performance and cost effectiveness, adjusting instances as demand changes.\n", + "pgVerified": true, + "description": "Configure VMSS Autoscale to custom and configure the scaling metrics", + "potentialBenefits": "Enhances performance and cost-efficiency", "tags": null, - "recommendationResourceType": "Microsoft.RecoveryServices/vaults", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure Recovery Services vaults that do not have soft 
delete enabled\r\nresources\r\n| where type == \"microsoft.recoveryservices/vaults\"\r\n| mv-expand issoftDelete=properties.securitySettings.softDeleteSettings.softDeleteState\r\n| where issoftDelete == 'Disabled'\r\n| project recommendationId = \"9e39919b-78af-4a0b-b70f-c548dae97c25\", name, id, tags, param1=strcat(\"Soft Delete: \",issoftDelete)\r\n" + "query": "// Azure Resource Graph Query\n// Find VMSS instances associated with autoscale settings when autoscale is disabled\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| project name, id, tags\n| join kind=leftouter (\n resources\n | where type == \"microsoft.insights/autoscalesettings\"\n | where tostring(properties.targetResourceUri) contains \"Microsoft.Compute/virtualMachineScaleSets\"\n | project id = tostring(properties.targetResourceUri), autoscalesettings = properties\n) on id\n| where isnull(autoscalesettings) or autoscalesettings.enabled == \"false\"\n| project recommendationId = \"ee66ff65-9aa3-2345-93c1-25827cf79f44\", name, id, tags, param1 = \"autoscalesettings: Disabled\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "98bd7098-49d6-491b-86f1-b143d6b1a0ff", + "aprlGuid": "3f85a51c-e286-9f44-b4dc-51d00768696c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/overview#resource-group-location-alignment", - "name": "Azure Resource Manager Overview" + "url": "https://learn.microsoft.com/azure/azure-monitor/autoscale/autoscale-predictive", + "name": "Use predictive autoscale to scale out before load demands in virtual machine scale sets" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Ensure resource locations align with their resource group to manage resources during regional outages. 
ARM stores resource data, which if in an unavailable region, could halt updates, rendering resources read-only.\n", + "recommendationControl": "Scalability", + "longDescription": "Predictive autoscale utilizes machine learning to efficiently manage and scale Azure Virtual Machine Scale Sets by forecasting CPU load through historical usage analysis, ensuring timely scale-out to meet demand.\n", "pgVerified": true, - "description": "Ensure Resource Group and its Resources are located in the same Region", - "potentialBenefits": "Improves outage management", - "publishedToLearn": false, + "description": "Enable Predictive autoscale and configure at least for Forecast Only", + "potentialBenefits": "Optimizes scaling with ML predictions", "tags": null, - "recommendationResourceType": "Microsoft.Resources/resourceGroups", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure Resource Groups that have resources deployed in a region different than the Resource Group region\r\nresourcecontainers\r\n| where type =~ \"Microsoft.Resources/subscriptions/resourceGroups\"\r\n| project resourceGroupId = tolower(id), resourceGroupLocation = location\r\n| join kind = inner (\r\n resources\r\n | where location !~ \"Global\" and // Exclude global resources\r\n resourceGroup !~ \"NetworkWatcherRG\" and // Exclude resources in the NetworkWatcherRG\r\n id has \"/resourceGroups/\" // Exclude resources not in a resource group\r\n | project id, name, tags, resourceGroup, location, resourceGroupId = tolower(strcat_array(array_slice(split(id, \"/\"), 0, 4), \"/\"))\r\n )\r\n on resourceGroupId\r\n| where resourceGroupLocation !~ location\r\n| project\r\n recommendationId = \"98bd7098-49d6-491b-86f1-b143d6b1a0ff\",\r\n name,\r\n id,\r\n tags,\r\n param1 = strcat(\"resourceLocation: \", location),\r\n 
param2 = strcat(\"resourceGroupLocation: \", resourceGroupLocation),\r\n param3 = strcat(\"resourceGroup: \", resourceGroup)\r\n" + "query": "// Azure Resource Graph Query\n// Find VMSS instances associated with autoscale settings when predictiveAutoscalePolicy_scaleMode is disabled\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| project name, id, tags\n| join kind=leftouter (\n resources\n | where type == \"microsoft.insights/autoscalesettings\"\n | where tostring(properties.targetResourceUri) contains \"Microsoft.Compute/virtualMachineScaleSets\"\n | project id = tostring(properties.targetResourceUri), autoscalesettings = properties\n) on id\n| where autoscalesettings.enabled == \"true\" and autoscalesettings.predictiveAutoscalePolicy.scaleMode == \"Disabled\"\n| project recommendationId = \"3f85a51c-e286-9f44-b4dc-51d00768696c\", name, id, tags, param1 = \"predictiveAutoscalePolicy_scaleMode: Disabled\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "20057905-262c-49fe-a9be-49f423afb359", + "aprlGuid": "b5a63aa0-c58e-244f-b8a6-cbba0560a6db", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/services/messaging/service-bus/reliability", - "name": "Service Bus and reliability" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/service-bus-messaging/service-bus-geo-dr#availability-zones", - "name": "Azure Service Bus Geo-disaster recovery" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/service-bus-messaging/service-bus-outages-disasters", - "name": "Insulate Azure Service Bus applications against outages and disasters" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-scale-in-policy", + "name": "Use scale-in policies with Azure Virtual Machine Scale Sets" } ], "recommendationControl": "High Availability", - "longDescription": "Availability 
zones are now enabled by default on new namespaces where possible. Existing namespaces are being migrated to availability zones where possible. The property zoneRedundant might still show as false, even when availability zones has been enabled.\n", - "pgVerified": false, - "description": "Enable Availability Zones for Service Bus namespaces", - "potentialBenefits": "Enhances fault tolerance and uptime", - "publishedToLearn": false, + "longDescription": "Microsoft advises disabling strictly even VM instance distribution across Availability Zones in VMSS to improve scalability and flexibility, noting that uneven distribution may better serve application load demands despite the potential trade-off in resilience.\n", + "pgVerified": true, + "description": "Disable Force strictly even balance across zones to avoid scale in and out fail attempts", + "potentialBenefits": "Improves scaling, reduces fail attempts", "tags": null, - "recommendationResourceType": "Microsoft.ServiceBus/namespaces", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find VMSS instances where strictly zoneBalance is set to True\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| where properties.orchestrationMode == \"Uniform\" and properties.zoneBalance == true\n| project recommendationId = \"b5a63aa0-c58e-244f-b8a6-cbba0560a6db\", name, id, tags, param1 = \"strictly zoneBalance: Enabled\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d810e3a8-600f-4be1-895b-1a93e61d37fd", + "aprlGuid": "1422c567-782c-7148-ac7c-5fc14cf45adc", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/service-bus-messaging/automate-update-messaging-units", - "name": "Service 
Bus auto-scaling" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones", + "name": "Create a Virtual Machine Scale Set that uses Availability Zones" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-use-availability-zones?tabs=cli-1%2Cportal-2#update-scale-set-to-add-availability-zones", + "name": "Update scale set to add availability zones" } ], "recommendationControl": "High Availability", - "longDescription": "Use Service Bus with auto-scale for high availability. The Premium SKU supports auto-scale, ensuring that the resources are automatically scaled based on the load.\n", - "pgVerified": false, - "description": "Enable auto-scale for production workloads on Service Bus namespaces", - "potentialBenefits": "Ensures high availability and performance", - "publishedToLearn": false, + "longDescription": "When creating VMSS, implement availability zones as a protection measure for your applications and data against the rare event of datacenter failure.\n", + "pgVerified": true, + "description": "Deploy VMSS across availability zones with VMSS Flex", + "potentialBenefits": "Enhances disaster resilience", "tags": null, - "recommendationResourceType": "Microsoft.ServiceBus/namespaces", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find VMSS instances with one or no Zones selected\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| where array_length(zones) <= 1 or isnull(zones)\n| project recommendationId = \"1422c567-782c-7148-ac7c-5fc14cf45adc\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"f075a1bd-de9e-4819-9a1d-1ac41037a74f", + "aprlGuid": "e4ffd7b0-ba24-c84e-9352-ba4819f908c0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.microsoft.com/updates/azure-support-tls-will-end-by-31-october-2024-2/", - "name": "Azure support for TLS 1.0 and TLS 1.1 will end by 31 October 2024" + "url": "https://learn.microsoft.com/azure/virtual-machines/automatic-vm-guest-patching", + "name": "Automatic VM Guest Patching for Azure VMs" }, { - "url": "https://learn.microsoft.com/azure/service-bus-messaging/transport-layer-security-configure-minimum-version", - "name": "Configure the minimum TLS version for a Service Bus namespace" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-automatic-upgrade", + "name": "Auto OS Image Upgrades" } ], - "recommendationControl": "Service Upgrade and Retirement", - "longDescription": "As of 31 October 2024, TLS 1.0 and TLS 1.1 will no longer be supported on Azure including Service Bus to enhance security and provide best-in-class encryption for your data. Change the minimum TLS version for your Service Bus namespace to TLS v1.2 or higher.\n", - "pgVerified": false, - "description": "Configure the minimum TLS version for Service Bus namespaces to TLS v1.2 or higher", - "potentialBenefits": "Avoids service disruption", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "Enabling automatic VM guest patching eases update management by safely, automatically patching virtual machines to maintain security compliance, while limiting blast radius of VMs. 
Note, the KQL will not return sets using Uniform orchestration.\n", + "pgVerified": true, + "description": "Set Patch orchestration options to Azure-orchestrated", + "potentialBenefits": "Eases patch management, enhances security", "tags": null, - "recommendationResourceType": "Microsoft.ServiceBus/namespaces", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Service Bus Namespace resources that have the lower minimum TLS version.\r\nresources\r\n| where type =~ \"Microsoft.ServiceBus/namespaces\"\r\n| where properties.minimumTlsVersion in (\"1.0\", \"1.1\")\r\n| project\r\n recommendationId = \"f075a1bd-de9e-4819-9a1d-1ac41037a74f\",\r\n name,\r\n id,\r\n tags,\r\n param1 = strcat(\"minimumTlsVersion: \", properties.minimumTlsVersion)\r\n" + "query": "// Azure Resource Graph query\n// Identifies VMs and VMSS with manual patch settings, excluding automatic patch modes\nresources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| join kind=inner (\n resources\n | where type == \"microsoft.compute/virtualmachines\"\n | project id = tostring(properties.virtualMachineScaleSet.id), vmproperties = properties\n) on id\n| extend recommendationId = \"e4ffd7b0-ba24-c84e-9352-ba4819f908c0\", param1 = \"patchMode: Manual\", vmproperties.osProfile.linuxConfiguration.patchSettings.patchMode\n| where isnotnull(vmproperties.osProfile.linuxConfiguration) and vmproperties.osProfile.linuxConfiguration.patchSettings.patchMode !in (\"AutomaticByPlatform\", \"AutomaticByOS\")\n| distinct recommendationId, name, id, param1\n| union (resources\n| where type == \"microsoft.compute/virtualmachinescalesets\"\n| join kind=inner (\n resources\n | where type == \"microsoft.compute/virtualmachines\"\n | project id = tostring(properties.virtualMachineScaleSet.id), vmproperties = properties\n) 
on id\n| extend recommendationId = \"e4ffd7b0-ba24-c84e-9352-ba4819f908c0\", param1 = \"patchMode: Manual\", vmproperties.osProfile.windowsConfiguration.patchSettings.patchMode\n| where isnotnull(vmproperties.osProfile.windowsConfiguration) and vmproperties.osProfile.windowsConfiguration.patchSettings.patchMode !in (\"AutomaticByPlatform\", \"AutomaticByOS\")\n| distinct recommendationId, name, id, param1)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6a8b3db9-5773-413a-a127-4f7032f34bbd", - "recommendationTypeId": null, + "aprlGuid": "83d61669-7bd6-9642-a305-175db8adcdf4", + "recommendationTypeId": "3b739bd1-c193-4bb6-a953-1362ee3b03b2", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-signalr/availability-zones", - "name": "Availability zones support in Azure SignalR Service" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/deprecated-images", + "name": "Deprecated Azure Marketplace images" } ], - "recommendationControl": "High Availability", - "longDescription": "Use SignalR with zone redundancy for production to improve uptime. This feature, available in the Premium tier, is activated upon creating or upgrading to Premium. Standard can upgrade to Premium without downtime.\n", - "pgVerified": false, - "description": "Enable zone redundancy for SignalR", - "potentialBenefits": "Enhances reliability and uptime", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "Ensure current versions of images are in use to avoid disruption after image deprecation. Please review the publisher, offer, sku information of the VM to ensure you are running on a supported image. 
Enable Auto Guest Patching or Image Upgrades, to get notifications about image deprecation.\n", + "pgVerified": true, + "description": "Upgrade VMSS Image versions scheduled to be deprecated or already retired", + "potentialBenefits": "Avoid disruptions by updating VMSS images.", "tags": null, - "recommendationResourceType": "Microsoft.SignalRService/SignalR", + "recommendationResourceType": "Microsoft.Compute/virtualMachineScaleSets", "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find SignalR instances that are not configured with the Premium tier\r\nresources\r\n| where type == \"microsoft.signalrservice/signalr\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where sku.tier != \"Premium\"\r\n| project recommendationId = \"6a8b3db9-5773-413a-a127-4f7032f34bbd\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\r\n| order by id asc\r\n\r\n" + "automationAvailable": false, + "query": "//cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f8f834a9-c761-4e84-b2cb-ac55494d0c37", + "aprlGuid": "273f6b30-68e0-4241-85ea-acf15ffb60bf", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/high-availability-sla-local-zone-redundancy?view=azuresql-mi#zone-redundant-availability", - "name": "High 
availability through zone-redundancy" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-orchestration-modes#what-has-changed-with-flexible-orchestration-mode", + "name": "What has changed with Flexible orchestration mode" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-attach-detach-vm?branch=main&tabs=portal-1%2Cportal-2%2Cportal-3", + "name": "Attach or detach a Virtual Machine to or from a Virtual Machine Scale Set" } ], "recommendationControl": "High Availability", - "longDescription": "By default, Azure SQL Database premium tier provisions multiple copies within the same region. For geo redundancy, databases can be set as Zone Redundant, distributing copies across Azure Availability Zones to maintain availability during regional outages.\n", - "pgVerified": false, - "description": "Enable zone redundancy for Azure SQL Managed Instance to achieve high availability and resiliency", - "potentialBenefits": "Enhanced availability and reliability", - "publishedToLearn": false, + "longDescription": "Production VM workloads should be deployed on multiple VMs and grouped in a VMSS Flex instance to intelligently distribute across the platform, minimizing the impact of platform faults and updates.\n", + "pgVerified": true, + "description": "Run production workloads on two or more VMs using VMSS Flex", + "potentialBenefits": "Enhanced fault/update resilience", "tags": null, - "recommendationResourceType": "Microsoft.Sql/managedInstances", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs that are not associated with a VMSS Flex instance\nresources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnull(properties.virtualMachineScaleSet.id)\n| 
project recommendationId=\"273f6b30-68e0-4241-85ea-acf15ffb60bf\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "15e2712c-f3ea-4a8d-9081-11e822b1ccfb", - "recommendationTypeId": null, + "aprlGuid": "2bd0be95-a825-6f47-a8c6-3db1fb5eb387", + "recommendationTypeId": "066a047a-9ace-45f4-ac50-6325840a6b00", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/automated-backups-overview?view=azuresql-mi&preserve-view=true#backup-storage-redundancy", - "name": "Backup storage redundancy" + "url": "https://learn.microsoft.com/azure/virtual-machines/create-portal-availability-zone?tabs=standard", + "name": "Create virtual machines in an availability zone using the Azure portal" } ], "recommendationControl": "High Availability", - "longDescription": "This copies your backups synchronously across three Azure availability zones in the primary region, if Geo is selected it creates 3 more copies in a secondary region.\n", - "pgVerified": false, - "description": "Use Zone-redundant or Geo-zone-redundant Backup storage redundancy", - "potentialBenefits": "Enhanced availability and reliability", - "publishedToLearn": false, + "longDescription": "Azure Availability Zones, within each Azure region, are tolerant to local failures, protecting applications and data against unlikely Datacenter failures by being physically separate.\n", + "pgVerified": true, + "description": "Deploy VMs across Availability Zones", + "potentialBenefits": "Enhanced VM resilience to failures", "tags": null, - "recommendationResourceType": "Microsoft.Sql/managedInstances", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs that are not assigned to a Zone\nResources\n| where type =~ 
'Microsoft.Compute/virtualMachines'\n| where isnull(zones)\n| project recommendationId=\"2bd0be95-a825-6f47-a8c6-3db1fb5eb387\", name, id, tags, param1=\"No Zone\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c14de326-2729-4be7-a91f-4ea185d24b10", + "aprlGuid": "a8d25876-7951-b646-b4e8-880c9031596b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/connection-types-overview?view=azuresql#connection-types", - "name": "Connection types" + "url": "https://learn.microsoft.com/azure/virtual-machine-scale-sets/flexible-virtual-machine-scale-sets-migration-resources", + "name": "Migrate deployments and resources to Virtual Machine Scale Sets in Flexible orchestration" } ], - "recommendationControl": "Scalability", - "longDescription": "Redirect mode enables direct connectivity to the instance resulting in improved latency and throughput. Redirect mode applies to the VNet-local endpoint only, while the public endpoint will always default to Proxy connection mode.\n", - "pgVerified": false, - "description": "Use Redirect connection type to accelerate application access", - "potentialBenefits": "Improved latency and throughput", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "While availability sets are not scheduled for immediate deprecation, they are planned to be deprecated in the future. 
Migrate workloads from VMs to VMSS Flex for deployment across zones or within the same zone across different fault domains (FDs) for better reliability.\n", + "pgVerified": true, + "description": "Migrate VMs using availability sets to VMSS Flex", + "potentialBenefits": "Enhances reliability and future-proofs VMs", "tags": null, - "recommendationResourceType": "Microsoft.Sql/managedInstances", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs using Availability Sets\nresources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnotnull(properties.availabilitySet)\n| project recommendationId = \"a8d25876-7951-b646-b4e8-880c9031596b\", name, id, tags, param1=strcat(\"availabilitySet: \",properties.availabilitySet.id)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "257cd903-700f-4a79-bd37-7dce2b511df4", - "recommendationTypeId": null, + "aprlGuid": "cfe22a65-b1db-fd41-9e8e-d573922709ae", + "recommendationTypeId": "ed651749-cd37-4fd5-9897-01b416926745", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/failover-group-sql-mi?view=azuresql", - "name": "Failover groups overview and best practices" + "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#virtual-machines", + "name": "Resiliency checklist for Virtual Machines" + }, + { + "url": "https://learn.microsoft.com/azure/site-recovery/site-recovery-test-failover-to-azure", + "name": "Run a test failover (disaster recovery drill) to Azure" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "If an outage impacts one or more of the databases in the managed instance, you can manually or automatically failover all the 
databases inside the instance to a secondary region.\n", - "pgVerified": false, - "description": "Configure a secondary instance and a Failover group to enable failover to another region", - "potentialBenefits": "Ensure seamless service with cross-region failover", - "publishedToLearn": false, + "longDescription": "Replicating Azure VMs via Site Recovery entails continuous, asynchronous disk replication to a target region. Recovery points are generated every few minutes, ensuring a Recovery Point Objective (RPO) in minutes.\n", + "pgVerified": true, + "description": "Replicate VMs using Azure Site Recovery", + "potentialBenefits": "Minimize downtime in disasters", "tags": null, - "recommendationResourceType": "Microsoft.Sql/managedInstances", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs that do NOT have replication with ASR enabled\nresources\n| where type =~ \"Microsoft.Compute/virtualMachines\"\n| extend securityType = iif(isnull(properties.securityProfile.securityType), \"Standard\", properties.securityProfile.securityType)\n| where securityType !in~ (\"TrustedLaunch\", \"ConfidentialVM\")\n| project id, vmIdForJoin = tolower(id), name, tags\n| join kind = leftouter (\n recoveryservicesresources\n | where type =~ \"Microsoft.RecoveryServices/vaults/replicationFabrics/replicationProtectionContainers/replicationProtectedItems\"\n and properties.providerSpecificDetails.dataSourceInfo.datasourceType =~ \"AzureVm\"\n | project vmResourceId = tolower(properties.providerSpecificDetails.dataSourceInfo.resourceId)\n )\n on $left.vmIdForJoin == $right.vmResourceId\n| where isempty(vmResourceId)\n| project recommendationId = \"cfe22a65-b1db-fd41-9e8e-d573922709ae\", name, id, tags\n" }, { "publishedToAdvisor": null, - 
"aprlGuid": "c9afeb1e-e706-4809-be4e-75d9fac708f2", - "recommendationTypeId": null, + "aprlGuid": "122d11d7-b91f-8747-a562-f56b79bcfbdc", + "recommendationTypeId": "57ecb3cd-f2b4-4cad-8b3a-232cca527a0b", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://techcommunity.microsoft.com/t5/azure-sql/monitoring-options-available-for-azure-sql-managed-instance/ba-p/1065416", - "name": "Azure SQL Managed Instance monitoring options" + "url": "https://learn.microsoft.com/azure/virtual-machines/unmanaged-disks-deprecation", + "name": "Migrate your Azure unmanaged disks by Sep 30, 2025" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machines/windows/convert-unmanaged-to-managed-disks", + "name": "Migrate Windows VM from unmanaged disks to managed disks" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machines/linux/convert-unmanaged-to-managed-disks", + "name": "Migrate Linux VM from unmanaged disks to managed disks" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring and alerting are an important part of database operations. When working with Azure SQL Database, make use of Azure Monitor and SQL Insights to ensure that you capture relevant database metrics.\n", - "pgVerified": false, - "description": "Monitor your Azure SQL MI Managed Instance in near-real time to detect reliability incidents", - "potentialBenefits": "Quick incident detection and response", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure is retiring unmanaged disks on September 30, 2025. 
Users should plan the migration to avoid disruptions and maintain service reliability.\n", + "pgVerified": true, + "description": "Use Managed Disks for VM disks", + "potentialBenefits": "Avoid retirement disruption, enhance reliability", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs that are not using Managed Disks\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnull(properties.storageProfile.osDisk.managedDisk)\n| project recommendationId = \"122d11d7-b91f-8747-a562-f56b79bcfbdc\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9fad5392-b852-4807-9b6d-3f700ff9771a", + "aprlGuid": "4ea2878f-0d69-8d4a-b715-afc10d1e538e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/database/always-encrypted-landing?view=azuresql", - "name": "Overview of Always Encrypted" + "url": "https://learn.microsoft.com/azure/virtual-machines/managed-disks-overview#data-disk", + "name": "Introduction to Azure managed disks - Data disks" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-types", + "name": "Azure managed disk types" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. 
If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner.\n", - "pgVerified": false, - "description": "Back Up Your Keys", - "potentialBenefits": "Enhanced security and data recovery", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "A data disk is a managed disk attached to a virtual machine for storing database or other essential data. These disks are SCSI drives labeled as per choice.\n", + "pgVerified": true, + "description": "Host database data on a data disk", + "potentialBenefits": "Enhances performance, recovery, migration flexibility", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs that only have OS Disk\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where array_length(properties.storageProfile.dataDisks) < 1\n| project recommendationId = \"4ea2878f-0d69-8d4a-b715-afc10d1e538e\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "74c2491d-048b-0041-a140-935960220e20", - "recommendationTypeId": "2ea11bcb-dfd0-48dc-96f0-beba578b989a", + "aprlGuid": "1981f704-97b9-b645-9c57-33f8ded9261a", + "recommendationTypeId": "651c7925-17a3-42e5-85cd-73bd095cf27f", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/active-geo-replication-overview", - "name": "Active Geo Replication" + "url": "https://learn.microsoft.com/azure/backup/backup-overview", + "name": "What is the Azure Backup service?" 
} ], "recommendationControl": "Disaster Recovery", - "longDescription": "Active Geo Replication ensures business continuity by utilizing readable secondary database replicas. In case of primary database failure, manually failover to secondary database. Secondaries, up to four, can be in same/different regions, used for read-only access.\n", + "longDescription": "Enable backups for your virtual machines with Azure Backup to secure and quickly recover your data. This service offers simple, secure, and cost-effective solutions for backing up and recovering data from the Microsoft Azure cloud.\n", "pgVerified": true, - "description": "Use Active Geo Replication to Create a Readable Secondary in Another Region", - "potentialBenefits": "Enhanced disaster recovery and read scalability", - "publishedToLearn": false, + "description": "Backup VMs with Azure Backup service", + "potentialBenefits": "Secure data recovery and backup", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of SQL databases that are not part of Geo Replication.\r\nresources\r\n| where type == \"microsoft.sql/servers/databases\"\r\n| summarize secondaryTypeCount = countif(isnotempty(properties.secondaryType)) by name\r\n| where secondaryTypeCount == 0\r\n| join kind=inner (\r\n Resources\r\n | where type == \"microsoft.sql/servers/databases\"\r\n) on name\r\n| extend param1 = \"Not part of Geo Replication\"\r\n| project recommendationId = \"74c2491d-048b-0041-a140-935960220e20\", name, id, tags, param1\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs that do NOT have Backup enabled\n// Run query to see results.\nresources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| project name, id, tags\n| join kind=leftouter (\n 
recoveryservicesresources\n | where type =~ 'Microsoft.RecoveryServices/vaults/backupFabrics/protectionContainers/protectedItems'\n | where properties.dataSourceInfo.datasourceType =~ 'Microsoft.Compute/virtualMachines'\n | project idBackupEnabled=properties.sourceResourceId\n | extend name=strcat_array(array_slice(split(idBackupEnabled, '/'), 8, -1), '/')\n) on name\n| where isnull(idBackupEnabled)\n| project-away idBackupEnabled\n| project-away name1\n| project recommendationId = \"1981f704-97b9-b645-9c57-33f8ded9261a\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "943c168a-2ec2-a94c-8015-85732a1b4859", + "aprlGuid": "98b334c0-8578-6046-9e43-b6e8fce6318e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/auto-failover-group-overview?tabs=azure-powershell", - "name": "AutoFailover Groups" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/designing-cloud-solutions-for-disaster-recovery", - "name": "DR Design" + "url": "https://learn.microsoft.com/azure/virtual-machines/states-billing?context=%2Ftroubleshoot%2Fazure%2Fvirtual-machines%2Fcontext%2Fcontext#power-states-and-billing", + "name": "States and billing status of Azure Virtual Machines" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Failover Groups facilitate disaster recovery by configuring databases on one logical server to replicate to another region's logical server. This streamlines geo-replicated database management, offering a single endpoint for connection routing to replicated databases if the primary server fails.\n", + "recommendationControl": "Governance", + "longDescription": "Azure Virtual Machines (VM) instances have various states, like provisioning and power states. 
A non-running VM may indicate issues or it being unnecessary, suggesting removal could help cut costs.\n", "pgVerified": true, - "description": "Auto Failover Groups can encompass one or multiple databases, usually used by the same app.", - "potentialBenefits": "Improves load balancing and disaster recovery", - "publishedToLearn": false, + "description": "Review VMs in stopped state", + "potentialBenefits": "Reduce costs by removing unused VMs", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of SQL databases that are not configured to use a failover-group.\r\nresources\r\n| where type =~'microsoft.sql/servers/databases'\r\n| where isnull(properties['failoverGroupId'])\r\n| project recommendationId = \"943c168a-2ec2-a94c-8015-85732a1b4859\", name, id, tags, param1= strcat(\"databaseId=\", properties['databaseId'])\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs that are NOT running\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where properties.extended.instanceView.powerState.displayStatus != 'VM running'\n| project recommendationId = \"98b334c0-8578-6046-9e43-b6e8fce6318e\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c0085c32-84c0-c247-bfa9-e70977cbf108", - "recommendationTypeId": "807e58d0-e385-41ad-987b-4a4b3e3fb563", + "aprlGuid": "dfedbeb1-1519-fc47-86a5-52f96cf07105", + "recommendationTypeId": "3a3c1a2a-8597-4d3a-981a-0a24a0ee9de4", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/high-availability-sla", - "name": "Zone Redundant Databases" + "url": "https://learn.microsoft.com/azure/virtual-network/accelerated-networking-overview", + "name": "Accelerated Networking 
(AccelNet) overview" } ], - "recommendationControl": "High Availability", - "longDescription": "By default, Azure SQL Database premium tier provisions multiple copies within the same region. For geo redundancy, databases can be set as Zone Redundant, distributing copies across Azure Availability Zones to maintain availability during regional outages.\n", + "recommendationControl": "Scalability", + "longDescription": "Accelerated networking enables SR-IOV to a VM, greatly improving its networking performance by bypassing the host from the data path, which reduces latency, jitter, and CPU utilization for demanding network workloads on supported VM types.\n", "pgVerified": true, - "description": "Enable zone redundancy for Azure SQL Database to achieve high availability and resiliency", - "potentialBenefits": "Enhanced reliability, no extra cost", - "publishedToLearn": false, + "description": "Enable Accelerated Networking (AccelNet)", + "potentialBenefits": "Reduces latency, jitter and CPU use", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Finds non-zone redundant SQL databases and lists them\r\nResources\r\n| where type =~ 'microsoft.sql/servers/databases'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| 
where tolower(tostring(properties.zoneRedundant))=~'false'\r\n|project recommendationId = \"c0085c32-84c0-c247-bfa9-e70977cbf108\", name, id, tags\r\n\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all VM NICs that do not have Accelerated Networking enabled\nresources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| mv-expand nic = properties.networkProfile.networkInterfaces\n| project name, id, tags, lowerCaseNicId = tolower(nic.id), vmSize = tostring(properties.hardwareProfile.vmSize)\n| join kind = inner (\n resources\n | where type =~ 'Microsoft.Network/networkInterfaces'\n | where properties.enableAcceleratedNetworking == false\n | project nicName = split(id, \"/\")[8], lowerCaseNicId = tolower(id)\n )\n on lowerCaseNicId\n| summarize nicNames = make_set(nicName) by name, id, tostring(tags), vmSize\n| extend param1 = strcat(\"NicName: \", strcat_array(nicNames, \", \")), param2 = strcat(\"VMSize: \", vmSize)\n| project recommendationId = \"dfedbeb1-1519-fc47-86a5-52f96cf07105\", name, id, tags, param1, param2\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "cbb17a29-64fb-c943-95d0-8df814a37c40", + "aprlGuid": "73d1bb04-7d3e-0d47-bc0d-63afe773b5fe", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/troubleshoot-common-connectivity-issues", - "name": "How to Implement Retry Logic" + "url": "https://learn.microsoft.com/azure/virtual-network/accelerated-networking-overview", + "name": "Accelerated Networking (AccelNet) overview" } ], - "recommendationControl": "High Availability", - "longDescription": "During transient failures, the application should handle connection retries effectively with Azure SQL Database. 
No Database layer configuration is needed; instead, the application must be set up for graceful retrying.\n", + "recommendationControl": "Governance", + "longDescription": "When Accelerated Networking is enabled, the default Azure VNet interface in GuestOS is swapped for a Mellanox, and its driver comes from a 3rd party. Marketplace images have the latest Mellanox drivers, but post-deployment, updating the driver is the user's responsibility.\n", "pgVerified": true, - "description": "Implement Retry Logic", - "potentialBenefits": "Enhanced connectivity stability", - "publishedToLearn": false, + "description": "When AccelNet is enabled, you must manually update the GuestOS NIC driver", + "potentialBenefits": "Enhanced VM network efficiency", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7e7daec9-6a81-3546-a4cc-9aef72fec1f7", + "aprlGuid": "1f629a30-c9d0-d241-82ee-6f2eb9d42cb4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/insights/azure-sql#analyze-data-and-create-alerts", - "name": "Azure Monitor" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/monitoring-sql-database-azure-monitor", - "name": "Azure SQL Database Monitoring" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/monitoring-sql-database-azure-monitor-reference", - "name": "Monitoring SQL Database Reference" + "url": "https://learn.microsoft.com/azure/load-balancer/load-balancer-outbound-connections", + "name": "Use Source Network Address Translation (SNAT) for outbound connections" } ], - 
"recommendationControl": "Monitoring and Alerting", - "longDescription": "Monitoring and alerting are an important part of database operations. When working with Azure SQL Database, make use of Azure Monitor and SQL Insights to ensure that you capture relevant database metrics.\n", + "recommendationControl": "Security", + "longDescription": "For outbound internet connectivity of Virtual Machines, using NAT Gateway or Azure Firewall is recommended to enhance security and service resilience, thanks to their higher availability and SNAT ports.\n", "pgVerified": true, - "description": "Monitor your Azure SQL Database in Near Real-Time to Detect Reliability Incidents", - "potentialBenefits": "Quick incident detection and response", - "publishedToLearn": false, + "description": "VMs should not have a Public IP directly associated", + "potentialBenefits": "Enhanced security and service resiliency", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of SQL databases that are not configured for monitoring.\r\nresources\r\n| where type == \"microsoft.insights/metricalerts\"\r\n| mv-expand properties.scopes\r\n| mv-expand properties.criteria.allOf\r\n| project databaseid = properties_scopes, monitoredMetric = properties_criteria_allOf.metricName\r\n| where databaseid contains 'databases'\r\n| summarize monitoredMetrics=make_list(monitoredMetric) by databaseid=tolower(tostring(databaseid))\r\n| join kind=fullouter (\r\n resources\r\n | where type =~ 'microsoft.sql/servers/databases'\r\n | project databaseid = tolower(id), name, tags\r\n) on databaseid\r\n| where isnull(monitoredMetrics)\r\n| project recommendationId = \"7e7daec9-6a81-3546-a4cc-9aef72fec1f7\", name, id=databaseid1, tags, param1=strcat(\"MonitoringMetrics=false\" 
)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs with PublicIPs directly associated with them\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnotnull(properties.networkProfile.networkInterfaces)\n| mv-expand nic=properties.networkProfile.networkInterfaces\n| project name, id, tags, nicId = nic.id\n| extend nicId = tostring(nicId)\n| join kind=inner (\n Resources\n | where type =~ 'Microsoft.Network/networkInterfaces'\n | where isnotnull(properties.ipConfigurations)\n | mv-expand ipconfig=properties.ipConfigurations\n | extend publicIp = tostring(ipconfig.properties.publicIPAddress.id)\n | where publicIp != \"\"\n | project name, nicId = tostring(id), publicIp\n) on nicId\n| project recommendationId = \"1f629a30-c9d0-d241-82ee-6f2eb9d42cb4\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d6ef87aa-574e-584e-a955-3e6bb8b5425b", + "aprlGuid": "82b3cf6b-9ae2-2e44-b193-10793213f676", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/key-vault/general/overview", - "name": "Azure Key Vault" - }, + "url": "https://learn.microsoft.com/azure/virtual-network/network-security-group-how-it-works#intra-subnet-traffic", + "name": "How network security groups filter network traffic" + } + ], + "recommendationControl": "Security", + "longDescription": "Unless you have a specific reason, it's advised to associate a network security group to a subnet or a network interface, but not both, to avoid unexpected communication issues and troubleshooting due to potential rule conflicts between the two associations.\n", + "pgVerified": true, + "description": "VM network interfaces and associated subnets both have a Network Security Group associated", + "potentialBenefits": "Reduces communication problems", + "tags": null, + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": 
"Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of virtual machines and associated NICs that do have an NSG associated to them and also an NSG associated to the subnet.\nResources\n| where type =~ 'Microsoft.Network/networkInterfaces'\n| where isnotnull(properties.networkSecurityGroup)\n| mv-expand ipConfigurations = properties.ipConfigurations, nsg = properties.networkSecurityGroup\n| project nicId = tostring(id), subnetId = tostring(ipConfigurations.properties.subnet.id), nsgName=split(nsg.id, '/')[8]\n| parse kind=regex subnetId with '/virtualNetworks/' virtualNetwork '/subnets/' subnet\n | join kind=inner (\n Resources\n | where type =~ 'Microsoft.Network/NetworkSecurityGroups' and isnotnull(properties.subnets)\n | project name, resourceGroup, subnet=properties.subnets\n | mv-expand subnet\n | project subnetId=tostring(subnet.id)\n ) on subnetId\n | project nicId\n| join kind=leftouter (\n Resources\n | where type =~ 'Microsoft.Compute/virtualMachines'\n | where isnotnull(properties.networkProfile.networkInterfaces)\n | mv-expand nic=properties.networkProfile.networkInterfaces\n | project vmName = name, vmId = id, tags, nicId = nic.id, nicName=split(nic.id, '/')[8]\n | extend nicId = tostring(nicId)\n) on nicId\n| project recommendationId = \"82b3cf6b-9ae2-2e44-b193-10793213f676\", name=vmName, id = vmId, tags, param1 = strcat(\"nic-name=\", nicName)\n\n" + }, + { + "publishedToAdvisor": null, + "aprlGuid": "41a22a5e-5e08-9647-92d0-2ffe9ef1bdad", + "recommendationTypeId": "c3b51c94-588b-426b-a892-24696f9e54cc", + "recommendationMetadataState": "Active", + "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/always-encrypted-landing?view=azuresql", - "name": "Getting Started with Always Encrypted" + "url": "https://learn.microsoft.com/azure/virtual-network/virtual-network-network-interface?tabs=network-interface-portal#enable-or-disable-ip-forwarding", + "name": "Enable or 
disable IP forwarding" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner.\n", + "recommendationControl": "Security", + "longDescription": "IP forwarding allows a virtual machine network interface to receive and send network traffic not destined for or originating from its assigned IP addresses.\n", "pgVerified": true, - "description": "Back Up Your Keys", - "potentialBenefits": "Enhanced security and data recovery", - "publishedToLearn": false, + "description": "IP Forwarding should only be enabled for Network Virtual Appliances", + "potentialBenefits": "Enhances network appliance function", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VM NICs that have IPForwarding enabled. 
This feature is usually only required for Network Virtual Appliances\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnotnull(properties.networkProfile.networkInterfaces)\n| mv-expand nic=properties.networkProfile.networkInterfaces\n| project name, id, tags, nicId = nic.id\n| extend nicId = tostring(nicId)\n| join kind=inner (\n Resources\n | where type =~ 'Microsoft.Network/networkInterfaces'\n | where properties.enableIPForwarding == true\n | project nicId = tostring(id)\n) on nicId\n| project recommendationId = \"41a22a5e-5e08-9647-92d0-2ffe9ef1bdad\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "de266d8a-a9f3-4cb9-be95-9306001fceea", + "aprlGuid": "1cf8fe21-9593-1e4e-966b-779a294c0d30", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/azure-sql/database/failover-group-sql-db?view=azuresql#endpoint-redirection", - "name": "Failover Group endpoint redirection" + "url": "https://learn.microsoft.com/azure/virtual-network/virtual-networks-name-resolution-for-vms-and-role-instances", + "name": "Name resolution for resources in Azure virtual networks" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "When using Failover Groups, it is recommended to connect to the Failover Group endpoint instead of individual database endpoints. 
This allows for automatic redirection to the secondary database in case of a failover, ensuring high availability.\n", - "pgVerified": false, - "description": "Use Failover Group endpoints for database connections", - "potentialBenefits": "Enhanced disaster recovery", - "publishedToLearn": false, + "recommendationControl": "Other Best Practices", + "longDescription": "Configure the DNS Server at the Virtual Network level to prevent any inconsistency across the environment.\n", + "pgVerified": true, + "description": "Customer DNS Servers should be configured in the Virtual Network level", + "potentialBenefits": "Ensures DNS consistency", "tags": null, - "recommendationResourceType": "Microsoft.Sql/servers", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VM NICs that have DNS Server settings configured in any of the NICs\nResources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnotnull(properties.networkProfile.networkInterfaces)\n| mv-expand nic=properties.networkProfile.networkInterfaces\n| project name, id, tags, nicId = nic.id\n| extend nicId = tostring(nicId)\n| join kind=inner (\n Resources\n | where type =~ 'Microsoft.Network/networkInterfaces'\n | project name, id, dnsServers = properties.dnsSettings.dnsServers\n | extend hasDns = array_length(dnsServers) >= 1\n | where hasDns != 0\n | project name, nicId = tostring(id)\n) on nicId\n| project recommendationId = \"1cf8fe21-9593-1e4e-966b-779a294c0d30\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e6c7e1cc-2f47-264d-aa50-1da421314472", + "aprlGuid": "3263a64a-c256-de48-9818-afd3cbc55c2a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/storage/common/storage-redundancy", - "name": "Azure Storage redundancy" + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-shared", + "name": "Azure Shared Disk Introduction" }, { - "url": "https://learn.microsoft.com/azure/storage/common/redundancy-migration", - "name": "Change the redundancy configuration for a storage account" + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-shared-enable?tabs=azure-portal", + "name": "Enable Shared Disks" } ], - "recommendationControl": "High Availability", - "longDescription": "Redundancy ensures storage accounts meet availability and durability targets amidst failures, weighing lower costs against higher availability. Locally redundant storage offers the least durability at the lowest cost.\n", + "recommendationControl": "Other Best Practices", + "longDescription": "Azure shared disks let you attach a disk to multiple VMs at once for deploying or migrating clustered applications, suitable only when a disk is shared among VM cluster members.\n", "pgVerified": true, - "description": "Ensure that storage accounts are zone or region redundant", - "potentialBenefits": "High availability and durability for storage", - "publishedToLearn": false, + "description": "Shared disks should only be enabled in clustered servers", + "potentialBenefits": "Enhances clustered server performance", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This query will return all storage accounts that are not using Zone or Region replication\r\nResources\r\n| where type =~ \"Microsoft.Storage/storageAccounts\"\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", 
\"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| where sku.name in~ (\"Standard_LRS\", \"Premium_LRS\")\r\n| project recommendationId = \"e6c7e1cc-2f47-264d-aa50-1da421314472\", name, id, tags, param1 = strcat(\"sku: \", sku.name)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Disks configured to be Shared. This is not an indication of an issue, but if a disk with this configuration is assigned to two or more VMs without a proper disk control mechanism (like a WSFC) it can lead to data loss\nresources\n| where type =~ 'Microsoft.Compute/disks'\n| where isnotnull(properties.maxShares) and properties.maxShares >= 2\n| project id, name, tags, lowerCaseDiskId = tolower(id), diskState = tostring(properties.diskState)\n| join kind = leftouter (\n resources\n | where type =~ 'Microsoft.Compute/virtualMachines'\n | project osDiskVmName = name, lowerCaseOsDiskId = tolower(properties.storageProfile.osDisk.managedDisk.id)\n | join kind = fullouter (\n resources\n | where type =~ 'Microsoft.Compute/virtualMachines'\n | mv-expand dataDisks = properties.storageProfile.dataDisks\n | project dataDiskVmName = name, lowerCaseDataDiskId = tolower(dataDisks.managedDisk.id)\n )\n on $left.lowerCaseOsDiskId == $right.lowerCaseDataDiskId\n | project lowerCaseDiskId = coalesce(lowerCaseOsDiskId, lowerCaseDataDiskId), vmName = coalesce(osDiskVmName, dataDiskVmName)\n )\n on lowerCaseDiskId\n| summarize vmNames = make_set(vmName) by name, id, tostring(tags), diskState\n| extend param1 = strcat(\"DiskState: \", diskState), param2 = 
iif(isempty(vmNames[0]), \"VMName: n/a\", strcat(\"VMName: \", strcat_array(vmNames, \", \")))\n| project recommendationId = \"3263a64a-c256-de48-9818-afd3cbc55c2a\", name, id, tags, param1, param2\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "63ad027e-611c-294b-acc5-8e3234db9a40", - "recommendationTypeId": "47bb383c-8e25-95f0-c2aa-437add1d87d3", + "aprlGuid": "70b1d2be-e6c4-b54e-9959-b1b690f9e485", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.microsoft.com/updates/classic-azure-storage-accounts-will-be-retired-on-31-august-2024/", - "name": "Azure classic storage accounts retirement announcement" - }, - { - "url": "https://learn.microsoft.com/azure/storage/common/classic-account-migration-overview", - "name": "Migrate your classic storage accounts to Azure Resource Manager" + "url": "https://learn.microsoft.com/azure/virtual-machines/disks-enable-private-links-for-import-export-portal", + "name": "Restrict import/export access for managed disks using Azure Private Link" } ], - "recommendationControl": "Service Upgrade and Retirement", - "longDescription": "Classic storage accounts will be fully retired on August 31, 2024. 
If you have classic storage accounts, start planning your migration now.\n", + "recommendationControl": "Security", + "longDescription": "Recommended changing to \"Disable public access and enable private access\" and creating a Private Endpoint to improve security by restricting direct public access and ensuring connections are made privately, enhancing data protection and minimizing potential external threats.\n", "pgVerified": true, - "description": "Classic Storage Accounts must be migrated to new Azure Resource Manager resources", - "potentialBenefits": "Avoids service retirement issues", - "publishedToLearn": false, + "description": "Network access to the VM disk should be set to Disable public access and enable private access", + "potentialBenefits": "Enhances VM security and privacy", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure classic Storage Account\r\nresources\r\n| where type =~ 'microsoft.classicstorage/storageaccounts'\r\n| project recommendationId = '63ad027e-611c-294b-acc5-8e3234db9a40', name, id, tags, param1=type\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Disks with \"Enable public access from all networks\" enabled\nresources\n| where type =~ 'Microsoft.Compute/disks'\n| where properties.publicNetworkAccess == \"Enabled\"\n| project id, name, tags, lowerCaseDiskId = tolower(id)\n| join kind = leftouter (\n resources\n | where type =~ 'Microsoft.Compute/virtualMachines'\n | project osDiskVmName = name, lowerCaseOsDiskId = tolower(properties.storageProfile.osDisk.managedDisk.id)\n | join kind = fullouter (\n resources\n | where type =~ 'Microsoft.Compute/virtualMachines'\n | mv-expand dataDisks = properties.storageProfile.dataDisks\n | project dataDiskVmName = name, 
lowerCaseDataDiskId = tolower(dataDisks.managedDisk.id)\n )\n on $left.lowerCaseOsDiskId == $right.lowerCaseDataDiskId\n | project lowerCaseDiskId = coalesce(lowerCaseOsDiskId, lowerCaseDataDiskId), vmName = coalesce(osDiskVmName, dataDiskVmName)\n )\n on lowerCaseDiskId\n| summarize vmNames = make_set(vmName) by name, id, tostring(tags)\n| extend param1 = iif(isempty(vmNames[0]), \"VMName: n/a\", strcat(\"VMName: \", strcat_array(vmNames, \", \")))\n| project recommendationId = \"70b1d2be-e6c4-b54e-9959-b1b690f9e485\", name, id, tags, param1\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5587ef77-7a05-a74d-9c6e-449547a12f27", - "recommendationTypeId": "c6b94711-f1f5-4e7e-9c89-c17ed4190969", + "aprlGuid": "c42343ae-2712-2843-a285-3437eb0b28a1", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/common/storage-account-overview#types-of-storage-accounts", - "name": "Types of storage accounts" - }, - { - "url": "https://learn.microsoft.com/azure/storage/common/scalability-targets-standard-account", - "name": "Scalability and performance targets for standard storage accounts" - }, - { - "url": "https://learn.microsoft.com/azure/storage/blobs/storage-performance-checklist", - "name": "Performance and scalability checklist for Blob storage" - }, - { - "url": "https://learn.microsoft.com/azure/storage/blobs/scalability-targets", - "name": "Scalability and performance targets for Blob storage" + "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/ready/landing-zone/design-principles#policy-driven-governance", + "name": "Policy-driven governance" }, { - "url": "https://learn.microsoft.com/azure/storage/blobs/storage-blob-block-blob-premium", - "name": "Premium block blob storage accounts" + "url": "https://learn.microsoft.com/azure/virtual-machines/security-policy", + "name": "Azure Policy Regulatory Compliance controls for Azure Virtual 
Machines" } ], - "recommendationControl": "Scalability", - "longDescription": "Use premium performance block blob storage instead of standard performance storage for workloads that require fast storage response times and/or high transaction rates.\n", + "recommendationControl": "Governance", + "longDescription": "Keeping your virtual machine (VM) secure is crucial for the applications you run. This involves using various Azure services and features to ensure secure access to your VMs and the secure storage of your data, aiming for overall security of your VM and applications.\n", "pgVerified": true, - "description": "Use premium performance block blob storage for high performance workloads", - "potentialBenefits": "Optimized cost and performance", - "publishedToLearn": false, + "description": "Ensure that your VMs are compliant with Azure Policies", + "potentialBenefits": "Secure VMs and applications", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all VMs in \"Non-compliant\" state with Azure Policies\npolicyresources\n| where type =~ \"Microsoft.PolicyInsights/policyStates\" and properties.resourceType =~ \"Microsoft.Compute/virtualMachines\" and properties.complianceState =~ \"NonCompliant\"\n| project\n policyDefinitionId = tolower(properties.policyDefinitionId),\n policyAssignmentId = tolower(properties.policyAssignmentId),\n targetResourceId = tolower(properties.resourceId)\n// Join the policy definition details\n| join kind = leftouter (\n policyresources\n | where type =~ \"Microsoft.Authorization/policyDefinitions\"\n | project policyDefinitionId = tolower(id), policyDefinitionDisplayName = properties.displayName\n )\n on 
policyDefinitionId\n| project policyDefinitionId, policyDefinitionDisplayName, policyAssignmentId, targetResourceId\n// Join the policy assignment details\n| join kind = leftouter (\n policyresources\n | where type =~ \"Microsoft.Authorization/policyAssignments\"\n | project policyAssignmentId = tolower(id), policyAssignmentDisplayName = properties.displayName\n )\n on policyAssignmentId\n| project policyDefinitionId, policyDefinitionDisplayName, policyAssignmentId, policyAssignmentDisplayName, targetResourceId\n// Join the target resource details\n| join kind = leftouter (\n resources\n | where type =~ \"Microsoft.Compute/virtualMachines\"\n | project targetResourceId = tolower(id), targetResourceIdPreservedCase = id, targetResourceName = name, targetResourceTags = tags\n )\n on targetResourceId\n| project\n recommendationId = \"c42343ae-2712-2843-a285-3437eb0b28a1\",\n name = targetResourceName,\n id = targetResourceIdPreservedCase,\n tags = targetResourceTags,\n param1 = strcat(\"DefinitionName: \", policyDefinitionDisplayName),\n param2 = strcat(\"DefinitionID: \", policyDefinitionId),\n param3 = strcat(\"AssignmentName: \", policyAssignmentDisplayName),\n param4 = strcat(\"AssignmentID: \", policyAssignmentId)\n" }, { "publishedToAdvisor": null, - "aprlGuid": "03263c57-c869-3841-9e0a-3dbb9ef3e28d", - "recommendationTypeId": "42dbf883-9e4b-4f84-9da4-232b87c4b5e9", + "aprlGuid": "f0a97179-133a-6e4f-8a49-8a44da73ffce", + "recommendationTypeId": "a40cc620-e72c-fdf4-c554-c6ca2cd705c0", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com//azure/storage/blobs/soft-delete-blob-enable?tabs=azure-portal ", - "name": "Soft delete detail docs" + "url": "https://learn.microsoft.com/azure/virtual-machines/disk-encryption-overview", + "name": "Overview of managed disk encryption options" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "The soft delete option enables data recovery if mistakenly deleted, 
while the Lock feature prevents the accidental deletion of the storage account itself, ensuring additional security and data integrity measures.\n", + "recommendationControl": "Security", + "longDescription": "Consider enabling Azure Disk Encryption (ADE) for encrypting Azure VM disks using DM-Crypt (Linux) or BitLocker (Windows). Additionally, consider Encryption at host and Confidential disk encryption for enhanced data security.\n", "pgVerified": true, - "description": "Enable Soft Delete to protect your data", - "potentialBenefits": "Prevents accidental data/account loss", - "publishedToLearn": false, + "description": "Virtual Machines should have Azure Disk Encryption or EncryptionAtHost enabled", + "potentialBenefits": "Enhances data security and integrity", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VM disks without Azure Disk Encryption or encryption at host enabled\nresources\n| where type =~ \"microsoft.compute/disks\"\n| project diskId = id, diskName = name, vmId = tolower(managedBy), azureDiskEncryption = iff(properties.encryptionSettingsCollection.enabled == true, true, false)\n| join kind=leftouter (resources\n| where type =~ \"microsoft.compute/virtualmachines\"\n| project vmId = tolower(id), vmName = name, encryptionAtHost = iff(properties.securityProfile.encryptionAtHost == true, true, false)) on vmId\n| where not(encryptionAtHost) and not(azureDiskEncryption)\n| project recommendationId = 'f0a97179-133a-6e4f-8a49-8a44da73ffce', name = vmName, id =vmId, param1 = strcat('diskName:',diskName), param2 = strcat('azureDiskEncryption:',iff(azureDiskEncryption, \"Enabled\", \"Disabled\")), param3 = 
strcat('encryptionAtHost:',iff(encryptionAtHost, \"Enabled\", \"Disabled\"))\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8ebda7c0-e0e1-ed45-af59-2d7ea9a1c05d", + "aprlGuid": "b72214bb-e879-5f4b-b9cd-642db84f36f4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/blobs/versioning-overview ", - "name": "Blob versioning" + "url": "https://learn.microsoft.com/azure/azure-monitor/vm/vminsights-overview", + "name": "Overview of VM insights" + }, + { + "url": "https://learn.microsoft.com/azure/azure-monitor/vm/vminsights-troubleshoot#did-the-extension-install-properly", + "name": "Did the extension install properly?" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Consider enabling versioning for Azure Storage Accounts to recover from accidental modifications or deletions and manage blob operation latency. Microsoft advises maintaining fewer than 1000 versions per blob to optimize performance. Lifecycle management can help delete old versions automatically.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "VM Insights monitors VM and scale set performance, health, running processes, and dependencies. 
It enhances the predictability of application performance and availability by pinpointing performance bottlenecks and network issues, and it clarifies if problems are related to other dependencies.\n", "pgVerified": true, - "description": "Enable versioning for accidental modification and keep the number of versions below 1000", - "potentialBenefits": "Recover data, manage latency", - "publishedToLearn": false, + "description": "Enable VM Insights", + "potentialBenefits": "Improves VM performance and health", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Check for VMs without Azure Monitoring Agent extension installed, missing Data Collection Rule or Data Collection Rule without performance enabled.\nResources\n| where type == 'microsoft.compute/virtualmachines'\n| project idVm = tolower(id), name, tags\n| join kind=leftouter (\n InsightsResources\n | where type =~ \"Microsoft.Insights/dataCollectionRuleAssociations\" and id has \"Microsoft.Compute/virtualMachines\"\n | project idDcr = tolower(properties.dataCollectionRuleId), idVmDcr = tolower(substring(id, 0, indexof(id, \"/providers/Microsoft.Insights/dataCollectionRuleAssociations/\"))))\non $left.idVm == $right.idVmDcr\n| join kind=leftouter (\n Resources\n | where type =~ \"Microsoft.Insights/dataCollectionRules\"\n | extend\n isPerformanceEnabled = iif(properties.dataSources.performanceCounters contains \"Microsoft-InsightsMetrics\" and properties.dataFlows contains \"Microsoft-InsightsMetrics\", true, false),\n isMapEnabled = iif(properties.dataSources.extensions contains \"Microsoft-ServiceMap\" and properties.dataSources.extensions contains \"DependencyAgent\" and properties.dataFlows contains \"Microsoft-ServiceMap\", 
true, false)//,\n | where isPerformanceEnabled or isMapEnabled\n | project dcrName = name, isPerformanceEnabled, isMapEnabled, idDcr = tolower(id))\non $left.idDcr == $right.idDcr\n| join kind=leftouter (\n Resources\n | where type == 'microsoft.compute/virtualmachines/extensions' and (name contains 'AzureMonitorWindowsAgent' or name contains 'AzureMonitorLinuxAgent')\n | extend idVmExtension = tolower(substring(id, 0, indexof(id, '/extensions'))), extensionName = name)\non $left.idVm == $right.idVmExtension\n| where isPerformanceEnabled != 1 or (extensionName != 'AzureMonitorWindowsAgent' and extensionName != 'AzureMonitorLinuxAgent')\n| project recommendationId = \"b72214bb-e879-5f4b-b9cd-642db84f36f4\", name, id = idVm, tags, param1 = strcat('MonitoringExtension:', extensionName), param2 = strcat('DataCollectionRuleId:', idDcr), param3 = strcat('isPerformanceEnabled:', isPerformanceEnabled)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1b965cb9-7629-214e-b682-6bf6e450a100", + "aprlGuid": "4a9d8973-6dba-0042-b3aa-07924877ebd5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/blobs/point-in-time-restore-overview", - "name": "Point-in-time restore for block blobs" - }, - { - "url": "https://learn.microsoft.com/azure/storage/blobs/point-in-time-restore-manage?tabs=portal", - "name": "Perform a point-in-time restore on block blob data" + "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/agents/agents-overview", + "name": "Azure Monitor Agent overview" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Consider enabling point-in-time restore for standard general purpose v2 accounts with flat namespace to protect against accidental deletion or corruption by restoring block blob data to an earlier state.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Azure Monitor Metrics automatically receives 
platform metrics, but platform logs, which offer detailed diagnostics and auditing for resources and their Azure platform, need to be manually routed for collection.\n", "pgVerified": true, - "description": "Enable point-in-time restore for GPv2 accounts to safeguard against data loss", - "potentialBenefits": "Protects data from loss/corruption", - "publishedToLearn": false, + "description": "Configure monitoring for all Azure Virtual Machines", + "potentialBenefits": "Enhanced diagnostics and auditing capability", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all Virtual Machines without diagnostic settings enabled/with diagnostic settings enabled but not configured both performance counters and event logs/syslogs.\nresources\n| where type =~ \"microsoft.compute/virtualmachines\"\n| project name, id, tags, lowerCaseVmId = tolower(id)\n| join kind = leftouter (\n resources\n | where type =~ \"Microsoft.Compute/virtualMachines/extensions\" and properties.publisher =~ \"Microsoft.Azure.Diagnostics\"\n | project\n lowerCaseVmIdOfExtension = tolower(substring(id, 0, indexof(id, \"/extensions/\"))),\n extensionType = properties.type,\n provisioningState = properties.provisioningState,\n storageAccount = properties.settings.StorageAccount,\n // Windows\n wadPerfCounters = properties.settings.WadCfg.DiagnosticMonitorConfiguration.PerformanceCounters.PerformanceCounterConfiguration,\n wadEventLogs = properties.settings.WadCfg.DiagnosticMonitorConfiguration.WindowsEventLog,\n // Linux\n ladPerfCounters = properties.settings.ladCfg.diagnosticMonitorConfiguration.performanceCounters.performanceCounterConfiguration,\n ladSyslog = 
properties.settings.ladCfg.diagnosticMonitorConfiguration.syslogEvents\n | extend\n // Windows\n isWadPerfCountersConfigured = iif(array_length(wadPerfCounters) > 0, true, false),\n isWadEventLogsConfigured = iif(isnotnull(wadEventLogs) and array_length(wadEventLogs.DataSource) > 0, true, false),\n // Linux\n isLadPerfCountersConfigured = iif(array_length(ladPerfCounters) > 0, true, false),\n isLadSyslogConfigured = isnotnull(ladSyslog)\n | project\n lowerCaseVmIdOfExtension,\n extensionType,\n provisioningState,\n storageAccount,\n isPerfCountersConfigured = case(extensionType =~ \"IaaSDiagnostics\", isWadPerfCountersConfigured, extensionType =~ \"LinuxDiagnostic\", isLadPerfCountersConfigured, false),\n isEventLogsConfigured = case(extensionType =~ \"IaaSDiagnostics\", isWadEventLogsConfigured, extensionType =~ \"LinuxDiagnostic\", isLadSyslogConfigured, false)\n )\n on $left.lowerCaseVmId == $right.lowerCaseVmIdOfExtension\n| where isempty(lowerCaseVmIdOfExtension) or provisioningState !~ \"Succeeded\" or not(isPerfCountersConfigured and isEventLogsConfigured)\n| extend\n param1 = strcat(\"DiagnosticSetting: \", iif(isnotnull(extensionType), strcat(\"Enabled, partially configured (\", extensionType, \")\"), \"Not enabled\")),\n param2 = strcat(\"ProvisioningState: \", iif(isnotnull(provisioningState), provisioningState, \"n/a\")),\n param3 = strcat(\"storageAccount: \", iif(isnotnull(storageAccount), storageAccount, \"n/a\")),\n param4 = strcat(\"PerformanceCounters: \", case(isnull(isPerfCountersConfigured), \"n/a\", isPerfCountersConfigured, \"Configured\", \"Not configured\")),\n param5 = strcat(\"EventLogs/Syslogs: \", case(isnull(isEventLogsConfigured), \"n/a\", isEventLogsConfigured, \"Configured\", \"Not configured\"))\n| project recommendationId = \"4a9d8973-6dba-0042-b3aa-07924877ebd5\", name, id, tags, param1, param2, param3, param4, param5\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "96cb8331-6b06-8242-8ce8-4e2f665dc679", + "aprlGuid": 
"52ab9e5c-eec0-3148-8bd7-b6dd9e1be870", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/blobs/monitor-blob-storage", - "name": "Monitor Azure Blob Storage" - }, - { - "url": "https://learn.microsoft.com/azure/storage/blobs/blob-storage-monitoring-scenarios", - "name": "Best practices for monitoring Azure Blob Storage" + "url": "https://learn.microsoft.com/azure/virtual-machines/maintenance-configurations", + "name": "Use maintenance configurations to control and manage the VM updates" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "For critical applications and business processes relying on Azure, monitoring and alerts are crucial. Resource logs are only stored after creating a diagnostic setting to route logs to specified locations, requiring selection of log categories to collect.\n", + "recommendationControl": "High Availability", + "longDescription": "The maintenance configuration settings let users schedule and manage updates, making sure the updates or interruptions on the VM are performed within a planned timeframe.\n", "pgVerified": true, - "description": "Monitor all blob storage accounts", - "potentialBenefits": "Enhanced alerting and log analysis", - "publishedToLearn": false, + "description": "Use maintenance configurations for the VMs", + "potentialBenefits": "Scheduled updates for VMs", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find VMS that do not have maintenance configuration assigned\nResources\n| extend resourceId = tolower(id)\n| project name, location, type, id, tags, resourceId, properties\n| 
where type =~ 'Microsoft.Compute/virtualMachines'\n| join kind=leftouter (\nmaintenanceresources\n| where type =~ \"microsoft.maintenance/configurationassignments\"\n| project planName = name, type, maintenanceProps = properties\n| extend resourceId = tostring(maintenanceProps.resourceId)\n) on resourceId\n| where isnull(maintenanceProps)\n| project recommendationId = \"52ab9e5c-eec0-3148-8bd7-b6dd9e1be870\",name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2ad78dec-5a4d-4a30-8fd1-8584335ad781", + "aprlGuid": "3201dba8-d1da-4826-98a4-104066545170", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/common/storage-account-overview#legacy-storage-account-types", - "name": "Legacy storage account types" - }, - { - "url": "https://learn.microsoft.com/azure/storage/common/storage-account-upgrade", - "name": "Upgrade to a general-purpose v2 storage account" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/sizes-b-series-burstable", + "name": "B-series burstable virtual machine sizes" } ], "recommendationControl": "Scalability", - "longDescription": "General-purpose v2 accounts are recommended for most storage scenarios offering the latest features or the lowest per-gigabyte pricing. 
Legacy accounts like Standard general-purpose v1 and Blob Storage aren't advised by Microsoft but may fit specific scenarios.\n", + "longDescription": "A-series VMs are tailored for entry-level workloads like development and testing, including use cases such as development and test servers, low traffic web servers, and small to medium databases.\n", "pgVerified": true, - "description": "Consider upgrading legacy storage accounts to v2 storage accounts", - "potentialBenefits": "Latest features, lowest cost", - "publishedToLearn": false, + "description": "Don't use A or B-Series VMs for production needing constant full CPU performance", + "potentialBenefits": "Ensures full CPU usage for heavy tasks", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure Storage Accounts, that upgradeable to General purpose v2.\r\nResources\r\n| where type =~ \"Microsoft.Storage/storageAccounts\" and kind in~ (\"Storage\", \"BlobStorage\")\r\n| extend\r\n param1 = strcat(\"AccountKind: \", case(kind =~ \"Storage\", \"Storage (general purpose v1)\", kind =~ \"BlobStorage\", \"BlobStorage\", kind)),\r\n param2 = strcat(\"Performance: \", sku.tier),\r\n param3 = strcat(\"Replication: \", sku.name)\r\n| project recommendationId = \"2ad78dec-5a4d-4a30-8fd1-8584335ad781\", name, id, tags, param1, param2, param3\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs using A or B series families\nresources\n| where type == 'microsoft.compute/virtualmachines'\n| where properties.hardwareProfile.vmSize contains \"Standard_B\" or properties.hardwareProfile.vmSize contains \"Standard_A\"\n| project recommendationId = \"3201dba8-d1da-4826-98a4-104066545170\", name, id, tags, param1=strcat(\"vmSku: \" , 
properties.hardwareProfile.vmSize)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "dc55be60-6f8c-461e-a9d5-a3c7686ed94e", - "recommendationTypeId": null, + "aprlGuid": "df0ff862-814d-45a3-95e4-4fad5a244ba6", + "recommendationTypeId": "58d6648d-32e8-4346-827c-4f288dd8ca24", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/example-scenario/wvd/windows-virtual-desktop#azure-virtual-desktop-limitations", - "name": "Learn More" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-virtual-desktop/networking#private-endpoints-private-link", - "name": "Private Link" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#disk-type-comparison", + "name": "Disk type comparison and decision tree" } ], - "recommendationControl": "Security", - "longDescription": "Leverage Azure Private Link Service for secure access to Azure Storage and services via Private Endpoint in your VNet. Eliminate the need for public IPs, ensuring data privacy. Enjoy granular access control for enhanced security.\n", + "recommendationControl": "Scalability", + "longDescription": "Compared to Standard HDD and SSD, Premium SSD, SSD v2, and Ultra Disks offer improved performance, configurability, and higher single-instance VM uptime SLAs. 
The lowest SLA of all disks on a VM applies, so it is best to use Premium or Ultra Disks for the highest uptime SLA.\n", "pgVerified": true, - "description": "Enable Azure Private Link service for storage accounts", - "potentialBenefits": "Secure, private access to storage with no public IPs", - "publishedToLearn": false, + "description": "Mission Critical Workloads should consider using Premium or Ultra Disks", + "potentialBenefits": "Enhanced performance, cost efficiency, and uptime SLA", "tags": null, - "recommendationResourceType": "Microsoft.Storage/storageAccounts", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This resource graph query will return all storage accounts that does not have a Private Endpoint Connection or where a private endpoint exists but public access is enabled\r\nresources\r\n| where type =~ \"Microsoft.Storage/StorageAccounts\"\r\n| where isnull(properties.privateEndpointConnections) or properties.privateEndpointConnections[0].properties.provisioningState != (\"Succeeded\") or (isnull(properties.networkAcls) and properties.publicNetworkAccess == 'Enabled')\r\n| extend param1 = strcat('Private Endpoint: ', iif(array_length(properties.privateEndpointConnections) != 0,split(properties.privateEndpointConnections[0].properties.privateEndpoint.id,'/')[8],'No Private Endpoint'))\r\n| extend param2 = strcat('Access: ', iif(properties.publicNetworkAccess == 'Disabled', 'Public Access Disabled', iif(isnotnull(properties.networkAcls), 'NetworkACLs in place','Public Access Enabled')))\r\n| project recommendationId = \"dc55be60-6f8c-461e-a9d5-a3c7686ed94e\", name, id, tags, param1, param2\r\n" + "query": "// Azure Resource Graph Query\n// Find all VMs that have an attached disk that is not in the Premium or Ultra sku tier.\n\nresources\n| where type =~ 
'Microsoft.Compute/virtualMachines'\n| extend lname = tolower(name)\n| join kind=leftouter(resources\n | where type =~ 'Microsoft.Compute/disks'\n | where not(sku.tier =~ 'Premium') and not(sku.tier =~ 'Ultra')\n | extend lname = tolower(tostring(split(managedBy, '/')[8]))\n | project lname, name\n | summarize disks = make_list(name) by lname) on lname\n| where isnotnull(disks)\n| project recommendationId = \"df0ff862-814d-45a3-95e4-4fad5a244ba6\", name, id, tags, param1=strcat(\"AffectedDisks: \", disks)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e48a7227-5ec7-463a-b955-ee7cb598ded4", + "aprlGuid": "9ab499d8-8844-424d-a2d4-8f53690eb8f8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/stream-analytics/cluster-overview", - "name": "Overview of Azure Stream Analytics Cluster" + "url": "https://learn.microsoft.com/azure/azure-boost/overview", + "name": "Microsoft Azure Boost" + }, + { + "url": "https://aka.ms/AzureBoostGABlog", + "name": "Announcing the general availability of Azure Boost" } ], - "recommendationControl": "Scalability", - "longDescription": "Stream Analytics cluster (dedicated) offers more reliable performance guarantees. All the jobs running on your cluster belong only to you. You can also have access to important features like private endpoints, Auto-Scaling, Vnet Support, etc.\n", - "pgVerified": false, - "description": "Run jobs in your own dedicated Stream Analytics cluster for increased reliability and security", - "potentialBenefits": "Enhanced reliability and security", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "If the workload is Maintenance sensitive, consider Azure Boost compatible VMs. 
Azure Boost is designed to lessen the impact on customers when Azure maintenance activities occur on the host.\n", + "pgVerified": true, + "description": "Use Azure Boost VMs for Maintenance sensitive workload", + "potentialBenefits": "Less maintenance impact", "tags": null, - "recommendationResourceType": "Microsoft.StreamAnalytics/streamingjobs", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure Stream Analytics jobs that are not associated with a dedicated cluster\r\nresources\r\n| where type =~ \"Microsoft.StreamAnalytics/streamingjobs\"\r\n| where isnull(properties.cluster.id)\r\n| project recommendationId = \"e48a7227-5ec7-463a-b955-ee7cb598ded4\", name, id, tags\r\n" + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5d40d3d4-179d-4cf5-ac24-901210f512e7", + "aprlGuid": "2de8fa5e-14f4-4c4c-857f-1520f87a629f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/stream-analytics/stream-analytics-streaming-unit-consumption", - "name": "Understand and adjust streaming units" + "url": "https://learn.microsoft.com/azure/virtual-machines/windows/scheduled-event-service", + "name": "Monitor scheduled events for your Azure VMs" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machines/linux/scheduled-events", + "name": "Azure Metadata Service Scheduled Events for Linux VMs" + }, + { + "url": "https://learn.microsoft.com/azure/virtual-machines/windows/scheduled-events", + "name": "Azure Metadata Service Scheduled Events for Windows VMs" } - ], - "recommendationControl": "High Availability", - "longDescription": "Configure Autoscale to allow your job to dynamically change the allocated number of Streaming Units (SU) based on load, metrics, and/or schedule.\n", - 
"pgVerified": false, - "description": "Migrate Stream Analytics jobs to StandardV2 SKU", - "potentialBenefits": "Enhanced reliability and security", - "publishedToLearn": false, + ], + "recommendationControl": "High Availability", + "longDescription": "If your workload is Maintenance sensitive, enable Scheduled Events. This Azure Metadata Service lets your app prepare for virtual machine maintenance by providing information on upcoming events like reboots, reducing disruptions.\n", + "pgVerified": true, + "description": "Enable Scheduled Events for Maintenance sensitive workload VMs", + "potentialBenefits": "Minimize downtime for VMs", "tags": null, - "recommendationResourceType": "Microsoft.StreamAnalytics/streamingjobs", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all Azure Stream Analytics jobs that are not using the latest version of the service.\r\nresources\r\n| where type =~ \"Microsoft.StreamAnalytics/streamingjobs\"\r\n| where properties.sku.name !~ \"StandardV2\"\r\n| project recommendationId = \"5d40d3d4-179d-4cf5-ac24-901210f512e7\", name, id, tags\r\n" + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c041d596-6c97-4c5f-b4b3-9cd37628f2e2", - "recommendationTypeId": null, + "aprlGuid": "fa0cf4f5-0b21-47b7-89a9-ee936f193ce1", + "recommendationTypeId": "d4102c0f-ebe3-4b22-8fe0-e488866a87af", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://docs.citrix.com/en-us/citrix-daas-azure/limits", - "name": "Citrix Limits" + "url": "https://aka.ms/zrsdisksdoc", + "name": "Redundancy options for managed disks" } ], - "recommendationControl": "Governance", - "longDescription": "A Citrix Managed Azure subscription supports VMs with VDA for app/desktop delivery, excluding other machines like 
Cloud Connectors. When close to the limit, signaled by a dashboard notification, and with sufficient licenses, request another subscription. Can't exceed the given limits for catalogs.\n", + "recommendationControl": "High Availability", + "longDescription": "Azure disks offers a zone-redundant storage (ZRS) option for workloads that need to be resilient to an entire zone being down. Due to the cross-zone data replication, ZRS disks have higher write latency when compared to the locally-redundant option (LRS), so make sure to benchmark your disks.\n", "pgVerified": true, - "description": "Do not create more than 2000 Citrix VDA servers per subscription", - "potentialBenefits": "Avoids hitting limit, ensures reliability", - "publishedToLearn": false, + "description": "Use Azure Disks with Zone Redundant Storage for higher resiliency and availability", + "potentialBenefits": "Enhanced Disk resilience to failures", "tags": null, - "recommendationResourceType": "Microsoft.Subscription/Subscriptions", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Count VM instances with a tag that contains \"Citrix VDA\" and create output if that count is >2000 for each subscription.\r\n// The Citrix published limit is 2500. 
This query runs an 80% check.\r\n\r\nresources\r\n| where type == 'microsoft.compute/virtualmachines'\r\n| where tags contains 'Citrix VDA'\r\n| summarize VMs=count() by subscriptionId\r\n| where VMs > 2000\r\n| join (resourcecontainers| where type =='microsoft.resources/subscriptions' | project subname=name, subscriptionId) on subscriptionId\r\n| project recommendationId='c041d596-6c97-4c5f-b4b3-9cd37628f2e2', name= subname, id = subscriptionId, param1='Too many instances.', param2= VMs\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find eligible Disks that are not zonal nor zone redundant\nresources\n| where type == 'microsoft.compute/disks'\n| where sku has \"Premium_LRS\" or sku has \"StandardSSD_LRS\"\n| where sku.name has_cs 'ZRS' or array_length(zones) > 0\n| project recommendationId=\"fa0cf4f5-0b21-47b7-89a9-ee936f193ce1\", name, id, tags, param1 = sku, param2 = sku.name\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5ada5ffa-7149-4e49-9fbf-e67be7c2594c", + "aprlGuid": "302fda08-ee65-4fbe-a916-6dc0b33169c4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/ready/landing-zone/design-area/resource-org-management-groups#management-group-recommendations", - "name": "Management group recommendations" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/governance/management-groups/overview#root-management-group-for-each-directory", - "name": "Root management group for each directory" + "url": "https://aka.ms/on-demand-capacity-reservations-docs", + "name": "On-demand Capacity Reservation" } ], - "recommendationControl": "Governance", - "longDescription": "The root management group in Azure is designed for organizational hierarchy, allowing for all management groups and subscriptions to fold into it.\n", - "pgVerified": true, - "description": "Subscriptions should not be placed under the Tenant Root Management Group", - 
"potentialBenefits": "Enhanced security, compliance, and management", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure Capacity Reservations ensure high availability for virtual machines by reserving compute capacity in advance within a specific region or availability zone. This guarantees that VMs will have the necessary resources during peak demand or maintenance events, enhancing reliability and uptime.\n", + "pgVerified": false, + "description": "Reserve Compute Capacity for critical workloads", + "potentialBenefits": "Guaranteed capacity in constrained regions/zones", "tags": null, - "recommendationResourceType": "Microsoft.Subscription/Subscriptions", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure Subscriptions that are placed under the Tenant Root Management Group\r\nresourcecontainers\r\n| where type == 'microsoft.resources/subscriptions'\r\n| extend mgParentSize = array_length(properties.managementGroupAncestorsChain)\r\n| where mgParentSize == 1\r\n| project recommendationId=\"5ada5ffa-7149-4e49-9fbf-e67be7c2594c\", name, id, tags\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Find all Virtual Machines not associated with a Capacity Reservation, and provide details for Capacity Reservation like vmSize, location, and zone.\nresources\n| where type =~ 'Microsoft.Compute/virtualMachines'\n| where isnull(properties.capacityReservation)\n| extend zoneValue = iff(isnull(zones), \"null\", zones)\n| project recommendationId = \"302fda08-ee65-4fbe-a916-6dc0b33169c4\", name, id, tags, param1 = strcat(\"VmSize: \", properties.hardwareProfile.vmSize), param2 = strcat(\"Location: \", location), param3 = strcat(\"Zone: \", zoneValue)\n" }, { "publishedToAdvisor": null, - "aprlGuid": 
"19b6df57-f6b5-3e4f-843a-273daa087cb0", + "aprlGuid": "5f7e8a12-3c4f-456b-919c-2e9adff98c38", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/generation-2#features-and-capabilities", - "name": "Generation 1 vs generation 2 virtual machines" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/extensions/update-linux-agent?tabs=ubuntu", + "name": "How to update the Azure Linux Agent on a VM" } ], "recommendationControl": "High Availability", - "longDescription": "When building Image Templates, use sources for gen 2 VMs. Gen 2 offers more memory, supports >2TB disks, uses UEFI for faster boot/installation, has Intel SGX, and virtualized persistent memory (vPMEM), unlike gen 1's BIOS-based architecture.\n", - "pgVerified": true, - "description": "Use Generation 2 virtual machine source image", - "potentialBenefits": "More memory, supports >2TB disks, faster boot", - "publishedToLearn": false, + "longDescription": "If you've installed the Azure Linux Agent or are using an endorsed distribution image, ensure your agent version is up-to-date. 
Some Linux distributions may disable auto-update or use older agent versions.\n", + "pgVerified": false, + "description": "Update the Azure Linux VM Agent", + "potentialBenefits": "Reduces complications with VM provisioning", "tags": null, - "recommendationResourceType": "Microsoft.VirtualMachineImages/imageTemplates", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "21fb841b-ba70-1f4e-a460-1f72fb41aa51", + "aprlGuid": "b49a39fd-f431-4b61-9062-f2157849d845", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/reliability/reliability-image-builder?toc=%2Fazure%2Fvirtual-machines%2Ftoc.json&bc=%2Fazure%2Fvirtual-machines%2Fbreadcrumb%2Ftoc.json&tabs=graph#disaster-recovery", - "name": "Image Template resiliency" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/image-builder-overview?tabs=azure-powershell#regions", - "name": "Azure Image Builder Supported Regions" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", + "name": "Compute Gallery best practices" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "The Azure Image Builder service lacks availability zones support. Replicating Image Templates to a secondary region will enable the build of new images in secondary region.\n", + "recommendationControl": "High Availability", + "longDescription": "Keeping a minimum of 3 replicas for production images in Azure's Compute Gallery ensures scalability and prevents throttling in multi-VM deployments by distributing VM deployments across different replicas. 
This reduces the risk of overloading a single replica.\n", "pgVerified": true, - "description": "Replicate your Image Templates to a secondary region", - "potentialBenefits": "Enhances disaster recovery capability", - "publishedToLearn": false, + "description": "A minimum of three replicas should be kept for production image versions", + "potentialBenefits": "Enhances scalability and avoids throttling", "tags": null, - "recommendationResourceType": "Microsoft.VirtualMachineImages/imageTemplates", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Compute/galleries", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// List all Image Templates that are not replicated to another region\r\nresources\r\n| where type =~ \"microsoft.virtualmachineimages/imagetemplates\"\r\n| mv-expand distribution=properties.distribute\r\n| where array_length(parse_json(distribution).replicationRegions) == 1\r\n| project recommendationId = \"21fb841b-ba70-1f4e-a460-1f72fb41aa51\", name, id, param1=strcat(\"replicationRegions:\",parse_json(distribution).replicationRegions)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Query to list all image versions,its associated image name and version replica configurations per region in a compute gallery whose version replicas is less than 3\nresources\n| where type =~ \"microsoft.compute/galleries/images/versions\"\n| extend GalleryName = tostring(split(tostring(id), \"/\")[8]), ImageName = tostring(split(tostring(id), \"/\")[10])\n| mv-expand VersionReplicas = properties.publishingProfile.targetRegions\n| project RecommendationId=\"b49a39fd-f431-4b61-9062-f2157849d845\",name,id,tags,param1=strcat(\"GalleryName: \",GalleryName),param2=strcat(\"ImageName: \",ImageName),param3=strcat(\"VersionReplicaRegionName: \",VersionReplicas.name),param4=strcat(\"VersionReplicationCount: 
\",VersionReplicas.regionalReplicaCount),rc=toint(VersionReplicas.regionalReplicaCount)\n| where rc < 3\n| project-away rc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "88cb90c2-3b99-814b-9820-821a63f600dd", + "aprlGuid": "488dcc8b-f2e3-40ce-bf95-73deb2db095f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/reliability/migrate-app-service", - "name": "Migrate App Service to availability zone support" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", + "name": "Compute Gallery best practices" }, { - "url": "https://learn.microsoft.com/en-us/azure/architecture/reference-architectures/enterprise-integration/ase-high-availability-deployment", - "name": "High availability enterprise deployment using App Service Environment" + "url": "https://learn.microsoft.com/en-us/azure/storage/common/storage-redundancy#zone-redundant-storage", + "name": "Zone-redundant storage" } ], "recommendationControl": "High Availability", - "longDescription": "Azure's feature of deploying App Service plans across availability zones enhances resiliency and reliability by ensuring operation during datacenter failures, providing redundancy without needing different regions, thus minimizing downtime and maintaining uninterrupted services.\n", - "pgVerified": false, - "description": "Migrate App Service to availability Zone Support", - "potentialBenefits": "Enhances app resiliency and reliability", - "publishedToLearn": false, + "longDescription": "Use ZRS for high availability when creating image/VM versions in Azure Compute Gallery, offering resilience against Availability Zone failures. 
ZRS accounts are advisable in regions with Availability Zones, with the choice of Standard_ZRS recommended over Standard_LRS for these regions.\n", + "pgVerified": true, + "description": "Zone redundant storage should be used for image versions", + "potentialBenefits": "Enhances image version availability", "tags": null, - "recommendationResourceType": "Microsoft.Web/serverFarms", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/galleries", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// The query filters the qualified App Service Plans that do not have Zone Redundancy enabled.\r\n// Its important to check regions that support availability zones for Azure App Services running on multi-tenant and App Service Environments https://learn.microsoft.com/en-us/azure/reliability/reliability-app-service?tabs=graph%2Ccli#:~:text=The%20following%20regions%20support%20Azure%20App%20Services%20running%20on%20multi%2Dtenant%20environments%3A\r\n\r\nresources\r\n| where type =~ 'microsoft.web/serverfarms'\r\n| where location in~ (\"australiaeast\", \"brazilsouth\", \"canadacentral\", \"centralindia\", \"centralus\", \"eastasia\", \"eastus\", \"eastus2\", \"francecentral\", \"germanywestcentral\", \"israelcentral\", \"italynorth\", \"japaneast\", \"japanwest\", \"koreacentral\", \"mexicocentral\", \"newzealandnorth\", \"northeurope\", \"norwayeast\", \"polandcentral\", \"qatarcentral\", \"southafricanorth\", \"southcentralus\", \"southeastasia\", \"spaincentral\", \"swedencentral\", \"switzerlandnorth\", \"uaenorth\", \"uksouth\", \"westeurope\", \"westus2\", \"westus3\", \"usgovvirginia\", \"chinanorth3\")\r\n| extend zoneRedundant = tobool(properties.zoneRedundant)\r\n| extend sku_tier = tostring(sku.tier)\r\n| where (tolower(sku_tier) contains \"isolated\" or tolower(sku_tier) contains \"premium\") and zoneRedundant == false\r\n| project 
recommendationId=\"88cb90c2-3b99-814b-9820-821a63f600dd\", name, id, tags, param1=sku_tier, param2=\"Not Zone Redundant\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Query to list all image versions and its associated image and gallery name whose Storage account type is not using ZRS\nresources\n| where type =~ \"microsoft.compute/galleries/images/versions\"\n| extend GalleryName = tostring(split(tostring(id), \"/\")[8]), ImageName = tostring(split(tostring(id), \"/\")[10])\n| extend StorageAccountType = tostring(properties.publishingProfile.storageAccountType)\n| where StorageAccountType !has \"ZRS\"\n| project RecommendationId=\"488dcc8b-f2e3-40ce-bf95-73deb2db095f\",name,id,tags,param1=strcat(\"GalleryName: \",GalleryName),param2=strcat(\"ImageName: \",ImageName),param3=strcat(\"StorageAccountType: \",StorageAccountType)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b2113023-a553-2e41-9789-597e2fb54c31", + "aprlGuid": "1c5e1e58-4e56-491c-8529-10f37af9d4ed", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", - "name": "Resiliency checklist for specific Azure services" + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/azure-compute-gallery#best-practices", + "name": "Compute Gallery best practices" + }, + { + "url": "https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/should-i-create-a-generation-1-or-2-virtual-machine-in-hyper-v", + "name": "Generation 1 vs Generation 2 in Hyper-V" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/shared-image-galleries?tabs=azure-cli", + "name": "Images in Compute gallery" } ], "recommendationControl": "High Availability", - "longDescription": "Choose Standard/Premium Azure App Service Plan for robust apps with advanced scaling, high availability, better performance, and multiple slots, ensuring 
resilience and continuous operation.\n", - "pgVerified": false, - "description": "Use Standard or Premium tier", - "potentialBenefits": "Enhanced scaling and reliability", - "publishedToLearn": false, + "longDescription": "We recommend creating Trusted Launch Supported Images for benefits like Secure Boot, vTPM, trusted launch VMs, large boot volume. These are Gen 2 Images by default and you cannot change a VM's generation after creation, so review the considerations first.\n", + "pgVerified": true, + "description": "Consider creating TrustedLaunchSupported images where possible", + "potentialBenefits": "Enhances VM security and features", "tags": null, - "recommendationResourceType": "Microsoft.Web/serverFarms", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Compute/galleries", + "recommendationImpact": "Low", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure App Service Plans that are not in the \"Standard\", \"Premium\", or \"IsolatedV2\" SKU tiers.\r\n\r\nresources\r\n| where type =~ 'microsoft.web/serverfarms'\r\n| extend sku_tier = tostring(sku.tier)\r\n| where tolower(sku_tier) !contains \"standard\" and\r\n tolower(sku_tier) !contains \"premium\" and\r\n tolower(sku_tier) !contains \"isolatedv2\"\r\n| project recommendationId=\"b2113023-a553-2e41-9789-597e2fb54c31\", name, id, tags, param1= strcat(\"SKU=\",sku_tier)\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Query to list all images whose Hyper-V generation is not V2\nresources\n| where type =~ \"microsoft.compute/galleries/images\"\n| extend VMGeneration = properties.hyperVGeneration\n| where VMGeneration <> 'V2'\n| project RecommendationId=\"1c5e1e58-4e56-491c-8529-10f37af9d4ed\",name,id,tags,param1=strcat(\"VMGeneration: \",VMGeneration)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "07243659-4643-d44c-a1c6-07ac21635072", + "aprlGuid": "74fcb9f2-9a25-49a6-8c42-d32851c4afb7", "recommendationTypeId": null, 
"recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", - "name": "Resiliency checklist for specific Azure services" + "url": "https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/scenarios/azure-vmware/eslz-management-and-monitoring#design-recommendations", + "name": "Configure Azure Service Health alerts" } ], - "recommendationControl": "Scalability", - "longDescription": "Avoid frequent scaling up/down of Azure App Service instances to prevent service disruptions. Choose the right tier and size for the workload and scale out for traffic changes, as scaling adjustments can trigger application restarts.\n", - "pgVerified": false, - "description": "Avoid scaling up or down", - "potentialBenefits": "Minimizes restarts, enhances stability", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Ensure Azure Service Health notifications are set for Azure VMware Solution across all used regions and subscriptions. 
This communicates service/security issues and maintenance activities like host replacements and upgrades, reducing service request submissions.\n", + "pgVerified": true, + "description": "Configure Azure Service Health notifications and alerts for Azure VMware Solution", + "potentialBenefits": "Prompt mitigation of issues.", "tags": null, - "recommendationResourceType": "Microsoft.Web/serverFarms", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure App Service Plans and the number of changes that was made to the pricing tier, if the count is higher that 3 it means you need to avoid scaling up and down that often\r\n\r\nresourcechanges\r\n| extend changeTime = todatetime(properties.changeAttributes.timestamp), targetResourceId = tostring(properties.targetResourceId),\r\nchangeType = tostring(properties.changeType), correlationId = properties.changeAttributes.correlationId,\r\nchangedProperties = properties.changes, changeCount = properties.changeAttributes.changesCount\r\n| where changeTime > ago(14d)\r\n| join kind=inner (resources | project resources_Name = name, resources_Type = type, resources_Subscription= subscriptionId, resources_ResourceGroup= resourceGroup, id) on $left.targetResourceId == $right.id\r\n| where resources_Type contains \"microsoft.web/serverfarms\"\r\n| where changedProperties['sku.name'].propertyChangeType == 'Update' or changedProperties['sku.tier'].propertyChangeType == 'Update'\r\n| summarize count() by targetResourceId, resources_Name ,tostring(changedProperties['sku.name'].previousValue), tostring(changedProperties['sku.tier'].newValue)\r\n| project recommendationId=\"07243659-4643-d44c-a1c6-07ac21635072\", name=resources_Name, id=targetResourceId, tags=\"\", param1=['changedProperties_sku.name_previousValue'], 
param2=['changedProperties_sku.tier_newValue'], param3=count_\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VMware Solution resources that don't have one or more service health alerts covering AVS private clouds in the deployed subscription and region pairs.\n//full list of private clouds\n(resources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend locale = tolower(location)\n| extend subscriptionId = tolower(subscriptionId)\n| project id, name, tags, subscriptionId, locale)\n| join kind=leftouter\n//Alert ID's that include all incident types filtered by AVS Service Health alerts\n((resources\n| where type == \"microsoft.insights/activitylogalerts\"\n| extend alertproperties = todynamic(properties)\n| where alertproperties.condition.allOf[0].field == \"category\" and alertproperties.condition.allOf[0].equals == \"ServiceHealth\"\n| where alertproperties.condition.allOf[1].field == \"properties.impactedServices[*].ServiceName\" and set_has_element(alertproperties.condition.allOf[1].containsAny, \"Azure VMware Solution\")\n| extend locale = strcat_array(split(tolower(alertproperties.condition.allOf[2].containsAny),' '), '')\n| mv-expand todynamic(locale)\n| where locale != \"global\"\n| project subscriptionId, tostring(locale) )\n| union\n//Alert ID's that include only some of the incident types after filtering by service health alerts covering AVS private clouds.\n(resources\n| where type == \"microsoft.insights/activitylogalerts\"\n| extend subscriptionId = tolower(subscriptionId)\n| extend alertproperties = todynamic(properties)\n| where alertproperties.condition.allOf[0].field == \"category\" and alertproperties.condition.allOf[0].equals == \"ServiceHealth\"\n| where alertproperties.condition.allOf[2].field == \"properties.impactedServices[*].ServiceName\" and set_has_element(alertproperties.condition.allOf[2].containsAny, \"Azure VMware Solution\")\n| extend locale = 
strcat_array(split(tolower(alertproperties.condition.allOf[3].containsAny),' '), '')\n| mv-expand todynamic(locale)\n| mv-expand alertproperties.condition.allOf[1].anyOf\n| extend incidentType = alertproperties_condition_allOf_1_anyOf.equals\n| where locale != \"global\"\n| project id, subscriptionId, locale, incidentType\n| distinct subscriptionId, tostring(locale), tostring(incidentType)\n| summarize incidentTypes=count() by subscriptionId, locale\n| where incidentTypes == 5 //only include this subscription, region pair if it includes all the incident types.\n| project subscriptionId, locale)) on subscriptionId, locale\n| where subscriptionId1 == \"\" or locale1 == \"\" or isnull(subscriptionId1) or isnull(locale1)\n| project recommendationId = \"74fcb9f2-9a25-49a6-8c42-d32851c4afb7\", name, id, tags, param1 = \"avsServiceHealthAlertsAllIncidentTypesConfigured: False\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "dbe3fd66-fb2a-9d46-b162-1791e21da236", + "aprlGuid": "29d7a115-dfb6-4df1-9205-04824109548f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/architecture/checklist/resiliency-per-service#app-service", - "name": "Resiliency checklist for specific Azure services" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", + "name": "Configure and streamline alerts" } ], - "recommendationControl": "Governance", - "longDescription": "It is strongly recommended to create separate App Service plans for production and test environments to avoid using slots within your production deployment for testing purposes.\n", - "pgVerified": false, - "description": "Create separate App Service plans for production and test", - "potentialBenefits": "Protects prod performance; avoids test impact", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Set an 
alert for when the node count in Azure VMware Solution Private Cloud hits or exceeds 90 hosts, enabling timely planning for a new private cloud.\n", + "pgVerified": true, + "description": "Monitor when Azure VMware Solution Private Cloud is reaching the capacity limit", + "potentialBenefits": "Proactive capacity planning", "tags": null, - "recommendationResourceType": "Microsoft.Web/serverFarms", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6320abf6-f917-1843-b2ae-4779c35985ae", + "aprlGuid": "f86355e3-de7c-4dad-8080-1b0b411e66c8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/app-service/manage-automatic-scaling?tabs=azure-portal", - "name": "Automatic scaling in Azure App Service" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/autoscale/autoscale-get-started", - "name": "Auto Scale Web Apps" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", + "name": "Configure and streamline alerts" } ], - "recommendationControl": "Scalability", - "longDescription": "Enabling Autoscale/Automatic Scaling for your Azure App Service ensures sufficient resources for incoming requests. 
Autoscaling is rule-based, whereas Automatic Scaling, a newer feature, automatically adjusts resources based on HTTP traffic.\n", - "pgVerified": false, - "description": "Enable Autoscale/Automatic scaling to ensure adequate resources are available to service requests", - "potentialBenefits": "Optimizes resources for traffic", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Alert when the cluster size reaches 14 hosts. Set up periodic alerts for planning new clusters or datastores due to growth, especially from storage needs. Beyond 14 hosts, trigger alerts for each new host addition for proactive resource monitoring.\n", + "pgVerified": true, + "description": "Monitor when Azure VMware Solution Cluster Size is approaching the host limit", + "potentialBenefits": "Proactive resource management", "tags": null, - "recommendationResourceType": "Microsoft.Web/serverFarms", + "recommendationResourceType": "Microsoft.AVS/privateClouds", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "493f6079-3bb6-4a56-96ba-ab3248474cb1", + "aprlGuid": "9ec5b4c8-3dd8-473a-86ee-3273290331b9", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/app-service/troubleshoot-diagnostic-logs", - "name": "Enable diagnostics logging for apps in Azure App Service" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/infrastructure#implement-high-availability", + "name": "Implement high availability" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/deploy-vsan-stretched-clusters", + "name": "Stretched Clusters" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Enabling diagnostics logging for your Azure App Service is crucial for 
monitoring and diagnostics, including both application logging and web server logging.\n", - "pgVerified": false, - "description": "Enable diagnostics logging", - "potentialBenefits": "Monitoring and Alerting", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "For Azure VMware Solution, enabling Stretched Clusters offers 99.99% SLA, synchronous storage replication (RPO=0), and spreads vSAN datastore across two AZs. Must be done at initial setup, needing double quota due to extension across AZs.\n", + "pgVerified": true, + "description": "Enable Stretched Clusters for Multi-AZ Availability of the vSAN Datastore", + "potentialBenefits": "99.99% SLA, 0 RPO, Multi-AZ", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", + "recommendationResourceType": "Microsoft.AVS/privateClouds", "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VMware Solution resources that aren't configured as stretched clusters and in supported regions.\nresources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend avsproperties = todynamic(properties)\n| where avsproperties.availability.strategy != \"DualZone\"\n| where location in (\"uksouth\", \"westeurope\", \"germanywestcentral\", \"australiaeast\")\n| project recommendationId = \"9ec5b4c8-3dd8-473a-86ee-3273290331b9\", name, id, tags, param1 = \"stretchClusters: Disabled\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a7e8bb3d-8ceb-442d-b26f-007cd63f9ffc", + "aprlGuid": "4232eb32-3241-4049-9e14-9b8005817b56", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/application-insights/app-insights-overview", - "name": "Application Insights" - }, - { - "url": 
"https://learn.microsoft.com/azure/azure-monitor/app/azure-web-apps", - "name": "Application monitoring for Azure App Service" + "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-alerts-for-azure-vmware-solution#supported-metrics-and-activities", + "name": "Supported metrics and activities" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Use Application Insights to monitor app performance and load behavior, offering real-time insights, issue diagnosis, and root-cause analysis. It supports ASP.NET, ASP.NET Core, Java, and Node.js on Azure App Service, now with built-in monitoring.\n", - "pgVerified": false, - "description": "Monitor Performance", - "potentialBenefits": "Real-time insights and issue diagnosis", - "publishedToLearn": false, + "longDescription": "Ensure VMware vSAN datastore slack space is maintained for SLA by monitoring storage utilization and setting alerts at 70% and 75% utilization to allow for capacity planning. To expand, add hosts or external storage like Azure Elastic SAN, Azure NetApp Files, if CPU and RAM requirements are met.\n", + "pgVerified": true, + "description": "Configure Azure Monitor Alert warning thresholds for vSAN datastore utilization", + "potentialBenefits": "Optimized capacity planning for vSAN", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n\r\n" + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VMware Solution resources that don't have a vSAN capacity critical alert with a threshold of 75% or a warning capacity of 70%.\n(\nresources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend scopeId = tolower(tostring(id))\n| project ['scopeId'], name, id, tags\n| join kind=leftouter 
(\nresources\n| where type == \"microsoft.insights/metricalerts\"\n| extend alertProperties = todynamic(properties)\n| mv-expand alertProperties.scopes\n| mv-expand alertProperties.criteria.allOf\n| extend scopeId = tolower(tostring(alertProperties_scopes))\n| extend metric = alertProperties_criteria_allOf.metricName\n| extend threshold = alertProperties_criteria_allOf.threshold\n| project scopeId, tostring(metric), toint(['threshold'])\n| where metric == \"DiskUsedPercentage\"\n| where threshold == 75\n) on scopeId\n| where isnull(['threshold'])\n| project recommendationId = \"4232eb32-3241-4049-9e14-9b8005817b56\", name, id, tags, param1 = \"vsanCapacityCriticalAlert: isNull or threshold != 75\"\n)\n| union (\nresources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend scopeId = tolower(tostring(id))\n| project ['scopeId'], name, id, tags\n| join kind=leftouter (\nresources\n| where type == \"microsoft.insights/metricalerts\"\n| extend alertProperties = todynamic(properties)\n| mv-expand alertProperties.scopes\n| mv-expand alertProperties.criteria.allOf\n| extend scopeId = tolower(tostring(alertProperties_scopes))\n| extend metric = alertProperties_criteria_allOf.metricName\n| extend threshold = alertProperties_criteria_allOf.threshold\n| project scopeId, tostring(metric), toint(['threshold'])\n| where metric == \"DiskUsedPercentage\"\n| where threshold == 70\n) on scopeId\n| where isnull(['threshold'])\n| project recommendationId = \"4232eb32-3241-4049-9e14-9b8005817b56\", name, id, tags, param1 = \"vsanCapacityWarningAlert: isNull or threshold != 70\"\n)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "78a5c033-ff51-4332-8a71-83464c34494b", + "aprlGuid": "fa4ab927-bced-429a-971a-53350de7f14b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#app-service", - "name": "Resiliency checklist for specific Azure 
services" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#manage-logs-and-archives", + "name": "Manage logs and archives" } ], - "recommendationControl": "Scalability", - "longDescription": "If your solution includes both a web front end and a web API, decomposing them into separate App Service apps facilitates solution decomposition by workload, allowing for independent scaling. Initially, you can deploy both in the same plan and separate them for independent scaling when necessary.\n", - "pgVerified": false, - "description": "Separate web apps from web APIs", - "potentialBenefits": "Independent scaling, easier management", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Ensure Diagnostic Settings are configured for each private cloud to send syslogs to external sources for analysis and/or archiving. Azure VMware Solution Syslogs contain data for troubleshooting and performance, aiding quicker issue resolution and early detection of issues.\n", + "pgVerified": true, + "description": "Configure Syslog in Diagnostic Settings for Azure VMware Solution", + "potentialBenefits": "Faster issue resolution, early detection", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3f9ddb59-0bb3-4acb-9c9b-99aa1776f0ab", + "aprlGuid": "4ee5d535-c47b-470a-9557-4a3dd297d62f", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/checklist/resiliency-per-service#app-service", - "name": "Resiliency checklist" + "url": 
"https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", + "name": "Configure and streamline alerts" } ], - "recommendationControl": "Scalability", - "longDescription": "Creating a separate storage account for logs and not using the same one for application data prevents logging activities from reducing application performance by ensuring that the resources dedicated to handling application data are not burdened by logging processes.\n", - "pgVerified": false, - "description": "Create a separate storage account for logs", - "potentialBenefits": "Improves app performance", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Ensure sufficient compute resources to avoid host resource exhaustion in Azure VMware Solution, which utilizes vSphere DRS and HA for dynamic workload resource management. However, sustained CPU utilization over 95% may increase CPU Ready times, impacting workloads.\n", + "pgVerified": true, + "description": "Monitor CPU Utilization to ensure sufficient resources for workloads", + "potentialBenefits": "Avoids resource exhaustion, optimizes performance", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", + "recommendationResourceType": "Microsoft.AVS/privateClouds", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VMware Solution resources that don't have a Cluster CPU capacity critical alert with a threshold of 95%.\nresources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend scopeId = tolower(tostring(id))\n| project ['scopeId'], name, id, tags\n| join kind=leftouter (\nresources\n| where type == \"microsoft.insights/metricalerts\"\n| extend alertProperties = todynamic(properties)\n| mv-expand alertProperties.scopes\n| mv-expand 
alertProperties.criteria.allOf\n| extend scopeId = tolower(tostring(alertProperties_scopes))\n| extend metric = alertProperties_criteria_allOf.metricName\n| extend threshold = alertProperties_criteria_allOf.threshold\n| project scopeId, tostring(metric), toint(['threshold'])\n| where metric == \"EffectiveCpuAverage\"\n| where threshold == 95\n) on scopeId\n| where isnull(['threshold'])\n| project recommendationId = \"4ee5d535-c47b-470a-9557-4a3dd297d62f\", name, id, tags, param1 = \"hostCpuCriticalAlert: isNull or threshold != 95\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a1d91661-32d4-430b-b3b6-5adeb0975df7", - "recommendationTypeId": "1d3b5a51-62d4-4b77-96f6-40ed0a3aa21f", + "aprlGuid": "029208c8-5186-4a76-8ee8-6e3445fef4dd", + "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/app-service-web/web-sites-staged-publishing", - "name": "Set up staging environments in Azure App Service" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/monitoring#configure-and-streamline-alerts", + "name": "Configure and streamline alerts" } ], - "recommendationControl": "Governance", - "longDescription": "Create a deployment slot for staging to deploy updates, verify them, and ensure all instances are warmed up before production swap, reducing bad update chances. An LKG slot allows easy rollback to a previous good deployment if issues arise later, enhancing reliability.\n", - "pgVerified": false, - "description": "Deploy to a staging slot", - "potentialBenefits": "Safer updates and easy rollback", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Ensure sufficient memory resources to prevent host resource exhaustion in Azure VMware Solution. It uses vSphere DRS and vSphere HA for dynamic workload management. 
Yet, continuous memory use over 95% leads to disk swapping, affecting workloads.\n", + "pgVerified": true, + "description": "Monitor Memory Utilization to ensure sufficient resources for workloads", + "potentialBenefits": "Avoids host exhaustion and swapping", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Display App Service with the count of deployment slots for Apps under eligible App service plans and it shows if deployment slot is enabled or not\r\n\r\nresources\r\n| where type =~ 'microsoft.web/sites' or type =~ 'microsoft.web/sites/slots'\r\n| extend isSlot = iff(type =~ 'microsoft.web/sites/slots', 1, 0)\r\n| extend AspName = iff(isSlot == 1, split(name, '/')[0], name)\r\n| extend Sku = tostring(properties.sku)\r\n| where tolower(Sku) contains \"standard\" or tolower(Sku) contains \"premium\" or tolower(Sku) contains \"isolatedv2\"\r\n| project id, name, AspName, isSlot, Sku\r\n| summarize Slots = countif(isSlot == 1) by id, name, AspName, Sku\r\n| extend DeploymentSlotEnabled = iff(Slots > 1, true, false)\r\n| where DeploymentSlotEnabled = false\r\n| project recommendationId=\"a1d91661-32d4-430b-b3b6-5adeb0975df7\", name, id, tags=\"\", param1=Sku, param2=Slots, param3=\"DeploymentSlotEnabled=false\"\r\n\r\n" + "query": "// Azure Resource Graph Query\n// Provides a list of Azure VMware Solution resources that don't have a cluster host memory critical alert with a threshold of 95%.\nresources\n| where ['type'] == \"microsoft.avs/privateclouds\"\n| extend scopeId = tolower(tostring(id))\n| project ['scopeId'], name, id, tags\n| join kind=leftouter (\nresources\n| where type == \"microsoft.insights/metricalerts\"\n| extend alertProperties = todynamic(properties)\n| mv-expand alertProperties.scopes\n| mv-expand 
alertProperties.criteria.allOf\n| extend scopeId = tolower(tostring(alertProperties_scopes))\n| extend metric = alertProperties_criteria_allOf.metricName\n| extend threshold = alertProperties_criteria_allOf.threshold\n| project scopeId, tostring(metric), toint(['threshold'])\n| where metric == \"UsageAverage\"\n| where threshold == 95\n) on scopeId\n| where isnull(['threshold'])\n| project recommendationId = \"029208c8-5186-4a76-8ee8-6e3445fef4dd\", name, id, tags, param1 = \"hostMemoryCriticalAlert: isNull or threshold != 95\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0b80b67c-afbe-4988-ad58-a85a146b681e", + "aprlGuid": "a5ef7c05-c611-4842-9af5-11efdc99123a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/app-service-web/web-sites-configure", - "name": "Configure web apps in Azure App Service" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/lock-resources", + "name": "Lock your resources to protect your infrastructure" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Use app settings for configuration and define them in Resource Manager templates or via PowerShell to facilitate part of an automated deployment/update process for improved reliability.\n", - "pgVerified": false, - "description": "Store configuration as app settings", - "potentialBenefits": "Enhanced reliability via automation", - "publishedToLearn": false, + "recommendationControl": "Governance", + "longDescription": "Applying a resource delete lock to the Azure VMware Solution Private Cloud resource group prevents unauthorized or accidental deletion by anyone with contributor access, ensuring the protection and reliability of the Azure VMware Solution Private Cloud.\n", + "pgVerified": true, + "description": "Apply Resource delete lock on the resource group hosting the private cloud", + "potentialBenefits": "Prevents accidental 
deletion", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure App Service resources that don't have App Settings configured\r\n\r\nappserviceresources\r\n| where type == \"microsoft.web/sites/config\"\r\n| extend AppSettings = iif(isempty(properties.AppSettings), true, false)\r\n| where AppSettings == false\r\n| project recommendationId=\"0b80b67c-afbe-4988-ad58-a85a146b681e\", id, name, tags=\"\", param1=\"AppSettings is not configured\"\r\n\r\n" + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fd049c28-ae6d-48f0-a641-cc3ba1a3fe1d", + "aprlGuid": "e0ac2f57-c8c0-4b8c-a7c8-19e5797828b5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/app-service/monitor-instances-health-check?tabs=dotnet#enable-health-check", - "name": "Monitor the health of App Service instances" + "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-customer-managed-keys?tabs=azure-portal", + "name": "Configure Customer Managed Keys" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Use Health Check for production workloads. Health check increases your application's availability by rerouting requests away from unhealthy instances, and replacing instances if they remain unhealthy. 
The Health check path should check critical components of your application.\n", - "pgVerified": false, - "description": "Enable Health check for App Services", - "potentialBenefits": "Enhanced reliability via automation", - "publishedToLearn": false, + "recommendationControl": "Security", + "longDescription": "When using customer-managed keys for encrypting vSAN datastores, leveraging Azure Key Vault for central management and accessing them via a managed identity linked to the private cloud is advised. The expiration of these keys can render the vSAN datastore and its associated workloads inaccessible.\n", + "pgVerified": true, + "description": "Use key autorotation for vSAN datastore customer-managed keys", + "potentialBenefits": "Avoid outages with key auto-rotation", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Check if Health Check is enabled for App Service\r\n\r\nresources\r\n| where type =~ 'microsoft.web/sites'\r\n| where properties.kind has 'app'\r\n| join kind = inner\r\n (\r\n appserviceresources\r\n | where isnull(properties.HealthCheckPath) == true\r\n | project name\r\n ) on name\r\n| project recommendationId = \"fd049c28-ae6d-48f0-a641-cc3ba1a3fe1d\", name, id, tags, param1 = \"Healthcheckpath = not set\"\r\n" + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "aab6b4a4-9981-43a4-8728-35c7ecbb746d", + "aprlGuid": "fcc2e257-23af-4c68-aac8-9cc03033c939", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/app-service/app-service-ip-restrictions?tabs=azurecli", - "name": "Set up Azure App Service access restrictions" + "url": 
"https://learn.microsoft.com/en-us/azure/azure-vmware/configure-dns-azure-vmware-solution#configure-dns-forwarder", + "name": "Configure DNS forwarder" } ], - "recommendationControl": "Governance", - "longDescription": "Use network access restrictions to define a priority-ordered allow/deny list that controls network access to your app. Web application firewalls, such as the one available in Application Gateway, are recommended for protection of public-facing web applications.\n", - "pgVerified": false, - "description": "Configure network access restrictions", - "potentialBenefits": "Enhanced security", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Azure VMware Solution private clouds support up to three DNS servers for a single FQDN, preventing a single DNS server from becoming a point of failure. It's crucial to use multiple DNS servers for on-premises FQDN resolution from each private cloud.\n", + "pgVerified": true, + "description": "Use multiple DNS servers per private FQDN zone", + "potentialBenefits": "Enhances reliability and avoids failure", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Medium", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Check if Network access restrictions defined for App service\r\n\r\nresources\r\n| where type =~ 'microsoft.web/sites'\r\n| where properties.kind has 'app'\r\n| join kind = inner\r\n (\r\n appserviceresources\r\n | mv-expand IpSecurityRestrictions = properties.IpSecurityRestrictions\r\n | where isnotnull(IpSecurityRestrictions) == true\r\n | project name\r\n ) on name\r\n| project recommendationId = \"aab6b4a4-9981-43a4-8728-35c7ecbb746d\", name, id, tags, param1 = \"No network restrictions set\"\r\n" + "recommendationResourceType": "Microsoft.AVS/privateClouds", + "recommendationImpact": "High", + "automationAvailable": false, + "query": "// cannot-be-validated-with-arg\n" }, { 
"publishedToAdvisor": null, - "aprlGuid": "9e6682ac-31bc-4635-9959-ab74b52454e6", + "aprlGuid": "e48a7227-5ec7-463a-b955-ee7cb598ded4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.github.io/AppService/2020/05/15/Robust-Apps-for-the-cloud.html", - "name": "Ultimate guide to running healthy apps in the cloud" + "url": "https://learn.microsoft.com/azure/stream-analytics/cluster-overview", + "name": "Overview of Azure Stream Analytics Cluster" } ], "recommendationControl": "Scalability", - "longDescription": "App Service should be configured with a minimum of two instances for production workloads. If apps have a longer warmup time a minimum of three instances should be used.\n", + "longDescription": "Stream Analytics cluster (dedicated) offers more reliable performance guarantees. All the jobs running on your cluster belong only to you. You can also have access to important features like private endpoints, Auto-Scaling, Vnet Support, etc.\n", "pgVerified": false, - "description": "Set minimum instance count to 2 for app service", - "potentialBenefits": "Improves app performace", - "publishedToLearn": false, + "description": "Run jobs in your own dedicated Stream Analytics cluster for increased reliability and security", + "potentialBenefits": "Enhanced reliability and security", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", + "recommendationResourceType": "Microsoft.StreamAnalytics/streamingjobs", "recommendationImpact": "Medium", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of App services that do not have minimum instance count of 2\r\n\r\nresources\r\n| where type =~ 'microsoft.web/sites'\r\n| where properties.kind has 'app'\r\n| join kind = inner\r\n (\r\n appserviceresources\r\n | where properties.PreWarmedInstanceCount < 2\r\n | project name\r\n ) on name\r\n| project recommendationId = 
\"9e6682ac-31bc-4635-9959-ab74b52454e6\", name, id, tags, param1 = \"PreWarmedInstanceCount is less than 2\"\r\n" + "query": "// Azure Resource Graph Query\n// Find all Azure Stream Analytics jobs that are not associated with a dedicated cluster\nresources\n| where type =~ \"Microsoft.StreamAnalytics/streamingjobs\"\n| where isnull(properties.cluster.id)\n| project recommendationId = \"e48a7227-5ec7-463a-b955-ee7cb598ded4\", name, id, tags\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c6c4b962-5af4-447a-9d74-7b9c53a5dff5", + "aprlGuid": "5d40d3d4-179d-4cf5-ac24-901210f512e7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.github.io/AppService/2018/09/10/Announcing-the-New-Auto-Healing-Experience-in-App-Service-Diagnostics.html", - "name": "Announcing the New Auto Healing Experience in App Service Diagnostics - Azure App Service" + "url": "https://learn.microsoft.com/azure/stream-analytics/stream-analytics-streaming-unit-consumption", + "name": "Understand and adjust streaming units" } ], "recommendationControl": "High Availability", - "longDescription": "Auto Heal allows you to mitigate your apps when it runs into unexpected situations like HTTP server errors, resource exhaustion, etc. 
You can configure different triggers based on your need and choose to recycle the app to recover it from a bad state.\n", + "longDescription": "Configure Autoscale to allow your job to dynamically change the allocated number of Streaming Units (SU) based on load, metrics, and/or schedule.\n", "pgVerified": false, - "description": "Enable auto heal for Functions App", - "potentialBenefits": "Improved app availability", - "publishedToLearn": false, + "description": "Migrate Stream Analytics jobs to StandardV2 SKU", + "potentialBenefits": "Enhanced reliability and security", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.StreamAnalytics/streamingjobs", + "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Provides a list of Azure Function App resources that do not have auto heal enabled\r\n\r\nResources\r\n| where type =~ 'microsoft.web/sites'\r\n| where properties.kind contains 'functionapp'\r\n| join kind=inner\r\n (appserviceresources\r\n | where type == \"microsoft.web/sites/config\"\r\n | where properties.AutoHealEnabled == 'false'\r\n | project id, name, tenantId, location, resourceGroup, properties.AutoHealEnabled\r\n ) on name\r\n| project recommendationID = \"c6c4b962-5af4-447a-9d74-7b9c53a5dff5\", name, id, type, kind, param1=\"AutoHealEnabled =false\"\r\n" + "query": "// Azure Resource Graph Query\n// Find all Azure Stream Analytics jobs that are not using the latest version of the service.\nresources\n| where type =~ \"Microsoft.StreamAnalytics/streamingjobs\"\n| where properties.sku.name !~ \"StandardV2\"\n| project recommendationId = \"5d40d3d4-179d-4cf5-ac24-901210f512e7\", name, id, tags\n" }, { "publishedToAdvisor": null, - "aprlGuid": "52f368ee-1d77-4b34-92db-64be269642d0", + "aprlGuid": "84636c6c-b317-4722-b603-7b1ffc16384b", "recommendationTypeId": null, "recommendationMetadataState": "Active", 
"learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-warmup?tabs=in-process%2Cnodejs-v4&pivots=programming-language-csharp#trigger", - "name": "Azure Functions Warmup Trigger" + "url": "https://learn.microsoft.com/azure/event-hubs/event-hubs-geo-dr?tabs=portal#availability-zones", + "name": "Azure Event Hubs - Geo-disaster recovery" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Add a warmup trigger to pre-load custom dependencies during the pre-warming process so that your functions are ready to start processing requests immediately.\n", - "pgVerified": false, - "description": "No warmup trigger added to Function App", - "potentialBenefits": "Improved app availability", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "When using the Azure portal, zone redundancy is automatically enabled. However, some Infrastructure as Code (IaC) tools may default this to false. 
To ensure replication of metadata and events across data centers in an availability zone, always verify that zone redundancy is enabled.\n", + "pgVerified": true, + "description": "Ensure zone redundancy is enabled in supported regions", + "potentialBenefits": "Enhanced fault tolerance for Event Hub", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.EventHub/namespaces", + "recommendationImpact": "High", + "automationAvailable": true, + "query": null }, { "publishedToAdvisor": null, - "aprlGuid": "0b06a688-0dd6-4d73-9f72-6666ff853ca9", + "aprlGuid": "fbfef3df-04a5-41b2-a8fd-b8541eb04956", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules", - "name": "Resource naming restrictions - Azure Resource Manager" + "url": "https://learn.microsoft.com/azure/event-hubs/event-hubs-auto-inflate", + "name": "Azure Event Hubs - Automatically scale throughput units" } ], - "recommendationControl": "Governance", - "longDescription": "A host ID must be between 1 and 32 characters, contain only lowercase letters, numbers, and dashes, not start or end with a dash, and not contain consecutive dashes. 
The host ID value should be unique for all apps/slots you're running.\n", - "pgVerified": false, - "description": "Ensure unique hostid set for Function App", - "potentialBenefits": "Easier management", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Enable auto-inflate on Event Hub Standard tier namespaces to automatically scale up throughput units (TUs), meeting usage needs and preventing data ingress or egress throttle scenarios by adjusting to allowed rates.\n", + "pgVerified": true, + "description": "Enable auto-inflate on Event Hub Standard tier", + "potentialBenefits": "Prevents throttling by autoscaling TUs", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.EventHub/namespaces", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Event Hub namespace instances that are Standard tier and do not have Auto Inflate enabled\nresources\n| where type == \"microsoft.eventhub/namespaces\"\n| where sku.tier == \"Standard\"\n| where properties.isAutoInflateEnabled == \"false\"\n| project recommendationId = \"fbfef3df-04a5-41b2-a8fd-b8541eb04956\", name, id, tags, param1 = \"AutoInflateEnabled: False\"\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c9a278b7-024b-454b-bd54-41587c512b74", + "aprlGuid": "499769ae-67c9-492e-9ca5-cfd4cece5209", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-functions/migrate-version-3-version-4?tabs=net6-in-proc%2Cazure-cli%2Cwindows&pivots=programming-language-csharp", - "name": "Migrate version 3.x to 4.x" + "url": "https://learn.microsoft.com/azure/virtual-desktop/autoscale-scaling-plan?tabs=portal", + "name": "Create and assign an autoscale scaling 
plan" } ], - "recommendationControl": "Governance", - "longDescription": "Beginning on December 13, 2022, function apps running on versions 2.x and 3.x of the Azure Functions runtime have reached the end of life (EOL) of extended support. We highly recommend you migrating your function apps to version 4.x of the Functions runtime.\n", - "pgVerified": false, - "description": "Ensure Function App runs a supported version", - "potentialBenefits": "Better governance", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Each region has its own scaling plans assigned to host pools within that region. However, these plans can become inaccessible if there's a regional failure. To mitigate this risk, it's advisable to create a secondary scaling plan in another region.\n", + "pgVerified": true, + "description": "Scaling plans should be created per region and not scaled across regions", + "potentialBenefits": "Enhanced scaling", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", + "recommendationResourceType": "Microsoft.DesktopVirtualization/scalingPlans", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7c608f46-46b2-4cc0-bbd6-1d457c16671c", + "aprlGuid": "013ac34e-7c4b-425f-9e0c-216f0cc06181", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-functions/functions-app-settings#functions_worker_runtime", - "name": "FUNCTIONS_WORKER_RUNTIME" + "url": "https://learn.microsoft.com/azure/virtual-desktop/configure-validation-environment?tabs=azure-portal", + "name": "Configure a host pool as a validation environment" } ], "recommendationControl": "Governance", - "longDescription": "The FUNCTIONS_WORKER_RUNTIME setting in the Function App configuration should be set to the 
appropriate value based on the language you are using. This setting is used to determine the language worker that will be used to execute your functions.\n", - "pgVerified": false, - "description": "Ensure FUNCTIONS_WORKER_RUNTIME is set properly", - "potentialBenefits": "Better governance", - "publishedToLearn": false, + "longDescription": "Create a Validation Pool for early issue detection with planned AVD updates. Adjust limits based on needs. Scale by adding multiple host pools for more users. Regularly test updates on host pools. Validate changes before applying to main environment to avoid downtime.\n", + "pgVerified": true, + "description": "Create a validation host pool for testing of planned updates", + "potentialBenefits": "Enhanced environment stability", "tags": null, - "recommendationResourceType": "Microsoft.Web/sites", + "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0bf1a2bb-7617-4ab2-a784-e7ea40c5f01b", + "aprlGuid": "979ff8be-5f3a-4d8e-9aa3-407ecdd6d6f7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/azure-virtual-desktop/monitoring#resource-health", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/virtual-desktop/scheduled-agent-updates", + "name": "Scheduled Agent Updates for Azure Virtual Desktop host pools" } ], "recommendationControl": "Governance", - "longDescription": "Set up Service Health alerts so that you stay aware of service issues, planned maintenance, or other changes that might affect your Azure Virtual Desktop resources. Use Resource Health to monitor your VMs and storage solutions.\n", + "longDescription": "Create maintenance schedules for AVD agent updates to avoid disruptions. 
Use Scheduled Agent Updates to set maintenance windows for updating Azure Virtual Desktop agent, side-by-side stack, and Geneva Monitoring agent.\n", "pgVerified": true, - "description": "Monitor service health and resource health for AVD", - "potentialBenefits": "Enhanced AVD error tracking and resolution", - "publishedToLearn": false, + "description": "Configure host pool scheduled agent updates", + "potentialBenefits": "Enhanced environment stability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "High", - "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// This resource graph query, will return rows if service health alerts haven't been configured for AVD service\r\nresourcecontainers\r\n| where type == 'microsoft.resources/subscriptions'\r\n| project subscriptionAlerts=tostring(id),name,tags\r\n| join kind=leftouter (\r\n resources\r\n | where type == 'microsoft.insights/activitylogalerts' and properties.condition contains \"ServiceHealth\"\r\n | extend subscriptions = properties.scopes\r\n | project subscriptions\r\n | mv-expand subscriptions\r\n | project subscriptionAlerts = tostring(subscriptions)\r\n) on subscriptionAlerts\r\n| where isempty(subscriptionAlerts1)\r\n| project-away subscriptionAlerts1\r\n| project recommendationId = \"0bf1a2bb-7617-4ab2-a784-e7ea40c5f01b\",id=subscriptionAlerts,name,tags, param1 = \"AVDServiceHealthAlertsConfigured: False\"\r\n" + "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", + "recommendationImpact": "Medium", + "automationAvailable": false, + "query": "// Azure Resource Graph Query\n// This resource graph query will return all AVD host pools that does not have scheduled agent updates configured\nresources\n| where type =~ \"Microsoft.DesktopVirtualization/hostpools\"\n| where isnull(properties.agentUpdate)\n| project recommendationId = \"979ff8be-5f3a-4d8e-9aa3-407ecdd6d6f7\", name, id, tags, param1 = 'No scheduled 
agent updates'\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0cf72d91-644d-4591-9bb7-84ba3f705a41", + "aprlGuid": "939cb85c-102a-4e0a-ab82-5c92116d3778", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/insights?tabs=monitor", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/windows-server/identity/ad-ds/deploy/virtual-dc/adds-on-azure-vm#configure-the-vms-and-install-active-directory-domain-services", + "name": "Configure the VMs and install Active Directory Domain Services" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Configure AVD insights workbook template to monitor and troubleshoot AVD workloads across metrics, logs, events, and more. Both Production and DR workloads should be enabled with AVD Insights.\n", + "recommendationControl": "Governance", + "longDescription": "For optimized AVD configuration, place Hybrid VMs in unique OUs. Segregate Prod and DR units for environment-specific settings. 
This ensures targeted configurations for session hosts, including FSLogix, timeouts, and session controls.\n", "pgVerified": true, - "description": "Configure AVD Insights workbook", - "potentialBenefits": "Enhanced AVD monitoring and troubleshooting", - "publishedToLearn": false, + "description": "Ensure a unique OU is used when deploying host pools with domain joined session hosts", + "potentialBenefits": "Improved AVD hostpool config & segmentation", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.DesktopVirtualization/hostPools", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "89b4d8f6-6345-4d66-9012-c3fc2aef94e8", + "aprlGuid": "38721758-2cc2-4d6b-b7b7-8b47dadbf7df", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/diagnostics-log-analytics", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/site-recovery/site-recovery-overview", + "name": "About Site Recovery" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "Having separate Log Analytics ensures that your DR environment is fully operational for visibility of the metrics, performance, and other auditing tools your workload teams will rely on in the event of an incident.\n", + "longDescription": "Implement Azure Site Recovery (ASR) or Azure Backup for personal host pools to enable seamless failover and failback. 
This replicates VMs supporting personal desktops to a secondary Azure region, ensuring recovery from a known state in case of a disaster or outage.\n", "pgVerified": true, - "description": "Ensure separate log analytics workspaces for Prod and DR", - "potentialBenefits": "Improved DR visibility and operation", - "publishedToLearn": false, + "description": "Use Azure Site Recovery or backups to protect VMs supporting personal desktops", + "potentialBenefits": "Ensures VM recovery & failover", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Compute/virtualMachines", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "204b56b0-9710-4c16-b506-bafb5fb318ed", + "aprlGuid": "9437634c-d69e-2747-b13e-631c13182150", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/scenarios/azure-virtual-desktop/enterprise-scale-landing-zone", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/architecture/guide/technology-choices/load-balancing-overview", + "name": "Azure Load Balancing Options" + }, + { + "url": "https://learn.microsoft.com/azure/traffic-manager/traffic-manager-overview", + "name": "Azure Traffic Manager" + }, + { + "url": "https://learn.microsoft.com/azure/frontdoor/front-door-overview", + "name": "Azure Front Door" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/architecture/guide/networking/global-web-applications/mission-critical-content-delivery", + "name": "Mission-critical global content delivery" } ], - "recommendationControl": "Governance", - "longDescription": "Follow AVD Landing Zone best practices using multiple resource groups based on resource type and associated shared resources for AVD 
workloads.\n", + "recommendationControl": "Business Continuity", + "longDescription": "For most solutions, choose either Azure Front Door for content caching, CDN, TLS termination, and WAF, or Traffic Manager for simple global load balancing.\n", "pgVerified": true, - "description": "Organize AVD resources using the AVD scale unit model described by the AVD landing zone methodology", - "potentialBenefits": "Enhanced organization and scalability", - "publishedToLearn": false, + "description": "Avoid combining Traffic Manager and Front Door", + "potentialBenefits": "Optimized network routing and security", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Avoid combining Traffic Manager and Front Door\nresources\n| where type == \"microsoft.network/trafficmanagerprofiles\"\n| mvexpand(properties.endpoints)\n| extend endpoint=tostring(properties_endpoints.properties.target)\n| project name, trafficmanager=id, matchname=endpoint, tags\n| join (\n resources\n | where type =~ \"microsoft.cdn/profiles/afdendpoints\"\n | extend matchname= tostring(properties.hostName)\n | extend splitid=split(id, \"/\")\n | extend frontdoorid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\n | project name, id, matchname, frontdoorid, type\n | union\n (cdnresources\n | where type =~ \"Microsoft.Cdn/Profiles/CustomDomains\"\n | extend matchname= tostring(properties.hostName)\n | extend splitid=split(id, \"/\")\n | extend frontdoorid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\n | project name, id, matchname, frontdoorid, type)\n )\n on matchname\n| project\n recommendationId = \"9437634c-d69e-2747-b13e-631c13182150\",\n name=split(trafficmanager, \"/\")[-1],\n 
id=trafficmanager,\n tags,\n param1=strcat(\"hostname:\", matchname),\n param2=strcat(\"frontdoorid:\", frontdoorid)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ef4b3561-c85f-47cf-8cb0-51fae9ddf929", + "aprlGuid": "6c40b7ae-2bea-5748-be1a-9e9e3b834649", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/azure-virtual-desktop/business-continuity#capacity-planning", - "name": "Capacity Planning" - }, - { - "url": "https://learn.microsoft.com/azure/architecture/example-scenario/wvd/windows-virtual-desktop#azure-virtual-desktop-limitations", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/origin-security?tabs=app-service-functions&pivots=front-door-standard-premium", + "name": "Secure traffic to Azure Front Door origins" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Monitor and plan for subscription limits and API throttling limits. Keep track of resource usage within your subscription. Consider scaling across multiple subscriptions if further scaling is required.\nTo handle a large number of users, consider scaling horizontally by creating multiple host pools.\n", + "recommendationControl": "Security", + "longDescription": "Front Door's features perform optimally when traffic exclusively comes through Front Door. 
It's advised to set up your origin to deny access to traffic that bypasses Front Door.\n", "pgVerified": true, - "description": "Monitor and plan capacity for AVD resources", - "potentialBenefits": "Avoids limits, ensures smooth scaling", - "publishedToLearn": false, + "description": "Restrict traffic to your origins", + "potentialBenefits": "Enhances security and performance", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e1a34ac6-8761-4020-b537-d60c0be7514e", - "recommendationTypeId": null, + "aprlGuid": "52bc9a7b-23c8-bc4c-9d2a-7bc43b50104a", + "recommendationTypeId": "e607041e-3194-42ad-9994-b6ea5ec12f5e", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/example-scenario/azure-virtual-desktop/azure-virtual-desktop-multi-region-bcdr", - "name": "Learn More" + "url": "https://learn.microsoft.com/rest/api/frontdoor/", + "name": "REST API Reference" + }, + { + "url": "https://learn.microsoft.com/java/api/overview/azure/resourcemanager-frontdoor-readme?view=azure-java-preview", + "name": "Client library for Java" + }, + { + "url": "https://learn.microsoft.com/python/api/overview/azure/front-door?view=azure-python", + "name": "SDK for Python" } ], - "recommendationControl": "High Availability", - "longDescription": "Active Directory Domain Services (AD DS) integrated DNS/other should target Secondary/Tertiary customer DNS across multi-region zones. 
If using custom DNS, ensure there are redundant DNS servers to avoid a single point of failure.\n", + "recommendationControl": "Scalability", + "longDescription": "When working with Azure Front Door through APIs, ARM templates, Bicep, or SDKs, using the latest API or SDK version is crucial. Updates bring new functions, important security patches, and bug fixes.\n", "pgVerified": true, - "description": "Ensure DNS regions are replicated to avoid single point of failure", - "potentialBenefits": "Improves uptime & resilience", - "publishedToLearn": false, + "description": "Use the latest API version and SDK version", + "potentialBenefits": "Enhanced security and features", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0714d039-535e-468d-9732-e32b5c094faa", + "aprlGuid": "1ad74c3c-e3d7-0046-b83f-a2199974ef15", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/example-scenario/wvd/azure-virtual-desktop-multi-region-bcdr", - "name": "Multi-region BCDR" + "url": "https://learn.microsoft.com/azure/frontdoor/front-door-diagnostics?pivots=front-door-standard-premium", + "name": "Monitor metrics and logs in Azure Front Door" }, { - "url": "https://learn.microsoft.com/azure/well-architected/azure-virtual-desktop/business-continuity#active-active-scenarios", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-monitor?pivots=front-door-standard-premium#waf-logs", + "name": "WAF logs" + }, + { + "url": "https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-logs", + "name": "Configure Azure Front Door 
logs" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "It is recommended to adopt a multi-region deployment (active-active or active-passive) for AVD. Each region should contain at least identity, name resolution, AVD management resources, and session hosts in case of a primary region outage.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Front Door logs offer comprehensive telemetry on each request, crucial for understanding your solution's performance and responses, especially when caching is enabled, as origin servers might not receive every request.\n", "pgVerified": true, - "description": "Implement a multi-region BCDR Plan", - "potentialBenefits": "Enhanced resilience and uptime", - "publishedToLearn": false, + "description": "Configure logs", + "potentialBenefits": "Enhanced insights and solution monitoring", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ed1f0327-0914-49e8-9518-16acb0d6b8d6", + "aprlGuid": "d9bd6780-0d6f-cd4c-bc66-8ddcab12f3d1", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/storage/files/storage-files-scale-targets", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/end-to-end-tls?pivots=front-door-standard-premium", + "name": "End-to-end TLS with Azure Front Door" } ], - "recommendationControl": "Scalability", - "longDescription": "To maximize capacity and performance scaling it is recommended to creat only one file share per Azure files storage account, with this approach the single file share will be able to grow to the maximum capacities of the storage 
account.\n", + "recommendationControl": "Security", + "longDescription": "Front Door terminates TCP and TLS connections from clients and establishes new connections from each PoP to the origin. Securing these connections with TLS, even for Azure-hosted origins, ensures data is always encrypted during transit.\n", "pgVerified": true, - "description": "Create only one FSLogix file share per Storage Account", - "potentialBenefits": "Enhanced scaling and performance", - "publishedToLearn": false, + "description": "Use end-to-end TLS", + "potentialBenefits": "Ensures data encryption in transit", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Use end-to-end TLS\ncdnresources\n| where type == \"microsoft.cdn/profiles/afdendpoints/routes\"\n| extend forwardingProtocol=tostring(properties.forwardingProtocol),supportedProtocols=properties.supportedProtocols\n| project id,name,forwardingProtocol,supportedProtocols,tags\n| where forwardingProtocol !~ \"httpsonly\" or supportedProtocols has \"http\"\n| project recommendationId= \"d9bd6780-0d6f-cd4c-bc66-8ddcab12f3d1\", name,id,tags,param1=strcat(\"forwardingProtocol:\",forwardingProtocol),param2=strcat(\"supportedProtocols:\",supportedProtocols)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ff916698-7507-4519-b545-c94dd81d73c5", + "aprlGuid": "24ab9f11-a3e4-3043-a985-22cf94c4933a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/create-host-pools-user-profile", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/front-door-how-to-redirect-https#create-http-to-https-redirect-rule", + 
"name": "Create HTTP to HTTPS redirect rule" } ], - "recommendationControl": "Scalability", - "longDescription": "To maximize capacity and performance scaling of the file share service and avoid user's profile contention, it is recommended to create one file share target and FSLogix setup per host pool.\n", + "recommendationControl": "Security", + "longDescription": "Using HTTPS is ideal for secure connections. However, for compatibility with older clients, HTTP requests may be necessary. Azure Front Door enables auto redirection of HTTP to HTTPS, enhancing security without sacrificing accessibility.\n", "pgVerified": true, - "description": "Create a dedicated FSLogix file share and setup per host pool", - "potentialBenefits": "Enhanced performance", - "publishedToLearn": false, + "description": "Use HTTP to HTTPS redirection", + "potentialBenefits": "Enhances security and compliance", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n" + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Use HTTP to HTTPS redirection\ncdnresources\n| where type == \"microsoft.cdn/profiles/afdendpoints/routes\"\n| extend httpsRedirect=tostring(properties.httpsRedirect)\n| project id,name,httpsRedirect,tags\n| where httpsRedirect !~ \"enabled\"\n| project recommendationId= \"24ab9f11-a3e4-3043-a985-22cf94c4933a\", name,id,tags,param1=strcat(\"httpsRedirect:\",httpsRedirect)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0025ed2e-41f4-4ada-93c1-12484cef8b0c", + "aprlGuid": "29d65c41-2fad-d142-95eb-9eab95f6c0a5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/fslogix/overview-what-is-fslogix", - "name": "FSLogix" - }, - { - "url": 
"https://learn.microsoft.com/azure/backup/blob-backup-configure-manage?tabs=operational-backup", - "name": "Backup Storage Account" + "url": "https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-configure-https-custom-domain?tabs=powershell", + "name": "Configure HTTPS on an Azure Front Door custom domain using the Azure portal" } ], - "recommendationControl": "High Availability", - "longDescription": "It is recommended to enable backup on the FSLogix Storage Account. Ensuring the user profiles are resilient will allow user data and experience to be consistent through outages.\n", + "recommendationControl": "Security", + "longDescription": "When Front Door manages your TLS certificates, it reduces your operational costs and helps you to avoid costly outages caused by forgetting to renew a certificate. Front Door automatically issues and rotates the managed TLS certificates.\n", "pgVerified": true, - "description": "Enable Azure backup for FSLogix storage account file shares", - "potentialBenefits": "Ensures data resilience and consistency", - "publishedToLearn": false, + "description": "Use managed TLS certificates", + "potentialBenefits": "Lowers costs, avoids outages", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3835b4b3-0479-4be8-9ffd-34ae29fa33b9", - "recommendationTypeId": null, + "aprlGuid": "4638c2c0-03de-6d42-9e09-82ee4478cbf3", + "recommendationTypeId": "2c057605-4707-4d3e-bbb0-a7fe9b6a626b", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/rdp-shortpath?tabs=managed-networks", - "name": "Learn More" + "url": 
"https://learn.microsoft.com/azure/frontdoor/standard-premium/how-to-configure-https-custom-domain?tabs=powershell#select-the-certificate-for-azure-front-door-to-deploy", + "name": "Select the certificate for Azure Front Door to deploy" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "RDP Shortpath establishes a direct UDP-based connection between a client and the session host. By default, Remote Desktop Protocol (RDP) tries to establish connection using UDP and uses a TCP-based reverse connect transport as a fallback connection mechanism. UDP-based transport offers better connection reliability and more consistent latency.\n", + "recommendationControl": "High Availability", + "longDescription": "If you use your own TLS certificates, set the Key Vault certificate version to 'Latest' to avoid reconfiguring Azure Front Door for new certificate versions and waiting for deployment across Front Door's environments.\n", "pgVerified": true, - "description": "Implement RDP shortpath for public or managed networks", - "potentialBenefits": "Better reliability and consistent latency", - "publishedToLearn": false, + "description": "Use latest version for customer-managed certificates", + "potentialBenefits": "Saves time and automates TLS updates", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Cdn/profiles", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e718ac1a-ebab-4f75-9e4a-1a5ccef20d1f", + "aprlGuid": "cd6a32af-747a-e649-82a7-a98f528ca842", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/troubleshoot-rdp-shortpath", - "name": "Learn More" - }, - { - "url": 
"https://learn.microsoft.com/azure/virtual-desktop/check-access-validate-required-fqdn-endpoint", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/architecture/best-practices/host-name-preservation", + "name": "Preserve the original HTTP host name between a reverse proxy and its back-end web application" } ], "recommendationControl": "Governance", - "longDescription": "Ensure that AVD session hosts can effectively communicate with the AVD control plane and that UDP ports are open if UDP is utilized. Validate the connectivity of VMs to the AVD Control Plane and confirm the accessibility of UDP TURN ports. Whitelist global URLs and ensure that UDP/TURN ports are open and accessible to facilitate smooth user connections.\n", + "longDescription": "Front Door can rewrite Host headers for custom domain names routing to a single origin, useful for avoiding custom domain configuration at both Front Door and the origin.\n", "pgVerified": true, - "description": "Ensure AVD session hosts connect to control plane & allow UDP ports for RDP shortpath", - "potentialBenefits": "Enhanced performance & user experience", - "publishedToLearn": false, + "description": "Use the same domain name on Front Door and your origin", + "potentialBenefits": "Improves session/auth handling", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Cdn/profiles", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d984eaf9-0fa1-4f8d-a326-bda751993c6f", + "aprlGuid": "1bd2b7e8-400f-e64a-99a2-c572f7b08a62", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/entra/identity/hybrid/connect/how-to-connect-install-multiple-domains", - "name": "Learn More" + "url": 
"https://learn.microsoft.com/azure/frontdoor/web-application-firewall", + "name": "Web Application Firewall on Azure Front Door" } ], "recommendationControl": "Security", - "longDescription": "Hybrid - Entra ID Connect best to run in Azure but can be hosted on-prem. Secondary or more VMs should be setup in staging mode in event of failover.\nSet up secondary server in staging mode for Entra Connect for syncing to Entra in case of primary server outage.\n", + "longDescription": "For internet-facing applications, enabling the Front Door web application firewall (WAF) and configuring it to use managed rules is recommended for protection against a wide range of attacks using Microsoft-managed rules.\n", "pgVerified": true, - "description": "Ensure secondary Entra ID connect synchronization server", - "potentialBenefits": "Improved failover reliability", - "publishedToLearn": false, + "description": "Enable the WAF", + "potentialBenefits": "Enhances web app security", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Enable the WAF\n\nresources\n| where type =~ \"microsoft.cdn/profiles\" and sku has \"AzureFrontDoor\"\n| project name, cdnprofileid=tolower(id), tostring(tags), resourceGroup, subscriptionId,skuname=tostring(sku.name)\n| join kind= fullouter (\n cdnresources\n | where type == \"microsoft.cdn/profiles/securitypolicies\"\n | extend wafpolicyid=tostring(properties['parameters']['wafPolicy']['id'])\n | extend splitid=split(id, \"/\")\n | extend cdnprofileid=tolower(strcat_array(array_slice(splitid, 0, 8), \"/\"))\n | project secpolname=name, cdnprofileid, wafpolicyid\n )\n on cdnprofileid\n| project name, cdnprofileid, secpolname, wafpolicyid,skuname\n| join kind 
= fullouter (\n resources\n | where type == \"microsoft.network/frontdoorwebapplicationfirewallpolicies\"\n | extend\n managedrulesenabled=iff(tostring(properties.managedRules.managedRuleSets) != \"[]\", true, false),\n enabledState = tostring(properties.policySettings.enabledState)\n | project afdwafname=name, managedrulesenabled, wafpolicyid=id, enabledState, tostring(tags)\n )\n on wafpolicyid\n| where name != \"\"\n| summarize\n associatedsecuritypolicies=countif(secpolname != \"\"),\n wafswithmanagedrules=countif(managedrulesenabled == 1)\n by name, id=cdnprofileid, tags,skuname\n| where associatedsecuritypolicies == 0 or wafswithmanagedrules == 0\n| project\n recommendationId = \"1bd2b7e8-400f-e64a-99a2-c572f7b08a62\",\n name,\n id,\n todynamic(tags),\n param1 = strcat(\"associatedsecuritypolicies:\", associatedsecuritypolicies),\n param2 = strcat(\"wafswithmanagedrules:\", wafswithmanagedrules),\n param3 = strcat(\"skuname:\",skuname)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "db1727d1-5c8e-4a01-a31e-f0d58cfd95b1", + "aprlGuid": "38f3d542-6de6-a44b-86c6-97e3be690281", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/expressroute/designing-for-disaster-recovery-with-expressroute-privatepeering#need-for-redundant-connectivity-solution", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/health-probes", + "name": "Health probes" } ], "recommendationControl": "High Availability", - "longDescription": "For high availability connections back to on-premises data centers should consider backup paths across the regions that have been utilized. 
Ensure redundancy in routing by having a secondary route table in the secondary region.\n", + "longDescription": "Front Door health probes help detect unavailable or unhealthy origins, directing traffic to alternate origins if needed.\n", "pgVerified": true, - "description": "Ensure virtual networks have route tables/route server configured for all regions", - "potentialBenefits": "Enhanced availability & routing", - "publishedToLearn": false, + "description": "Disable health probes when there is only one origin in an origin group", + "potentialBenefits": "Reduces unnecessary origin traffic", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Disable health probes when there is only one origin in an origin group\ncdnresources\n| where type =~ \"microsoft.cdn/profiles/origingroups\"\n| extend healthprobe=tostring(properties.healthProbeSettings)\n| project origingroupname=name, id, tags, resourceGroup, subscriptionId, healthprobe\n| join (\n cdnresources\n | where type =~ \"microsoft.cdn/profiles/origingroups/Origins\"\n | extend origingroupname = tostring(properties.originGroupName)\n )\n on origingroupname\n| summarize origincount=count(), enabledhealthprobecount=countif(healthprobe != \"\") by origingroupname, id, tostring(tags), resourceGroup, subscriptionId\n| where origincount == 1 and enabledhealthprobecount != 0\n| project\n recommendationId = \"38f3d542-6de6-a44b-86c6-97e3be690281\",\n name=origingroupname,\n id,\n todynamic(tags),\n param1 = strcat(\"origincount:\", origincount),\n param2 = strcat(\"enabledhealthprobecount:\", enabledhealthprobecount)\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "37d1091b-e599-4548-a067-a9286be16e45", + "aprlGuid": 
"5225bba3-28ec-1e43-8986-7eedfd466d65", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/cloud-adoption-framework/ready/azure-best-practices/plan-for-ip-addressing", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/architecture/patterns/health-endpoint-monitoring", + "name": "Health Endpoint Monitoring pattern" } ], - "recommendationControl": "Business Continuity", - "longDescription": "NSG and ASG per AVD persona and IP space per Prod/DR regions.\nIt's important your organization plans for IP addressing in Azure. Planning ensures the IP address space doesn't overlap across on-premises locations and Azure regions. Overlapping IP address spaces across on-premises and Azure regions create major contention challenges.\n", + "recommendationControl": "High Availability", + "longDescription": "Consider selecting a webpage or location specifically designed for health monitoring as the endpoint for Azure Front Door's health probes. 
This should encompass the status of critical components like application servers, databases, and caches to serve production traffic efficiently.\n", "pgVerified": true, - "description": "Ensure virtual networks isolation with separate IP space and NSGs for Prod and DR", - "potentialBenefits": "Enhances security and prevents IP conflicts", - "publishedToLearn": false, + "description": "Select good health probe endpoints", + "potentialBenefits": "Improves traffic routing and uptime", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Cdn/profiles", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1c6c97d7-4d03-4f53-985d-fa239f715173", + "aprlGuid": "5783defe-b49e-d947-84f7-d8677593f324", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-network/service-tags-overview", - "name": "Learn More" - }, - { - "url": "https://learn.microsoft.com/azure/virtual-network/virtual-networks-udr-overview", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/health-probes#supported-http-methods-for-health-probes", + "name": "Supported HTTP methods for health probes" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Ensure that Route Tables have static routes to allow session host traffic that targets AVD control plane to go outbound directly out of the subnet to the internet (next hop), this will avoid any delays of inspecting or adding additional hops in the communication of trusted traffic.\n", + "recommendationControl": "Scalability", + "longDescription": "Health probes in Azure Front Door can use GET or HEAD HTTP methods. 
Using the HEAD method for health probes is a recommended practice because it reduces the traffic load on your origins, being less resource-intensive.\n", "pgVerified": true, - "description": "Configure static routes for session hosts to directly access the AVD control plane subnet", - "potentialBenefits": "Enhanced performance and Disaster Recovery", - "publishedToLearn": false, + "description": "Use HEAD health probes", + "potentialBenefits": "Reduces traffic load on origins", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Cdn/profiles", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2831dab9-6a43-44a1-8aec-90a8e84894bc", + "aprlGuid": "b515690d-3bf9-3a49-8d38-188e0fd45896", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/training/modules/create-manage-session-host-image/", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/web-application-firewall/afds/waf-front-door-geo-filtering", + "name": "Geo filter WAF policy - GeoMatch" } ], - "recommendationControl": "Governance", - "longDescription": "Establish a systematic process for handling image updates within your Azure Virtual Desktop environment. Instead of directly updating individual session hosts, create a new version of the updated image. 
This process involves creating and configuring a golden image with the necessary updates and configurations.\n", + "recommendationControl": "Security", + "longDescription": "Azure Front Door's geo-filtering through WAF enables defining custom access rules by country/region to restrict or allow web app access.\n", "pgVerified": true, - "description": "Create updated image version and replace session hosts rather than updating host directly", - "potentialBenefits": "Ensures consistency; minimizes drift", - "publishedToLearn": false, + "description": "Use geo-filtering in Azure Front Door", + "potentialBenefits": "Enhanced regional access control", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c15b2b73-52a1-4db2-88dd-d592424ff4e4", + "aprlGuid": "1cfe7834-56ec-ff41-b11d-993734705dba", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/fslogix/reference-configuration-settings?tabs=profiles", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/frontdoor/private-link", + "name": "Private link for Azure Front Door" } ], - "recommendationControl": "Governance", - "longDescription": "Ensure all session hosts have the standard FSLogix configuration deployed. 
Regularly validate settings for consistency and alignment with best practices.\n", + "recommendationControl": "Security", + "longDescription": "Azure Private Link enables secure access to Azure PaaS and services over a private endpoint in your virtual network, ensuring traffic goes over the Microsoft backbone network, not the public internet.\n", "pgVerified": true, - "description": "Ensure the standard FSLogix configuration is deployed", - "potentialBenefits": "Optimized session reliability and performance", - "publishedToLearn": false, + "description": "Secure your Origin with Private Link in Azure Front Door", + "potentialBenefits": "Enhanced security and private connectivity", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Cdn/profiles", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7b170ddd-5770-4945-9bc3-cd1ccf5f8672", + "aprlGuid": "82fa3cff-74bd-4063-b726-834f160592fa", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/fslogix/how-to-configure-storage-permissions", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/frontdoor/understanding-pricing", + "name": "Compare pricing between Azure Front Door tiers" } ], - "recommendationControl": "Security", - "longDescription": "Verify user permissions are correctly set on SMB shares so that users have appropriate access to only their own profile and not other user profiles, while administrators have full access at the root volume. 
Also ensure secondary storage path permissions are set in case of a DR event.\n", - "pgVerified": true, - "description": "Ensure user permissions are set correctly on FSLogix SMB shares", - "potentialBenefits": "Enhanced security & disaster recovery", - "publishedToLearn": false, + "recommendationControl": "Service Upgrade and Retirement", + "longDescription": "Azure Front Door standard is ~45% cheaper than AFD classic and has many additional benefits. Classic is also scheduled to be retired on March 31, 2027.\n", + "pgVerified": false, + "description": "Avoid using Classic Azure Front Door", + "potentialBenefits": "Cost savings and additional supported features", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Cdn/profiles", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n\r\n" + "query": "//under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "483f5a00-84a0-49f7-903b-ef6f1fc0c389", + "aprlGuid": "20057905-262c-49fe-a9be-49f423afb359", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/fslogix/troubleshooting-events-logs-diagnostics", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/well-architected/services/messaging/service-bus/reliability", + "name": "Service Bus and reliability" }, { - "url": "https://learn.microsoft.com/azure/storage/files/storage-files-monitoring", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/service-bus-messaging/service-bus-geo-dr#availability-zones", + "name": "Azure Service Bus Geo-disaster recovery" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/service-bus-messaging/service-bus-outages-disasters", + "name": "Insulate Azure Service Bus applications against outages and disasters" } ], - "recommendationControl": 
"Monitoring and Alerting", - "longDescription": "Configure diagnostic settings on FSLogix storage resources and regularly its metrics and FSLogix logs for errors. Events can be reviewed by looking locally inside the Session Host, but it is recommended to configure AVD insights workbook to consolidate this information to a Log Analytics workspace.\n", - "pgVerified": true, - "description": "Configure Diagnostic Settings on FSLogix storage and capture session hosts FSLogix events", - "potentialBenefits": "Enhanced AVD error tracking and resolution", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Availability zones are now enabled by default on new namespaces where possible. Existing namespaces are being migrated to availability zones where possible. The property zoneRedundant might still show as false, even when availability zones has been enabled.\n", + "pgVerified": false, + "description": "Enable Availability Zones for Service Bus namespaces", + "potentialBenefits": "Enhances fault tolerance and uptime", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.ServiceBus/namespaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d51e0a70-8b50-4be3-af8a-7c9065e47360", + "aprlGuid": "d810e3a8-600f-4be1-895b-1a93e61d37fd", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/fslogix/how-to-install-fslogix", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/service-bus-messaging/automate-update-messaging-units", + "name": "Service Bus auto-scaling" } ], - "recommendationControl": "Governance", - "longDescription": "Ensure a process is in place to regularly check for FSLogix 
agent upgrades and maintain FSLogix up to date. We recommend customers upgrade to the latest version of FSLogix as quickly as their deployment process can allow. FSLogix will provide hotfix releases which address current and potential bugs that impact customer deployments. Additionally, it is the first requirement when opening any support case.\n", - "pgVerified": true, - "description": "Manually install FSLogix updates", - "potentialBenefits": "Enhanced reliability & support", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "Use Service Bus with auto-scale for high availability. The Premium SKU supports auto-scale, ensuring that the resources are automatically scaled based on the load.\n", + "pgVerified": false, + "description": "Enable auto-scale for production workloads on Service Bus namespaces", + "potentialBenefits": "Ensures high availability and performance", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Low", + "recommendationResourceType": "Microsoft.ServiceBus/namespaces", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9b2301af-9cac-4f1a-871a-f17475d01812", + "aprlGuid": "8dbcd94b-0948-4df3-b608-1946726c3abf", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/app-attach-overview?pivots=msix-app-attach", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/container-apps/health-probes?tabs=arm-template", + "name": "Health probes for Azure Container Apps" } ], "recommendationControl": "High Availability", - "longDescription": "Turn on Continuous Availability if using Azure Netapp Files.\nVerify the number of users connecting to each file share to make sure the SMB path can handle the 
number of file connections. Currently, Azure Files supports up to 10k handles per root directory.\n", - "pgVerified": true, - "description": "Turn on continuous availability for ANF when using it for app attach", - "potentialBenefits": "Enhanced stability & user limit checks", - "publishedToLearn": false, + "longDescription": "Enable container health probes to monitor the health of your container apps and ensure that unhealthy containers are restarted automatically.\n", + "pgVerified": false, + "description": "Enable container health probes", + "potentialBenefits": "Enhanced availability and resilience", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.App/containerApps", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7d9c96a6-1ce5-4cf0-ad1b-638a37f753cb", + "aprlGuid": "f4201965-a88d-449d-b3b4-021394719eb2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/virtual-desktop/app-attach-overview?pivots=msix-app-attach", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/reliability/reliability-azure-container-apps", + "name": "Reliability in Azure Container Apps" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "App Attach packages should be on a separate share from profiles. And App Attach files should be backed up. 
Requirements can vary greatly depending on how many packaged applications are stored in an image, and you need to test your applications to understand your requirements.\nYour file share should be in the same Azure region as your session hosts.\n", - "pgVerified": true, - "description": "Use dedicated file share for App attach and include the storage in the disaster recovery plan", - "potentialBenefits": "Enhances performance and scalability", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "To take advantage of availability zones, you must enable zone redundancy when you create a Container Apps environment. The environment must include a virtual network with an available subnet. To ensure proper distribution of replicas, set your app's minimum replica count to three.\n", + "pgVerified": false, + "description": "Deploy zone redundant Container app environments", + "potentialBenefits": "Enhances app resiliency and reliability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.App/managedenvironments", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// The query filters the qualified Container app environments that do not have Zone Redundancy enabled.\nresources\n| where type =~ \"microsoft.app/managedenvironments\"\n| where tobool(properties.zoneRedundant) == false\n| project recommendationId = \"f4201965-a88d-449d-b3b4-021394719eb2\", name, id, tags, param1 = \"AvailabilityZones: Single Zone\"\n| order by id asc\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1f57434f-f884-41f3-b818-129bbe3c5d3b", + "aprlGuid": "f8f834a9-c761-4e84-b2cb-ac55494d0c37", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/key-vault/general/disaster-recovery-guidance", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/high-availability-sla-local-zone-redundancy?view=azuresql-mi#zone-redundant-availability", + "name": "High availability through zone-redundancy" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "To ensure continuous availability and disaster recovery readiness, it is recommended to provision a secondary Key Vault in a secondary region. In the event of a primary region failure, this secondary Key Vault will ensure that critical secrets are accessible for use in deployments in the secondary region.\n", - "pgVerified": true, - "description": "Ensure resilient deployment of key vaults for AVD Host Pools", - "potentialBenefits": "Ensures DR readiness and access", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "By default, Azure SQL Database premium tier provisions multiple copies within the same region. 
For geo redundancy, databases can be set as Zone Redundant, distributing copies across Azure Availability Zones to maintain availability during regional outages.\n", + "pgVerified": false, + "description": "Enable zone redundancy for Azure SQL Managed Instance to achieve high availability and resiliency", + "potentialBenefits": "Enhanced availability and reliability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Sql/managedInstances", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "d61f6ee8-de1b-4fd9-9ce3-316cfe11ee05", + "aprlGuid": "15e2712c-f3ea-4a8d-9081-11e822b1ccfb", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/example-scenario/azure-virtual-desktop/azure-virtual-desktop-multi-region-bcdr", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/automated-backups-overview?view=azuresql-mi&preserve-view=true#backup-storage-redundancy", + "name": "Backup storage redundancy" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "When using an AD DS identity solution with AVD, it is recommended to deploy two or more domain controllers on Azure virtual machines across availability zones. 
This improves the environment's reliability by removing a dependency on an on-premises service and improves performance by creating a shorter path for user authentication.\nThis recommendation doesn't apply when using Microsoft Entra ID or Entra Domain Services joined session hosts.\n", - "pgVerified": true, - "description": "Deploy multiple domain controllers across availability zones in each region with AVD session hosts.", - "potentialBenefits": "Enhanced identity resilience", - "publishedToLearn": false, + "recommendationControl": "High Availability", + "longDescription": "This copies your backups synchronously across three Azure availability zones in the primary region, if Geo is selected it creates 3 more copies in a secondary region.\n", + "pgVerified": false, + "description": "Use Zone-redundant or Geo-zone-redundant Backup storage redundancy", + "potentialBenefits": "Enhanced availability and reliability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", + "recommendationResourceType": "Microsoft.Sql/managedInstances", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "99bf5c94-aa68-4bb3-8b7f-45d1c5f09b5d", + "aprlGuid": "c14de326-2729-4be7-a91f-4ea185d24b10", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/architecture/example-scenario/identity/adds-extend-domain#reliability", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/connection-types-overview?view=azuresql#connection-types", + "name": "Connection types" } ], - "recommendationControl": "High Availability", - "longDescription": "When using custom DNS servers, deploy DNS servers on Azure virtual machines across availability zones in the same region as the session hosts. 
This improves the environment's reliability by removing a dependency on an on-premises service and improves performance by creating a shorter path for name resolution.\n", - "pgVerified": true, - "description": "Deploy two or more DNS servers across availability zones in each region with AVD session hosts.", - "potentialBenefits": "Enhanced reliability and performance", - "publishedToLearn": false, + "recommendationControl": "Scalability", + "longDescription": "Redirect mode enables direct connectivity to the instance resulting in improved latency and throughput. Redirect mode applies to the VNet-local endpoint only, while the public endpoint will always default to Proxy connection mode.\n", + "pgVerified": false, + "description": "Use Redirect connection type to accelerate application access", + "potentialBenefits": "Improved latency and throughput", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVD", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Sql/managedInstances", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "726abfe3-adae-4a6d-8eb8-4b27a7214ca1", + "aprlGuid": "257cd903-700f-4a79-bd37-7dce2b511df4", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/connect-multiple-private-clouds-same-region", - "name": "Connect Private Clouds in the same region" + "url": "https://learn.microsoft.com/azure/azure-sql/managed-instance/failover-group-sql-mi?view=azuresql", + "name": "Failover groups overview and best practices" } ], - "recommendationControl": "High Availability", - "longDescription": "Use the Interconnect feature for direct communication between private clouds in different availability zones, enabling connectivity between the private clouds management and workload 
networks.\n", - "pgVerified": true, - "description": "Use the AVS Interconnect feature to connect private clouds in different availability zones", - "potentialBenefits": "Enhanced private cloud connectivity", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "If an outage impacts one or more of the databases in the managed instance, you can manually or automatically failover all the databases inside the instance to a secondary region.\n", + "pgVerified": false, + "description": "Configure a secondary instance and a Failover group to enable failover to another region", + "potentialBenefits": "Ensure seamless service with cross-region failover", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", + "recommendationResourceType": "Microsoft.Sql/managedInstances", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c2794660-ffd7-4da3-96ba-5d546b70b1c6", + "aprlGuid": "c9afeb1e-e706-4809-be4e-75d9fac708f2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-identity-source-vcenter", - "name": "Set an external identity source for vCenter" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-external-identity-source-nsx-t", - "name": "Set an external identity for NSX-T" + "url": "https://techcommunity.microsoft.com/t5/azure-sql/monitoring-options-available-for-azure-sql-managed-instance/ba-p/1065416", + "name": "Azure SQL Managed Instance monitoring options" } ], - "recommendationControl": "Security", - "longDescription": "Ensure two external identity sources are configured for NSX and vCenter Server. 
The VMware vCenter Server and NSX Manager use these for authentication with external identities.\n", - "pgVerified": true, - "description": "Integrate LDAPS Identity with dual sources for enhanced NSX and vCenter security", - "potentialBenefits": "Continuous login access during maintenances", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitoring and alerting are an important part of database operations. When working with Azure SQL Database, make use of Azure Monitor and SQL Insights to ensure that you capture relevant database metrics.\n", + "pgVerified": false, + "description": "Monitor your Azure SQL MI Managed Instance in near-real time to detect reliability incidents", + "potentialBenefits": "Quick incident detection and response", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "bce16eee-0933-4baa-ab4d-8d1bb5653fc2", + "aprlGuid": "9fad5392-b852-4807-9b6d-3f700ff9771a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-hcx-network-extension-high-availability", - "name": "HCX Network extension high availability" - }, - { - "url": "https://docs.vmware.com/en/VMware-HCX/4.8/hcx-user-guide/GUID-E1353511-697A-44B0-82A0-852DB55F97D7.html", - "name": "Understanding Network Extension High Availability" + "url": "https://learn.microsoft.com/azure/azure-sql/database/always-encrypted-landing?view=azuresql", + "name": "Overview of Always Encrypted" } ], - "recommendationControl": "High Availability", - "longDescription": "Enable Network Extension High Availability for appliance failure 
tolerance in HCX service. It pairs selected appliances for Active Standby configuration, ensuring high availability and quick recovery, keeping configurations in-service despite failures.\n", - "pgVerified": true, - "description": "Use HCX Network Extension High Availability", - "potentialBenefits": "Improves HCX service continuity", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner.\n", + "pgVerified": false, + "description": "Back Up Your Keys", + "potentialBenefits": "Enhanced security and data recovery", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6be9a543-cf82-4926-82ea-7e1f1ffaad80", - "recommendationTypeId": null, + "aprlGuid": "74c2491d-048b-0041-a140-935960220e20", + "recommendationTypeId": "2ea11bcb-dfd0-48dc-96f0-beba578b989a", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://docs.vmware.com/en/VMware-HCX/4.8/hcx-user-guide/GUID-0C746416-850E-46F7-85DD-4D4326A23785.html", - "name": "Requirements for Network Extension" + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/active-geo-replication-overview", + "name": "Active Geo Replication" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Do not extend the network used by the HCX Management devices to ensure the network's security and stability.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": 
"Active Geo Replication ensures business continuity by utilizing readable secondary database replicas. In case of primary database failure, manually failover to secondary database. Secondaries, up to four, can be in same/different regions, used for read-only access.\n", "pgVerified": true, - "description": "Verify Management Networks are not extended with HCX Network Extension", - "potentialBenefits": "Enhanced network safety and performance", - "publishedToLearn": false, + "description": "Use Active Geo Replication to Create a Readable Secondary in Another Region", + "potentialBenefits": "Enhanced disaster recovery and read scalability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", + "recommendationResourceType": "Microsoft.Sql/servers", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of SQL databases that are not part of Geo Replication.\nresources\n| where type == \"microsoft.sql/servers/databases\"\n| summarize secondaryTypeCount = countif(isnotempty(properties.secondaryType)) by name\n| where secondaryTypeCount == 0\n| join kind=inner (\n Resources\n | where type == \"microsoft.sql/servers/databases\"\n) on name\n| extend param1 = \"Not part of Geo Replication\"\n| project recommendationId = \"74c2491d-048b-0041-a140-935960220e20\", name, id, tags, param1\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0943aa90-e3db-4c61-aef1-782b6a6a3881", + "aprlGuid": "943c168a-2ec2-a94c-8015-85732a1b4859", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/azure-vmware/application-platform#use-fault-domains", - "name": "Use fault domains" + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/auto-failover-group-overview?tabs=azure-powershell", + "name": 
"AutoFailover Groups" }, { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/configure-storage-policy", - "name": "Configure storage policy" + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/designing-cloud-solutions-for-disaster-recovery", + "name": "DR Design" } ], - "recommendationControl": "High Availability", - "longDescription": "The Azure VMware Solution's service SLA is influenced by vSAN storage policies, which change based on cluster size. For clusters over 6 hosts, an FTT-2 policy (RAID-1 or RAID-6) is advised. FTT refers to the Fault Tolerance feature.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Failover Groups facilitate disaster recovery by configuring databases on one logical server to replicate to another region's logical server. This streamlines geo-replicated database management, offering a single endpoint for connection routing to replicated databases if the primary server fails.\n", "pgVerified": true, - "description": "Verify vSAN FTT configuration aligns with the cluster size", - "potentialBenefits": "Enhanced cluster reliability", - "publishedToLearn": false, + "description": "Auto Failover Groups can encompass one or multiple databases, usually used by the same app.", + "potentialBenefits": "Improves load balancing and disaster recovery", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", + "recommendationResourceType": "Microsoft.Sql/servers", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of SQL databases that are not configured to use a failover-group.\nresources\n| where type =~'microsoft.sql/servers/databases'\n| where isnull(properties['failoverGroupId'])\n| project recommendationId = \"943c168a-2ec2-a94c-8015-85732a1b4859\", name, id, tags, param1= strcat(\"databaseId=\", 
properties['databaseId'])\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6f573d60-be93-4f18-8016-42e923e3c05e", - "recommendationTypeId": null, + "aprlGuid": "c0085c32-84c0-c247-bfa9-e70977cbf108", + "recommendationTypeId": "807e58d0-e385-41ad-987b-4a4b3e3fb563", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://azure.github.io/Azure-Proactive-Resiliency-Library/services/networking/expressroute-circuits", - "name": "APRL guidance for ExpressRoute circuits" - }, - { - "url": "https://learn.microsoft.com/azure/expressroute/expressroute-howto-circuit-portal-resource-manager?pivots=expressroute-preview#create-a-new-expressroute-circuit-preview", - "name": "Create a new ExpressRoute circuit" + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/high-availability-sla", + "name": "Zone Redundant Databases" } ], "recommendationControl": "High Availability", - "longDescription": "Microsoft suggests using two or more ExpressRoute circuits at distinct peering locations for critical workloads. Connect these circuits and your Azure VMware Solutions private clouds using Global Reach.\n", + "longDescription": "By default, Azure SQL Database premium tier provisions multiple copies within the same region. 
For geo redundancy, databases can be set as Zone Redundant, distributing copies across Azure Availability Zones to maintain availability during regional outages.\n", "pgVerified": true, - "description": "Align ExpressRoute configuration with best practices for circuit resilience", - "potentialBenefits": "Enhanced circuit resilience for Azure VMware", - "publishedToLearn": false, + "description": "Enable zone redundancy for Azure SQL Database to achieve high availability and resiliency", + "potentialBenefits": "Enhanced reliability, no extra cost", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", - "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationImpact": "Medium", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Finds non-zone redundant SQL databases and lists them\nResources\n| where type =~ 'microsoft.sql/servers/databases'\n| where tolower(tostring(properties.zoneRedundant))=~'false'\n|project recommendationId = \"c0085c32-84c0-c247-bfa9-e70977cbf108\", name, id, tags\n\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "91c84596-1c41-48fe-8d5e-3f817e6a273b", + "aprlGuid": "cbb17a29-64fb-c943-95d0-8df814a37c40", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/deploy-vsan-stretched-clusters#deploy-a-stretched-cluster-private-cloud", - "name": "Deploy vSAN streched cluster" + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/troubleshoot-common-connectivity-issues", + "name": "How to Implement Retry Logic" } ], "recommendationControl": "High Availability", - "longDescription": "Azure VMware Solution vSAN stretched clusters cover 2 Availability Zones plus a third for witness. 
Use ExpressRoute for added resilience by deploying two circuits in different locations. With Global Reach, create a mesh topology by connecting on-premises circuits to Azure's managed circuits.\n", + "longDescription": "During transient failures, the application should handle connection retries effectively with Azure SQL Database. No Database layer configuration is needed; instead, the application must be set up for graceful retrying.\n", "pgVerified": true, - "description": "Deploy two or more circuits in different peering locations when using stretched clusters", - "potentialBenefits": "Enhanced resilience and connectivity", - "publishedToLearn": false, + "description": "Implement Retry Logic", + "potentialBenefits": "Enhanced connectivity stability", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", + "recommendationResourceType": "Microsoft.Sql/servers", "recommendationImpact": "High", "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "bdac462a-2eda-4a67-887d-46d58f141afe", + "aprlGuid": "7e7daec9-6a81-3546-a4cc-9aef72fec1f7", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/azure-vmware/move-azure-vmware-solution-across-regions", - "name": "Private Clouds in two regions" + "url": "https://learn.microsoft.com/en-us/azure/azure-monitor/insights/azure-sql#analyze-data-and-create-alerts", + "name": "Azure Monitor" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/monitoring-sql-database-azure-monitor", + "name": "Azure SQL Database Monitoring" }, { - "url": "https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/scenarios/azure-vmware/eslz-dual-region-network-topology", - "name": "Dual Region Network Topology" + "url": 
"https://learn.microsoft.com/en-us/azure/azure-sql/database/monitoring-sql-database-azure-monitor-reference", + "name": "Monitoring SQL Database Reference" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Two Azure VMware Solution private clouds can be deployed in different regions for business continuity, implementing a mesh network topology based on ExpressRoute Gateway Connections and Global Reach Connections.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitoring and alerting are an important part of database operations. When working with Azure SQL Database, make use of Azure Monitor and SQL Insights to ensure that you capture relevant database metrics.\n", "pgVerified": true, - "description": "Deploy dual Azure VMware Solution clouds in different regions for disaster recovery", - "potentialBenefits": "Enhanced disaster recovery", - "publishedToLearn": false, + "description": "Monitor your Azure SQL Database in Near Real-Time to Detect Reliability Incidents", + "potentialBenefits": "Quick incident detection and response", "tags": null, - "recommendationResourceType": "Specialized.Workload/AVS", + "recommendationResourceType": "Microsoft.Sql/servers", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// cannot-be-validated-with-arg\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of SQL databases that are not configured for monitoring.\nresources\n| where type == \"microsoft.insights/metricalerts\"\n| mv-expand properties.scopes\n| mv-expand properties.criteria.allOf\n| project databaseid = properties_scopes, monitoredMetric = properties_criteria_allOf.metricName\n| where databaseid contains 'databases'\n| summarize monitoredMetrics=make_list(monitoredMetric) by databaseid=tolower(tostring(databaseid))\n| join kind=fullouter (\n resources\n | where type =~ 'microsoft.sql/servers/databases'\n | project databaseid = tolower(id), 
name, tags\n) on databaseid\n| where isnull(monitoredMetrics)\n| project recommendationId = \"7e7daec9-6a81-3546-a4cc-9aef72fec1f7\", name, id=databaseid1, tags, param1=strcat(\"MonitoringMetrics=false\" )\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4c78fab4-845a-495d-ab14-3ad51de53a2a", + "aprlGuid": "d6ef87aa-574e-584e-a955-3e6bb8b5425b", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/powershell/high-performance-computing/hpcpack-ha-cloud?view=hpc19-ps#hpc-pack-cluster-shares", - "name": "Learn More" + "url": "https://learn.microsoft.com/en-us/azure/key-vault/general/overview", + "name": "Azure Key Vault" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/azure-sql/database/always-encrypted-landing?view=azuresql", + "name": "Getting Started with Always Encrypted" } ], - "recommendationControl": "High Availability", - "longDescription": "Currently in all HPC Pack ARM templates we create the cluster share on one of the head node which is not highly available.\n", - "pgVerified": false, - "description": "Ensure File shares that stores jobs metadata are accessible from all head nodes", - "potentialBenefits": "Enhances job metadata availability", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "It is highly recommended to use Azure Key Vault (AKV) to store encryption keys related to Always Encrypted configurations, however it is not required. 
If you are not using AKV, then ensure that your keys are properly backed up and stored in a secure manner.\n", + "pgVerified": true, + "description": "Back Up Your Keys", + "potentialBenefits": "Enhanced security and data recovery", "tags": null, - "recommendationResourceType": "Specialized.Workload/HPC", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "b02b5a0e-3770-44da-a099-5dd4d9f8cd70", + "aprlGuid": "de266d8a-a9f3-4cb9-be95-9306001fceea", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/powershell/high-performance-computing/hpcpack-auto-grow-shrink?view=hpc19-ps", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/azure-sql/database/failover-group-sql-db?view=azuresql#endpoint-redirection", + "name": "Failover Group endpoint redirection" } ], - "recommendationControl": "Scalability", - "longDescription": "By deploying Azure \"burst\" nodes (both Windows and Linux) in your HPC Pack cluster or creating your HPC Pack cluster in Azure, you can automatically grow or shrink the cluster's resources such as nodes or cores according to the workload on the cluster.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "When using Failover Groups, it is recommended to connect to the Failover Group endpoint instead of individual database endpoints. 
This allows for automatic redirection to the secondary database in case of a failover, ensuring high availability.\n", "pgVerified": false, - "description": "Automatically grow and shrink HPC Pack cluster resources", - "potentialBenefits": "Efficient, uninterrupted execution", - "publishedToLearn": false, + "description": "Use Failover Group endpoints for database connections", + "potentialBenefits": "Enhanced disaster recovery", "tags": null, - "recommendationResourceType": "Specialized.Workload/HPC", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Sql/servers", + "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a48b1be6-77a3-4e3c-8205-dda2ba010a99", - "recommendationTypeId": null, + "aprlGuid": "43663217-a1d3-844b-80ea-571a2ce37c6c", + "recommendationTypeId": "b57f7a29-dcc8-43de-86fa-18d3f9d3764d", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/powershell/high-performance-computing/hpcpack-ha-cloud?view=hpc19-ps#dealing-with-head-node-failure", - "name": "Learn More" + "url": "https://learn.microsoft.com/azure/cosmos-db/distribute-data-globally", + "name": "Distribute data globally with Azure Cosmos DB" + }, + { + "url": "https://learn.microsoft.com/azure/cosmos-db/high-availability#tips-for-building-highly-available-applications", + "name": "Tips for building highly available applications" } ], "recommendationControl": "High Availability", - "longDescription": "Establish a cluster with a minimum of two head nodes. 
In the event of a head node failure, the active HPC Service will be automatically transferred from the affected head node to another functioning one.\n", - "pgVerified": false, - "description": "Use multiple head nodes for HPC Pack", - "potentialBenefits": "Enhanced reliability for HPC", - "publishedToLearn": false, + "longDescription": "Enable a secondary region in Cosmos DB for higher SLA without downtime. Simple as pinning a location on a map. For Strong consistency, configure at least three regions for write availability in case of failure.\n", + "pgVerified": true, + "description": "Configure at least two regions for high availability", + "potentialBenefits": "Enhances SLA and resilience", "tags": null, - "recommendationResourceType": "Specialized.Workload/HPC", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Query to find Azure Cosmos DB accounts that have less than 2 regions or less than 3 regions with strong consistency level\nResources\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\n| where\n array_length(properties.locations) < 2 or\n (array_length(properties.locations) < 3 and properties.consistencyPolicy.defaultConsistencyLevel == 'Strong')\n| project recommendationId='43663217-a1d3-844b-80ea-571a2ce37c6c', name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "37eec891-7880-4759-b597-7cd925512fe3", - "recommendationTypeId": null, + "aprlGuid": "9cabded7-a1fc-6e4a-944b-d7dd98ea31a2", + "recommendationTypeId": "5de9f2e6-087e-40da-863a-34b7943beed4", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/powershell/high-performance-computing/hpcpack-ha-cloud?view=hpc19-ps#dealing-with-ad-failure", - "name": "Learn More" + "url": 
"https://learn.microsoft.com/azure/cosmos-db/how-to-manage-database-account#automatic-failover", + "name": "Manage an Azure Cosmos DB account by using the Azure portal" } ], - "recommendationControl": "High Availability", - "longDescription": "When HPC failed to connect to the Domain controller, admin and user will not be able to connect to the HPC Service thus not able to manage and submit jobs to the cluster.\n", - "pgVerified": false, - "description": "Use HPC Pack Azure AD Integration or other highly available AD configuration", - "potentialBenefits": "Enhanced reliability and job management", - "publishedToLearn": false, + "recommendationControl": "Disaster Recovery", + "longDescription": "Cosmos DB boasts high uptime and resiliency. Even so, issues may arise. With Service-Managed failover, if a region is down, Cosmos DB automatically switches to the next available region, requiring no user action.\n", + "pgVerified": true, + "description": "Enable service-managed failover for multi-region accounts with single write region", + "potentialBenefits": "Auto failover for high uptime", "tags": null, - "recommendationResourceType": "Specialized.Workload/HPC", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Query to list all Azure Cosmos DB accounts that do not have multiple write locations or automatic failover enabled\nResources\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\n| where\n array_length(properties.locations) > 1 and\n tobool(properties.enableAutomaticFailover) == false and\n tobool(properties.enableMultipleWriteLocations) == false\n| project recommendationId='9cabded7-a1fc-6e4a-944b-d7dd98ea31a2', name, id, tags\n" }, { "publishedToAdvisor": null, - "aprlGuid": "a9b649a5-2bfe-40ca-9b8f-34f9c71dfa12", + "aprlGuid": 
"921631f6-ed59-49a5-94c1-f0f3ececa580", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Quality Insights" - }, - { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/Move-VM-from-AvSet-to-AvZone/Move-Regional-SAP-HA-To-Zonal-SAP-HA-WhitePaper", - "name": "Move Regional SAP HA to Zonal" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/sap-high-availability-architecture-scenarios#high-availability-deployment-options-for-sap-workload", - "name": "High Availability Deployment Options for SAP" + "url": "https://learn.microsoft.com/en-us/azure/reliability/reliability-cosmos-db-nosql", + "name": "High availability in Azure Cosmos DB" } ], "recommendationControl": "High Availability", - "longDescription": "Azure Availability Zones are physically separate locations within each Azure region that are tolerant to local failures. Use availability zones to protect your applications and data against unlikely data center failures. Ensure each single point of failure of each SAP production system is protected with high availability using multiple availability zones. 
If you cannot deploy across different zones in a region, then refer to Microsoft guidance for High availability deployment options for SAP workload.\n", - "pgVerified": true, - "description": "Ensure that each SAP production system is designed for high availability across availability zones", - "potentialBenefits": "High availability for SAP systems", - "publishedToLearn": false, + "longDescription": "When availability zones are configured, Azure Cosmos DB intelligently distributes the 4 replicas of your data across all available zones. It ensures that your Azure Cosmos DB can withstand an outage in one availability zone and remain fully operational throughout.\n", + "pgVerified": false, + "description": "Enable availability zones", + "potentialBenefits": "Enhances high availability", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Query to find Azure Cosmos DB accounts that do not utilize availability zones and are deployed in availability-zone supported regions\nResources\n| where type == \"microsoft.documentdb/databaseaccounts\"\n| where properties.capabilities !has_cs 'EnableServerless'\n| project recommendationId='921631f6-ed59-49a5-94c1-f0f3ececa580', name, id, tags, locations=properties.locations\n| mv-expand locations\n| where not(locations.isZoneRedundant) //filter out already AZ enabled regions\n| extend location=tostring(locations.locationName)\n| project-away locations\n| where location in (\n 'Australia East', 'Brazil South', 'Canada Central', 'Central India', 'Central US',\n 'China North 3', 'East Asia', 'East US', 'East US 2', 'France Central',\n 'Germany West Central', 'Israel Central', 'Italy North', 'Japan East', 'Japan West',\n 'Korea Central', 'Mexico Central', 'New 
Zealand North', 'North Europe', 'Norway East',\n 'Poland Central', 'Qatar Central', 'South Africa North', 'South Central US', 'Southeast Asia',\n 'Spain Central', 'Sweden Central', 'Switzerland North', 'UAE North', 'UK South',\n 'US Gov Virginia', 'West Europe', 'West US 2', 'West US 3') // filter out regions unsupported for AZs\n| project-rename param1=location\n" }, { "publishedToAdvisor": null, - "aprlGuid": "49bd34ab-d117-4b0e-99f8-34cc8a5394bc", + "aprlGuid": "9ce78192-74a0-104c-b5bb-9a443f941649", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/virtual-machine-scale-set-sap-deployment-guide", - "name": "Virtual machine Scale Set SAP Deployment Guide" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/virtual-machine-scale-set-sap-deployment-guide?tabs=scaleset-cli#important-consideration-of-flexible-virtual-machine-scale-sets-for-sap-workload", - "name": "Considerations for Flexible VM Scale Sets for SAP" + "url": "https://learn.microsoft.com/azure/cosmos-db/distribute-data-globally", + "name": "Distribute data globally with Azure Cosmos DB" }, { - "url": "https://techcommunity.microsoft.com/t5/running-sap-applications-on-the/how-to-easily-migrate-an-existing-sap-system-vms-to-flexible/ba-p/3833548", - "name": "Migrate existing SAP system VMs to VMSS Flex" + "url": "https://learn.microsoft.com/azure/cosmos-db/conflict-resolution-policies", + "name": "Conflict resolution types and resolution policies in Azure Cosmos DB" } ], "recommendationControl": "High Availability", - "longDescription": "Use Virtual Machines Scale Set (VMSS) with flexible orchestration to distribute the virtual machines across specified zones and within each zone to also distribute VMs across different fault domains within the zone on a best effort basis. 
Configure VMSS Flex following Microsoft recommendation for SAP workload using the right mode and correct settings. If you aren't currently using VMSS Flex for SAP application servers and also not using Availability Sets with Fault domain and Update domain distribution, then you should consider moving to VMSS Flex architecture to improve the resiliency posture of your SAP deployment. The following blog post in links below outlines the details on the process of migrating existing SAP workloads that are deployed in an availability set or availability zone to a flexible scale set with FD=1 deployment option.\n", + "longDescription": "Multi-region write capability allows for designing applications that are highly available across multiple regions, though it demands careful attention to consistency requirements and conflict resolution. Improper setup may decrease availability and cause data corruption due to unhandled conflicts.\n", "pgVerified": true, - "description": "Run SAP application servers on two or more VMs using VMSS Flex", - "potentialBenefits": "Enhanced resiliency for SAP on Azure", - "publishedToLearn": false, + "description": "Evaluate multi-region write capability", + "potentialBenefits": "Enhances high availability", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Query to find Azure Cosmos DB accounts that have multiple read locations but do not have multiple write locations enabled\nResources\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\n| where\n array_length(properties.locations) > 1 and\n properties.enableMultipleWriteLocations == false\n| project recommendationId='9ce78192-74a0-104c-b5bb-9a443f941649', name, id, tags\n\n" }, { "publishedToAdvisor": null, - 
"aprlGuid": "b60ae773-9917-4bca-8a42-7cb45365a917", - "recommendationTypeId": "58d6648d-32e8-4346-827c-4f288dd8ca24", + "aprlGuid": "e544520b-8505-7841-9e77-1f1974ee86ec", + "recommendationTypeId": "52fef986-5897-4359-8b92-0f22749f0d73", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, - { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" - }, - { - "url": "https://www.azure.cn/en-us/support/sla/virtual-machines/", - "name": "VM SLA" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/planning-guide-storage", - "name": "SAP Storage Planning Guide" + "url": "https://learn.microsoft.com/azure/cosmos-db/continuous-backup-restore-introduction", + "name": "Continuous backup with point in time restore feature in Azure Cosmos DB" } ], - "recommendationControl": "High Availability", - "longDescription": "For single-instance VMs, both OS and data disks must be either Premium SSD or Ultra Disk to achieve the single-instance SLA of 99.9% availability.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Cosmos DB's backup is always on, offering protection against data mishaps. 
Continuous mode allows for self-serve restoration to a pre-mishap point, unlike periodic mode which requires contacting Microsoft support, leading to longer restore times.\n", "pgVerified": true, - "description": "If using single-instance VMs all OS and data disks must be Premium SSD or Ultra Disk", - "potentialBenefits": "Higher SLA of 99.9% with SSDs", - "publishedToLearn": false, + "description": "Configure continuous backup mode", + "potentialBenefits": "Faster self-serve data restore", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", "recommendationImpact": "High", "automationAvailable": true, - "query": "// Azure Resource Graph Query\r\n// Find all single instance VMs that have an attached disk that is not in the Premium or Ultra sku tier.\r\n\r\nresources\r\n| where type =~ 'Microsoft.Compute/virtualMachines'\r\n| where isnull(properties.virtualMachineScaleSet.id)\r\n| where isnotnull(properties.availabilitySet)\r\n| extend lname = tolower(name)\r\n| join kind=leftouter(resources\r\n | where type =~ 'Microsoft.Compute/disks'\r\n | where not(sku.tier =~ 'Premium') and not(sku.tier =~ 'Ultra')\r\n | extend lname = tolower(tostring(split(managedBy, '/')[8]))\r\n | project lname, name\r\n | summarize disks = make_list(name) by lname) on lname\r\n| where isnotnull(disks)\r\n| project recommendationId = \"b60ae773-9917-4bca-8a42-7cb45365a917\", name, id, tags, param1=strcat(\"AffectedDisks: \", disks)\r\n" + "query": "// Azure Resource Graph Query\n// Query all Azure Cosmos DB accounts that do not have continuous backup mode configured\nResources\n| where type =~ 'Microsoft.DocumentDb/databaseAccounts'\n| where\n properties.backupPolicy.type == 'Periodic' and\n properties.enableMultipleWriteLocations == false and\n properties.enableAnalyticalStorage == false\n| project recommendationId='e544520b-8505-7841-9e77-1f1974ee86ec', name, id, tags\n" }, { 
"publishedToAdvisor": null, - "aprlGuid": "094400a5-f112-408d-a334-afd68873ff0f", + "aprlGuid": "c006604a-0d29-684c-99f0-9729cb40dac5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" + "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/query/pagination#handling-multiple-pages-of-results", + "name": "Pagination in Azure Cosmos DB" } ], - "recommendationControl": "High Availability", - "longDescription": "High availability for databases should be implemented using database native replication technologies and the data should be replicated synchronously that is in SYNC mode from primary database to a stand-by node.\n", + "recommendationControl": "Scalability", + "longDescription": "Cosmos DB has a 4 MB response limit, leading to paginated results for large or partition-spanning queries. Each page shows availability and provides a continuation token for the next. 
A while loop in code is necessary to traverse all pages until completion.\n", "pgVerified": true, - "description": "Ensure synchronous data replication (SYNC mode) between primary and secondary VM nodes", - "potentialBenefits": "Ensures high availability for SAP data", - "publishedToLearn": false, + "description": "Ensure query results are fully drained", + "potentialBenefits": "Maximizes data retrieval efficiency", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e09ca960-20b7-4831-b85b-83ec84c1390e", + "aprlGuid": "7eb32cf9-9a42-1540-acf8-597cbba8a418", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" + "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/conceptual-resilient-sdk-applications", + "name": "Designing resilient applications with Azure Cosmos DB SDKs" } ], - "recommendationControl": "High Availability", - "longDescription": "SAP shared file systems such as /sapmnt, /usr/trans, interfaces should be made highly available.\nIn case of Azure File Shares, we recommend that you use ZRS (Zone-redundant storage) and for Azure NetApp Files use Zonal replication for your volumes.\n", + "recommendationControl": "Scalability", + "longDescription": "Using a single instance of the SDK client for each account and application is crucial as connections are tied to the client. 
Compute environments have a limit on open connections, affecting connectivity when exceeded.\n", "pgVerified": true, - "description": "Design SAP shared file systems for high availability, utilizing availability zones when possible", - "potentialBenefits": "Enhanced data availability for SAP", - "publishedToLearn": false, + "description": "Maintain singleton pattern in your client", + "potentialBenefits": "Optimizes connections and efficiency", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5663a808-56be-49ea-8d5c-c5dfc6925f76", + "aprlGuid": "fa6ac22f-0584-bb4b-80e4-80f4755d1a97", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/sap-hana-high-availability?tabs=lb-portal#test-the-cluster-setup", - "name": "Test Cases" + "url": "https://learn.microsoft.com/azure/cosmos-db/nosql/conceptual-resilient-sdk-applications", + "name": "Designing resilient applications with Azure Cosmos DB SDKs" } ], "recommendationControl": "High Availability", - "longDescription": "Test all high availability solutions thoroughly (including kernel panic in Linux VMs and also fail-back). Include zonal failure scenarios in your testing, the testing should confirm that each layer of your SAP solution including database, central services, application servers and shared file systems is configured correctly for zone redundancy, the solution meets RPO = 0 and the application fails over automatically meeting your RTO.\nThe fail back can be either automatic or manual.\n", + "longDescription": "Cosmos DB SDKs automatically manage many transient errors through retries. 
Despite this, it's crucial for applications to implement additional retry policies targeting specific cases that the SDKs can't generically address, ensuring more robust error handling.\n", "pgVerified": true, - "description": "Test high availability solutions thoroughly to ensure fail overs work as expected", - "potentialBenefits": "Ensures SAP Azure's failover reliability", - "publishedToLearn": false, + "description": "Implement retry logic in your client", + "potentialBenefits": "Enhances error handling resilience", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "1b8a3051-dfd4-4780-bfb7-446296774029", + "aprlGuid": "deaea200-013c-414b-ac9f-bfa7a7fb13f0", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" + "url": "https://learn.microsoft.com/azure/cosmos-db/create-alerts", + "name": "Create alerts for Azure Cosmos DB using Azure Monitor" } ], - "recommendationControl": "High Availability", - "longDescription": "When executing a migrate command in a Linux Pacemaker cluster, the system generates a temporary \"prefer\" location constraint, aiming to move a resource to a specified node. This constraint prioritizes the target node for the resource temporarily without permanently altering the cluster's configuration.\nDuring planned maintenances and fail over testing, you can leverage the migrate command for temporary resource relocation during maintenance or administrative tasks to ensure minimal disruption. This constraint is not permanent and does not survive reboots or cluster resets. 
It's designed for short-term adjustments.\nOnce the planned task necessitating the resource migration is complete, manually remove the temporary constraint to revert to the cluster's original resource management policies.\nThis approach allows for controlled resource movement within the cluster, facilitating maintenance while preserving the integrity and efficiency of the cluster's configuration.\n", + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Monitoring the availability and responsiveness of Azure Cosmos DB resources and having alerts set up for your workload is a good practice. This ensures you stay proactive in handling unforeseen events.\n", "pgVerified": true, - "description": "Remove unwanted location constraints from Linux Pacemaker clusters", - "potentialBenefits": "Enhanced maintenance and failover handling", - "publishedToLearn": false, + "description": "Monitor Cosmos DB health and set up alerts", + "potentialBenefits": "Proactive issue management", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.DocumentDB/databaseAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n" }, { "publishedToAdvisor": null, - "aprlGuid": "820b4c0c-8a74-442a-8ba7-b0cb840cd983", + "aprlGuid": "af426a99-62a6-6b4c-9662-42d220b413b8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/capacity-reservation-overview", - "name": "Capacity Reservation" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-service-levels", + "name": "Service levels for Azure NetApp Files | Microsoft Learn" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "To ensure the availability of compute resources for critical 
VM roles in a DR region, consider securing capacity either through a warm standby approach or by utilizing Azure's On-demand Capacity Reservation.\n\nWarm standby involves keeping VMs in the DR region running. On-demand Capacity Reservation, on the other hand, reserves compute capacity without having to run the VMs, allowing you to start them when needed. When DR VMs are not needed, the reserved capacity may safely be used to run other workloads without the risk of losing the capacity to other customers. This strategy guarantees resource availability for your critical workloads in the event of a disaster, balancing cost and readiness.\n", + "recommendationControl": "Scalability", + "longDescription": "Service levels, part of capacity pool attributes, determine the maximum throughput per volume quota in Azure NetApp Files. It combines read and write speed, offering three levels: Standard (16 MiB/s per 1TiB), Premium (64 MiB/s per 1TiB), and Ultra (128 MiB/s per 1TiB) throughput.\n", "pgVerified": true, - "description": "Secure compute resource capacity for critical VM roles in DR region", - "potentialBenefits": "Guarantees DR region availability", - "publishedToLearn": false, + "description": "Use the correct service level and volume quota size for the expected performance level", + "potentialBenefits": "Optimized performance and cost efficiency", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "fb8bdcee-d88f-408d-8572-a76a4aaa733b", + "aprlGuid": "ab984130-c57b-6c4a-8d04-6723b4e1bdb6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/en-us/azure/sap/workloads/disaster-recovery-sap-guide?tabs=windows", - "name": "SAP Disaster Recovery Guide" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-network-topologies", + "name": "Guidelines for Azure NetApp Files network planning | Microsoft Learn" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Replicate production databases (ASYNC) to the DR location using the database vendor's replication technology.\n", + "recommendationControl": "Scalability", + "longDescription": "Standard network feature in Azure NetApp Files enhances IP limits and VNet capabilities, including network security groups, user-defined routes on subnets, and diverse connectivity options.\n", "pgVerified": true, - "description": "Replicate production databases to DR location (ASYNC) using the vendor's replication technology", - "potentialBenefits": "Enhanced DR resilience", - "publishedToLearn": false, + "description": "Use standard network features for production in Azure NetApp Files", + "potentialBenefits": "Enhanced connectivity and security", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without standard network features.\nresources\n| where type =~ \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\n| where properties.networkFeatures != \"Standard\"\n| project recommendationId = \"ab984130-c57b-6c4a-8d04-6723b4e1bdb6\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "41f0d88e-7866-4444-aac4-ef5fee3e6874", + "aprlGuid": "47d100a5-7f85-5742-967a-67eb5081240a", "recommendationTypeId": null, "recommendationMetadataState": 
"Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, + "url": "https://learn.microsoft.com/azure/azure-netapp-files/use-availability-zones", + "name": "Use availability zones for high availability in Azure NetApp Files | Microsoft Learn" + } + ], + "recommendationControl": "High Availability", + "longDescription": "Availability zones are distinct locations within an Azure region to withstand local failures. Deploy your workload in multiple availability zones and use application-based replication or Azure NetApp Files cross-zone replication to achieve high availability. Note that failover is a manual process.\n", + "pgVerified": true, + "description": "Use availability zones for high availability in Azure NetApp Files", + "potentialBenefits": "High Availability across availability zones", + "tags": null, + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without an availability zone defined.\nResources\n| where type =~ \"Microsoft.NetApp/netAppAccounts/capacityPools/volumes\"\n| where array_length(zones) == 0 or isnull(zones)\n| project recommendationId = \"47d100a5-7f85-5742-967a-67eb5081240a\", name, id, tags\n\n" + }, + { + "publishedToAdvisor": null, + "aprlGuid": "8bb690e8-64d5-4838-8703-9ee3dbac688f", + "recommendationTypeId": null, + "recommendationMetadataState": "Active", + "learnMoreLink": [ { - "url": "https://aka.ms/ACESInventoryCheckSAP", - "name": "OpenSource Inventory Checks" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/manage-availability-zone-volume-placement", + "name": "Manage availability zone volume placement for Azure NetApp Files | Microsoft Learn" } ], - "recommendationControl": "Disaster Recovery", - 
"longDescription": "SAP components such as (A)SCS, application servers, WebDispatchers, etc are backed up to DR location using an appropriate backup tool or ASR.\n", + "recommendationControl": "Other Best Practices", + "longDescription": "Azure NetApp Files' availability zone (AZ) volume placement feature lets you deploy volumes in the same AZ with Azure compute and other services to have within AZ latency and share the same AZ failure domain.\n", "pgVerified": true, - "description": "SAP components are backed up to DR location using an appropriate backup tool or ASR", - "potentialBenefits": "Ensures SAP data safety and recovery", - "publishedToLearn": false, + "description": "Deploy ANF volumes in the same availability zone with Azure compute and other services", + "potentialBenefits": "Within AZ latency and tolerate failure of other AZ", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n" }, { "publishedToAdvisor": null, - "aprlGuid": "ee4dc309-00a1-49fe-92fa-1724baf5f103", + "aprlGuid": "72827434-c773-4345-9493-34848ddf5803", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/disaster-recovery-sap-guide?tabs=windows", - "name": "DR Guidance" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/snapshots-introduction", + "name": "How Azure NetApp Files snapshots work | Microsoft Learn" } ], - "recommendationControl": "Disaster Recovery", - "longDescription": "Implementing robust monitoring and alerting for DR in SAP on Azure ensures coverage across its complex, multi-layer architecture. 
This strategy is crucial for databases, services, applications, and shared systems.\n", + "recommendationControl": "High Availability", + "longDescription": "Azure NetApp Files snapshot technology ensures stability, scalability, and swift data recoverability without affecting performance. It supports automatic snapshot creation via policies for Azure NetApp Files data.\n", "pgVerified": true, - "description": "SAP shared files systems are replicated or backed up to DR location", - "potentialBenefits": "Enhances SAP DR oversight", - "publishedToLearn": false, + "description": "Use snapshots for data protection in Azure NetApp Files", + "potentialBenefits": "Stable, scalable, swift recovery, no perf impact", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", "recommendationImpact": "High", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without a snapshot policy defined.\nresources\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\n| where properties.dataProtection.snapshot.snapshotPolicyId == \"\"\n| project recommendationId = \"72827434-c773-4345-9493-34848ddf5803\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "0fabc52e-cdbb-4acd-8626-c4c637061e2d", + "aprlGuid": "b2fb3e60-97ec-e34d-af29-b16a0d61c2ac", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/disaster-recovery-sap-guide?tabs=windows", - "name": "DR Guidance" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/backup-introduction", + "name": "Understand Azure NetApp Files backup | Microsoft Learn" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "Automate the build 
of disaster recovery (DR) infrastructure (or pre-deploy DR resources) and streamline SAP service recovery as much as possible.\n", + "longDescription": "Azure NetApp Files offers a fully managed backup solution enhancing long-term recovery, archiving, and compliance.\n", "pgVerified": true, - "description": "Automate DR infrastructure build or pre-deploy DR resources", - "potentialBenefits": "Faster SAP recovery, reduced downtime", - "publishedToLearn": false, + "description": "Enable backup for data protection in Azure NetApp Files", + "potentialBenefits": "Enhances data recovery and compliance", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without a backup policy defined.\nresources\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\n| where properties.dataProtection.backup.backupPolicyId == \"\"\n| project recommendationId = \"b2fb3e60-97ec-e34d-af29-b16a0d61c2ac\", name, id, tags\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c300e949-528d-4ac9-889b-cacf8b4a6e90", + "aprlGuid": "e30317d2-c502-4dfe-a2d3-0a737cc79545", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/disaster-recovery-sap-guide?tabs=windows", - "name": "DR Guidance" + "url": "https://learn.microsoft.com/en-us/azure/azure-netapp-files/cross-region-replication-introduction", + "name": "Cross-region replication of Azure NetApp Files volumes" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "Create detailed documentation of your DR procedures for each layer of the 
SAP architecture-database, central services, application servers, and shared file systems. This documentation should include configuration details, failover mechanisms, and step-by-step recovery procedures.\n\nTest a wide range of failure scenarios, including regional outages. Testing should confirm that your DR strategy is robust, meets your RPO and RTO targets, and provides seamless failover across all layers of the SAP architecture. This will ensure a comprehensive and resilient DR strategy capable of withstanding regional failures and ensuring business continuity.\n", + "longDescription": "Azure NetApp Files replication offers data protection by allowing asynchronous cross-region volume replication for application failover in case of regional outages. Volumes can be replicated across regions, not concurrently with cross-zone replication. Note that failover is a manual process.\n", "pgVerified": true, - "description": "Document and test DR procedure ensure it meets RPO and RTO targets", - "potentialBenefits": "Ensures robust DR, meets RPO/RTO", - "publishedToLearn": false, + "description": "Enable Cross-region replication of Azure NetApp Files volumes", + "potentialBenefits": "Enhanced data protection and disaster recovery", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without cross-region replication.\nresources\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\n| extend remoteVolumeRegion = properties.dataProtection.replication.remoteVolumeRegion\n| extend volumeType = properties.volumeType\n| extend replicationType = iff((remoteVolumeRegion == location), 
\"CZR\", iff((remoteVolumeRegion == \"\"),\"n/a\",\"CRR\"))\n| where replicationType != \"CRR\" and volumeType != \"DataProtection\"\n| project recommendationId = \"e30317d2-c502-4dfe-a2d3-0a737cc79545\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "c27134b7-6917-4852-8276-3dbef5c71578", + "aprlGuid": "e3d742e1-dacd-9b48-b6b1-510ec9f87c96", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/disaster-recovery-sap-guide?tabs=windows", - "name": "DR Guidance" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/cross-zone-replication-introduction", + "name": "Cross-zone replication of Azure NetApp Files volumes | Microsoft Learn" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "For an SAP solution hosted on Azure it is imperative to implement a robust monitoring and alerting solution that comprehensively covers DR of each layer of the SAP architecture. Given the complexity of SAP systems, which span multiple layers using diverse technologies and Azure resources, each with potentially distinct DR replication mechanisms, an appropriate monitoring strategy is crucial. The different layers include database, central services, application, and shared file systems.\n", + "longDescription": "The cross-zone replication (CZR) feature enables asynchronous data replication between Azure NetApp Files volumes across different availability zones, ensuring data protection and critical application failover in case of zone-wide disasters. 
Note that failover is a manual process.\n", "pgVerified": true, - "description": "Ensure there is a robust monitoring and alerting solution in place for the entire DR solution", - "potentialBenefits": "Improved DR oversight and rapid issue response", - "publishedToLearn": false, + "description": "Enable Cross-zone replication of Azure NetApp Files volumes", + "potentialBenefits": "Enhances disaster recovery across availability zones", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// This Resource Graph query will return all Azure NetApp Files volumes without cross-zone replication.\nresources\n| where type == \"microsoft.netapp/netappaccounts/capacitypools/volumes\"\n| extend remoteVolumeRegion = properties.dataProtection.replication.remoteVolumeRegion\n| extend volumeType = properties.volumeType\n| extend replicationType = iff((remoteVolumeRegion == location), \"CZR\", iff((remoteVolumeRegion == \"\"),\"n/a\",\"CRR\"))\n| where replicationType != \"CZR\" and volumeType != \"DataProtection\"\n| project recommendationId = \"e3d742e1-dacd-9b48-b6b1-510ec9f87c96\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6b589ce6-c847-4cee-af35-f6e8eb1cf983", + "aprlGuid": "2f579fc9-e599-0d44-8b97-254f50ae04d8", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events", - "name": "VM Scheduled Events" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/high-availability-guide-suse-pacemaker?tabs=msi#configure-pacemaker-for-azure-scheduled-events", - "name": "Configure Pacemaker for Azure Scheduled Events" + 
"url": "https://learn.microsoft.com/azure/azure-netapp-files/monitor-azure-netapp-files", + "name": "Ways to monitor Azure NetApp Files | Microsoft Learn" } ], "recommendationControl": "Monitoring and Alerting", - "longDescription": "Scheduled events is an Azure Metadata Services that provides proactive notifications about upcoming maintenance events (for example, reboot) so that your application can prepare for them and limit disruption. You should configure scheduled events for all your critical Azure VMs.\n\nResource agent azure-events-az can also integrate with Pacemaker clusters.\n\nTo ensure high availability and service continuity in your Azure VMs, you should configure the azure-events-az resource agent within your Pacemaker clusters. This agent monitors for scheduled Azure maintenance events and can proactively relocate resources for a graceful node shutdown. Configure the agent to monitor specific event types such as Reboot and Redeploy, and enable verbose logging for detailed diagnostics.\n\nIn addition, it is also important that you define a procedure on how to react to scheduled events.\n", + "longDescription": "Azure NetApp Files offers metrics like allocated storage, actual usage, volume IOPS, and latency, enabling a better understanding of usage patterns and volume performance for NetApp accounts.\n", "pgVerified": true, - "description": "Configure scheduled events notification", - "potentialBenefits": "Proactive maintenance awareness", - "publishedToLearn": false, + "description": "Monitor Azure NetApp Files metrics to better understand usage pattern and performance", + "potentialBenefits": "Optimize usage and performance", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// 
cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "9d8f6678-694c-4da4-8384-415201f65194", + "aprlGuid": "687ae58f-517f-ca43-90fe-922497e61283", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-policy-definitions", + "name": "Azure Policy definitions for Azure NetApp Files | Microsoft Learn" }, { - "url": "https://docs.microsoft.com/en-us/azure/advisor/advisor-reference-reliability-recommendations", - "name": "ASCS-Pacemaker - Central Server Instance" + "url": "https://learn.microsoft.com/azure/governance/policy/tutorials/create-custom-policy-definition", + "name": "Creating custom policy definitions | Microsoft Learn" } ], - "recommendationControl": "High Availability", - "longDescription": "For the ASCS-Pacemaker (Central Server Instance), ensure that the Pacemaker cluster configuration parameters are correctly set up for SAP ASCS high availability.\n", + "recommendationControl": "Governance", + "longDescription": "Azure NetApp Files supports Azure policy integration using either built-in policy definitions or by creating custom ones to maintain organizational standards and compliance.\n", "pgVerified": true, - "description": "Configure a Pacemaker cluster for SAP ASCS high availability", - "potentialBenefits": "Enhances SAP ASCS uptime", - "publishedToLearn": false, + "description": "Enforce standards and assess compliance in Azure NetApp Files with Azure policy", + "potentialBenefits": "Enforce standards and assess compliance", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + 
"recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "5c2e52d0-25be-4b1c-833c-b98b5ef1a26b", + "aprlGuid": "cfa2244b-5436-47de-8287-b217875d3b0a", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-network-features", + "name": "Configure network features for an Azure NetApp Files volume" + }, + { + "url": "https://learn.microsoft.com/azure/azure-netapp-files/manage-smb-share-access-control-lists", + "name": "Manage SMB share ACLs in Azure NetApp Files" + }, + { + "url": "https://learn.microsoft.com/azure/azure-netapp-files/azure-netapp-files-configure-export-policy", + "name": "Configure export policy for NFS or dual-protocol volumes" }, { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-access-control-lists", + "name": "Configure access control lists on NFSv4.1 volumes for Azure NetApp Files" }, { - "url": "https://docs.microsoft.com/en-us/azure/advisor/advisor-reference-reliability-recommendations", - "name": "ASCS-LB - Central Server Instance" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/configure-unix-permissions-change-ownership-mode", + "name": "Configure Unix permissions and change ownership mode for NFS and dual-protocol volumes" } ], - "recommendationControl": "High Availability", - "longDescription": "For the ASCS-LB (Central Server Instance), ensure that the load balancer is configured correctly for SAP ASCS high 
availability.\n", + "recommendationControl": "Security", + "longDescription": "Access to the delegated subnet should be limited to specific Azure Virtual Networks. SMB-enabled volumes' share permissions should move away from 'Everyone/Full control'. NFS-enabled volumes' access needs to be controlled via export policies and/or NFSv4.1 ACLs.\n", "pgVerified": true, - "description": "Ensure the load balancer is configured correctly for SAP ASCS High availability", - "potentialBenefits": "Enhanced HA for SAP ASCS", - "publishedToLearn": false, + "description": "Restrict default access to Azure NetApp Files volumes", + "potentialBenefits": "Enhanced security, Reduced data breach risk", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "6648fe61-880d-4a96-8d2d-190a23d5580b", - "recommendationTypeId": null, + "aprlGuid": "d1e7ccc3-e6c1-40e9-a36e-fd134711c808", + "recommendationTypeId": "e4bebd74-387a-4a74-b757-475d2d1b4e3e", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" - }, - { - "url": "https://docs.microsoft.com/en-us/azure/advisor/advisor-reference-reliability-recommendations", - "name": "DBHANA-Pacemaker - Database Instance" + "url": "https://learn.microsoft.com/azure/azure-netapp-files/faq-application-resilience#do-i-need-to-take-special-precautions-for-smb-based-applications", + "name": "Do I need to take special precautions for SMB-based applications? 
| Microsoft Learn" } ], "recommendationControl": "High Availability", - "longDescription": "For the DBHANA-Pacemaker (Database Instance), ensure that the Pacemaker cluster configuration parameters are correctly set up for SAP HANA database high availability.\n", + "longDescription": "Certain SMB applications need SMB Transparent Failover for maintenance without interrupting server connectivity. Azure NetApp Files provides this through SMB Continuous Availability for applications like Citrix App Layering, FSLogix user/profile containers, Microsoft SQL Server, MSIX app attach.\n", "pgVerified": true, - "description": "Ensure the Pacemaker cluster has been setup for SAP HANA DB high availability", - "potentialBenefits": "Enhances SAP HANA DB uptime", - "publishedToLearn": false, + "description": "Make use of SMB continuous availability for supported applications", + "potentialBenefits": "Zero downtime for SMB apps", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", "recommendationImpact": "High", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2e4c2171-a83f-4238-a8e3-b51c90d86a99", + "aprlGuid": "60f36f9b-fac9-4160-bbf5-57af04da4f53", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/center-sap-solutions/get-quality-checks-insights", - "name": "SAP ACSS Insights" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/tree/main/QualityCheck", - "name": "OpenSource Quality Checks" - }, - { - "url": "https://docs.microsoft.com/en-us/azure/advisor/advisor-reference-reliability-recommendations", - "name": "DBHANA-LB- Database Instance" + "url": 
"https://learn.microsoft.com/azure/azure-netapp-files/faq-application-resilience#what-do-you-recommend-for-handling-potential-application-disruptions-due-to-storage-service-maintenance-events", + "name": "What do you recommend for handling potential application disruptions due to storage service maintenance events? | Microsoft Learn" } ], "recommendationControl": "High Availability", - "longDescription": "For the DBHANA-LB (Database Instance), make sure the load balancer is configured correctly for SAP HANA database high availability.\n", + "longDescription": "Azure NetApp Files might undergo occasional planned maintenance such as platform updates or service and software upgrades. It's important to be aware of the application's resiliency settings to cope with these storage service maintenance events.\n", "pgVerified": true, - "description": "Ensure the load balancer is configured correctly for SAP HANA DB High availability", - "potentialBenefits": "Enhanced DB availability", - "publishedToLearn": false, + "description": "Ensure application resilience for service maintenance events", + "potentialBenefits": "Minimizes downtime during maintenance", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.NetApp/netAppAccounts", + "recommendationImpact": "Medium", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4884cada-b9c7-42d5-8153-3853e4a6f6c4", + "aprlGuid": "54c3191b-b535-1946-bba9-b754f44060f6", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/planning-guide-storage#azure-netapp-files", - "name": "SAP on Azure NetApp Planning Guide" + "url": "https://learn.microsoft.com/en-us/azure/event-grid/enable-diagnostic-logs-topic", + "name": "Azure Event 
Grid - Enable diagnostic logs for Event Grid resources" } ], - "recommendationControl": "High Availability", - "longDescription": "High availability of SAP while used with Azure NetApp Files relies on setting proper timeout values to prevent disruption to your application. Review the documentation to ensure your configuration meets the timeout values as noted in the documentation.\n", - "pgVerified": true, - "description": "Review SAP configuration for timeout values used with Azure NetApp Files", - "potentialBenefits": "Improve resiliency and performance of SAP on Azure", - "publishedToLearn": false, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Enabling diagnostic settings on Azure Event Grid resources like custom topics, system topics, and domains lets you capture and view diagnostic information to troubleshoot failures effectively.\n", + "pgVerified": false, + "description": "Configure Diagnostic Settings for all Azure Event Grid resources", + "potentialBenefits": "Enhanced troubleshooting for Event Grid", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.EventGrid/topics", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "697deb1d-d398-4989-9734-9e6c18f7e0ad", + "aprlGuid": "92162eb5-4323-3145-8a6c-525ce2f0700e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/high-availability-guide-suse-nfs-simple-mount?tabs=lb-portal%2Censa1", - "name": "High-availability SAP NetWeaver with simple mount and NFS on SLES for SAP Applications VMs" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/high-availability-guide-suse-netapp-files?tabs=lb-portal%2Censa1", - "name": "High 
availability for SAP NetWeaver on Azure VMs on SUSE Linux Enterprise Server with Azure NetApp Files for SAP applications" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/sap-hana-high-availability-netapp-files-suse?tabs=lb-portal", - "name": "High availability of SAP HANA scale-up with Azure NetApp Files on SUSE Enterprise Linux" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/high-availability-guide-rhel-netapp-files?tabs=lb-portal%2Censa1", - "name": "Azure Virtual Machines HA for SAP NetWeaver on RHEL with Azure NetApp Files for SAP applications" - }, - { - "url": "https://learn.microsoft.com/en-us/azure/sap/workloads/sap-hana-high-availability-netapp-files-red-hat?tabs=lb-portal", - "name": "High availability of SAP HANA scale-up with Azure NetApp Files on RHEL" - }, - { - "url": "https://github.com/Azure/SAP-on-Azure-Scripts-and-Utilities/blob/main/QualityCheck/", - "name": "OpenSource Quality Checks" + "url": "https://learn.microsoft.com/en-us/azure/event-grid/delivery-and-retry#dead-letter-events", + "name": "Azure Event Grid delivery and retry" } ], - "recommendationControl": "Scalability", - "longDescription": "It is strongly recommended to review database storage configuration to ensure the right type and number of disks are used to provision the data and log volumes. 
This is to ensure that the database VMs meets performance requirements for IOPS and throughput for the given database.\nYou should also use Microsoft recommended settings such as disk caching, WriteAccelerator, stripe size and Linux I/O Scheduler mode for all database VMs.\nSAP on Azure QualityCheck tool can help you identify any deviations from Microsoft recommendations quickly and at scale.\n", - "pgVerified": true, - "description": "Provision recommended storage configuration on database VMs", - "potentialBenefits": "Improve reliability, performance and optimize costs", - "publishedToLearn": false, + "recommendationControl": "Personalized", + "longDescription": "Event Grid may not deliver an event within a specific time or after several attempts, leading to dead-lettering where undelivered events are sent to a storage account.\n", + "pgVerified": false, + "description": "Configure Dead-letter to save events that cannot be delivered", + "potentialBenefits": "Saves undelivered events", "tags": null, - "recommendationResourceType": "Specialized.Workload/SAP", - "recommendationImpact": "High", + "recommendationResourceType": "Microsoft.EventGrid/topics", + "recommendationImpact": "Low", "automationAvailable": false, - "query": "// under-development\r\n\r\n" + "query": "// under-development\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "8c0a0a4c-9e34-41af-9f6d-89d8dc00370e", - "recommendationTypeId": null, + "aprlGuid": "b2069f64-4741-3d4a-a71d-50c8b03f5ab7", + "recommendationTypeId": "bdac9c7b-b9b8-f572-0450-f161c430861c", "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/simplify", - "name": "RE:01 Simplicity and efficiency" + "url": "https://learn.microsoft.com/en-us/azure/event-grid/configure-private-endpoints", + "name": "Configure private endpoints for Azure Event Grid topics or domains" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Design 
your workload to align with business objectives and avoid unnecessary complexity or overhead. Use a practical and balanced approach to make design decisions that deliver the desired results. Contain your design to the necessities to reduce inefficiencies and potential problems.\n", - "pgVerified": true, - "description": "RE:01 Design your workload to align with business objectives", - "potentialBenefits": "Meet business requirements", - "publishedToLearn": true, + "recommendationControl": "Security", + "longDescription": "Use private endpoints for secure event ingress to custom topics/domains via a private link, avoiding the public internet. It employs an IP from the VNet space for your topic/domain.\n", + "pgVerified": false, + "description": "Azure Event Grid topics should use Private Link Private Endpoints", + "potentialBenefits": "Secure, private VNet ingress", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", + "recommendationResourceType": "Microsoft.EventGrid/topics", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find all eventgrid services not protected by private endpoints.\nResources\n| where type contains \"eventgrid\"\n| where properties['publicNetworkAccess'] == \"Enabled\"\n| project recommendationId = \"b2069f64-4741-3d4a-a71d-50c8b03f5ab7\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "74415e66-7baf-43f3-8def-164bc7b48215", + "aprlGuid": "c041d596-6c97-4c5f-b4b3-9cd37628f2e2", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/identify-flows", - "name": "RE:02 Critical flows" + "url": "https://docs.citrix.com/en-us/citrix-daas-azure/limits", + "name": "Citrix Limits" } ], - "recommendationControl": "High Availability", - "longDescription": "Identify and rate 
user and system flows. Use a criticality scale based on your business requirements to prioritize the flows.\n", + "recommendationControl": "Governance", + "longDescription": "A Citrix Managed Azure subscription supports VMs with VDA for app/desktop delivery, excluding other machines like Cloud Connectors. When close to the limit, signaled by a dashboard notification, and with sufficient licenses, request another subscription. Can't exceed the given limits for catalogs.\n", "pgVerified": true, - "description": "RE:02 Identify and rate user and system flows", - "potentialBenefits": "Align architecture with reliability goals", - "publishedToLearn": true, + "description": "Do not create more than 2000 Citrix VDA servers per subscription", + "potentialBenefits": "Avoids hitting limit, ensures reliability", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.Subscription/Subscriptions", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Count VM instances with a tag that contains \"Citrix VDA\" and create output if that count is >2000 for each subscription.\n// The Citrix published limit is 2500. 
This query runs an 80% check.\n\nresources\n| where type == 'microsoft.compute/virtualmachines'\n| where tags contains 'Citrix VDA'\n| summarize VMs=count() by subscriptionId\n| where VMs > 2000\n| join (resourcecontainers| where type =='microsoft.resources/subscriptions' | project subname=name, subscriptionId) on subscriptionId\n| project recommendationId='c041d596-6c97-4c5f-b4b3-9cd37628f2e2', name= subname, id = subscriptionId, param1='Too many instances.', param2= VMs\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "f5fbe3d4-7196-46b8-9b09-0e29e7cf43ac", + "aprlGuid": "5ada5ffa-7149-4e49-9fbf-e67be7c2594c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/failure-mode-analysis", - "name": "RE:03 Failure mode analysis" + "url": "https://learn.microsoft.com/en-us/azure/cloud-adoption-framework/ready/landing-zone/design-area/resource-org-management-groups#management-group-recommendations", + "name": "Management group recommendations" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/governance/management-groups/overview#root-management-group-for-each-directory", + "name": "Root management group for each directory" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Use failure mode analysis (FMA) to identify and prioritize potential failures in your solution components. Perform FMA to help you assess the risk and effect of each failure mode. 
Determine how the workload responds and recovers.\n", + "recommendationControl": "Governance", + "longDescription": "The root management group in Azure is designed for organizational hierarchy, allowing for all management groups and subscriptions to fold into it.\n", "pgVerified": true, - "description": "RE:03 Use failure mode analysis to identify and prioritize potential failures", - "potentialBenefits": "Reduce risk of unpredicted behavior", - "publishedToLearn": true, + "description": "Subscriptions should not be placed under the Tenant Root Management Group", + "potentialBenefits": "Enhanced security, compliance, and management", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", + "recommendationResourceType": "Microsoft.Subscription/Subscriptions", "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure Subscriptions that are placed under the Tenant Root Management Group\nresourcecontainers\n| where type == 'microsoft.resources/subscriptions'\n| extend mgParentSize = array_length(properties.managementGroupAncestorsChain)\n| where mgParentSize == 1\n| project recommendationId=\"5ada5ffa-7149-4e49-9fbf-e67be7c2594c\", name, id, tags\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "2c41b97c-af27-47b5-aafb-81bbf95fe8ba", + "aprlGuid": "783c6c18-760b-4867-9ced-3010a0bc5aa3", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/metrics", - "name": "RE:04 Target metrics" + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-bulk-identity-mgmt", + "name": "Import and export IoT Hub device identities in bulk" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr#manual-failover", + "name": "IoT Hub high availability and disaster recovery" } ], - 
"recommendationControl": "High Availability", - "longDescription": "Define reliability and recovery targets for the components, the flows, and the overall solution. Use the defined targets to build the health model. The health model defines what healthy, degraded, and unhealthy states look like.\n", - "pgVerified": true, - "description": "RE:04 Define reliability and recovery targets", - "potentialBenefits": "Communicate reliability expectations with stakeholders", - "publishedToLearn": true, + "recommendationControl": "Disaster Recovery", + "longDescription": "Device Identities should be copied to the failover region IoT Hub for all IoT devices to ensure connectivity in case of a failover. Manual Failover to another region is quicker (RTO), suitable for mission critical workloads.\n", + "pgVerified": false, + "description": "Device Identities are exported to a secondary region", + "potentialBenefits": "Faster failover; Ensures device connectivity", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "High", "automationAvailable": false, - "query": null + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "e404ef3f-e427-4e43-a1df-09da987e744f", + "aprlGuid": "eeba3a49-fef0-481f-a471-7ff01139b474", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/redundancy", - "name": "RE:05 Redundancy" + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-scaling", + "name": "Choose the right IoT Hub tier and size for your solution" } ], "recommendationControl": "High Availability", - "longDescription": "Add redundancy at different levels, especially for critical flows. 
Apply redundancy to the compute, data, network, and other infrastructure tiers in accordance with the identified reliability targets.\n", - "pgVerified": true, - "description": "RE:05 Design for redundancy", - "potentialBenefits": "Optimize for resiliency", - "publishedToLearn": true, + "longDescription": "In a production scenario, the IoT Hub tier should not be Free because the Free tier does not provide the necessary Service Level Agreement.\n", + "pgVerified": false, + "description": "Do not use free tier", + "potentialBenefits": "Ensures SLA for production", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// list all IoT Hubs that are using the Free tier\nresources\n| where type =~ \"microsoft.devices/iothubs\" and\n tostring(sku.tier) =~ 'Free'\n| project recommendationId=\"eeba3a49-fef0-481f-a471-7ff01139b474\", name, id, tags, param1=strcat(\"tier:\", tostring(sku.tier))\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "df93ae26-260e-408f-860c-42cd189f8bf8", + "aprlGuid": "214cbc46-747e-4354-af6e-6bf0054196a5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/en-us/azure/well-architected/reliability/highly-available-multi-region-design", - "name": "RE:05 High-availability multi-region design" + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr#availability-zones", + "name": "Azure IoT Hub high availability and disaster recovery" } ], "recommendationControl": "High Availability", - "longDescription": "High availability is a foundational tenet of designing for reliability. 
A highly available architecture can help you avoid downtime as much as possible and recover efficiently if downtime does occur.\n", - "pgVerified": true, - "description": "RE:05 Design for multi-region high availability", - "potentialBenefits": "Minimize downtime from regional outages", - "publishedToLearn": true, + "longDescription": "In regions supporting Availability Zones for IoT Hub, using these zones boosts availability. They're automatically activated for new IoT Hubs in supported areas.\n", + "pgVerified": false, + "description": "Use Availability Zones", + "potentialBenefits": "Boosts IoT Hub availability", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "High", "automationAvailable": false, - "query": null + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "3d6adb0a-042f-47f7-a7ea-db2e360903d5", + "aprlGuid": "b1e1378d-4572-4414-bebd-b8872a6d4d1c", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/regions-availability-zones", - "name": "Regions and availability zones" + "url": "https://learn.microsoft.com/en-us/azure/iot-dps/concepts-service", + "name": "IoT Hub Device Provisioning Service (DPS) terminology" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/iot-dps/concepts-deploy-at-scale", + "name": "Best practices for large-scale IoT device deployments" + }, + { + "url": "https://learn.microsoft.com/en-us/azure/iot-dps/iot-dps-ha-dr", + "name": "IoT Hub Device Provisioning Service high availability and disaster recovery" } ], - "recommendationControl": "High Availability", - "longDescription": "High availability is a foundational tenet of designing for reliability. 
A highly available architecture can help you avoid downtime as much as possible and recover efficiently if downtime does occur.\n", - "pgVerified": true, - "description": "RE:05 Design for high availability with availability zones", - "potentialBenefits": "Minimize downtime from zonal outages", - "publishedToLearn": true, + "recommendationControl": "Scalability", + "longDescription": "Device Provisioning Service (DPS) enables easy redistribution of IoT devices for scaling and availability, allowing devices to be reassigned and not bound to specific IoT Hub instances. Devices in IoT Hubs using DPS should be verified for DPS utilization.\n", + "pgVerified": false, + "description": "Use Device Provisioning Service", + "potentialBenefits": "Enhances scalability and availability", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// list all IoT Hubs that do not have a linked IoT Hub Device Provisioning Service (DPS)\nresources\n| where type =~ \"microsoft.devices/iothubs\"\n| project id, iotHubName=tostring(properties.hostName), tags, resourceGroup\n| join kind=fullouter (\n resources\n | where type == \"microsoft.devices/provisioningservices\"\n | mv-expand iotHubs=properties.iotHubs\n | project iotHubName = tostring(iotHubs.name), dpsName = name, name=iotHubs.name\n) on iotHubName\n| where dpsName == ''\n| project recommendationId=\"b1e1378d-4572-4414-bebd-b8872a6d4d1c\", name=iotHubName, id, tags, param1='DPS:none'\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7f0b9ea3-0159-4ea7-b854-a4313fe76d7f", + "aprlGuid": "02568a5d-335e-4e51-9f7c-fe2ada977300", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/well-architected/reliability/partition-data", - "name": "RE:06 Data partitioning" + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-ha-dr", + "name": "IoT Hub high availability and disaster recovery" } ], "recommendationControl": "High Availability", - "longDescription": "Partitioning data improves scalability, reduces contention, and optimizes performance. Implement data partitioning to divide data by usage pattern.\n", - "pgVerified": true, - "description": "RE:06 Design for data partitioning", - "potentialBenefits": "Improve data estate reliability", - "publishedToLearn": true, + "longDescription": "In case of a regional failure, an IoT Hub can failover to a second region, automatically or manually, to ensure your application continues working.\n", + "pgVerified": false, + "description": "Define Failover Guidelines", + "potentialBenefits": "Ensures business continuity", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "High", "automationAvailable": false, - "query": null + "query": "// cannot-be-validated-with-arg\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "340fe5c3-d599-448a-8e52-15e96771a3f0", + "aprlGuid": "e7dbd21f-b27a-4b8c-a901-cedb1e6d8e1e", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/scaling", - "name": "RE:06 Scaling" + "url": "https://learn.microsoft.com/en-us/azure/iot-hub/iot-hub-devguide-messages-d2c#fallback-route", + "name": "Use message routing - Fallback route" } ], - "recommendationControl": "Scalability", - "longDescription": "Implement a timely and reliable scaling strategy at the application, data, and infrastructure levels.\n", - "pgVerified": true, - "description": "RE:06 Design for reliable scaling", - 
"potentialBenefits": "Dynamically handle increased load", - "publishedToLearn": true, + "recommendationControl": "Monitoring and Alerting", + "longDescription": "Using message routing for custom endpoints in IoT Hub, messages might not reach these destinations if specific conditions are unmet. A default route ensures all messages are received, but disabling this safety net risks leaving some messages undelivered.\n", + "pgVerified": false, + "description": "Disabled Fallback Route", + "potentialBenefits": "Prevents undelivered messages", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.Devices/IotHubs", + "recommendationImpact": "Low", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// list all IoT Hubs that have the fallback route disabled\nresources\n| where type == \"microsoft.devices/iothubs\"\n| extend fallbackEnabled=properties.routing.fallbackRoute.isEnabled\n| where fallbackEnabled == false\n| project recommendationId=\"e7dbd21f-b27a-4b8c-a901-cedb1e6d8e1e\", name, id, tags, param1='FallbackRouteEnabled:false'\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "4e1094dd-2d85-4a1a-8ca8-1e6ea21206fb", + "aprlGuid": "98bd7098-49d6-491b-86f1-b143d6b1a0ff", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/background-jobs", - "name": "RE:07 Background jobs" + "url": "https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/overview#resource-group-location-alignment", + "name": "Azure Resource Manager Overview" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Background jobs help minimize the load on the application UI, which improves availability and reduces interactive response time.\n", + "recommendationControl": "Disaster 
Recovery", + "longDescription": "Ensure resource locations align with their resource group to manage resources during regional outages. ARM stores resource data, which if in an unavailable region, could halt updates, rendering resources read-only.\n", "pgVerified": true, - "description": "RE:07 Use background jobs", - "potentialBenefits": "Minimize application load", - "publishedToLearn": true, + "description": "Ensure Resource Group and its Resources are located in the same Region", + "potentialBenefits": "Improves outage management", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.Resources/resourceGroups", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Provides a list of Azure Resource Groups that have resources deployed in a region different than the Resource Group region\nresources\n| project id, name, tags, resourceGroup, location\n| where location != \"global\" // exclude global resources\n| where resourceGroup != \"networkwatcherrg\" // exclude networkwatcherrg\n| where split(id, \"/\", 3)[0] =~ \"resourceGroups\" // resource is in a resource group\n| extend resourceGroupId = strcat_array(array_slice(split(id, \"/\"),0,4), \"/\") // create resource group resource id\n| join (resourcecontainers | project containerid=id, containerlocation=location ) on $left.resourceGroupId == $right.['containerid'] // join to resourcecontainers table\n| where location != containerlocation\n| project recommendationId=\"98bd7098-49d6-491b-86f1-b143d6b1a0ff\", name, id, tags\n| order by id asc\n\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7b5008cf-1853-44c4-827d-bca091678c3f", + "aprlGuid": "88856605-53d8-4bbd-a75b-4a7b14939d32", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": 
"https://learn.microsoft.com/azure/well-architected/reliability/self-preservation", - "name": "RE:07 Self-preservation" + "url": "https://learn.microsoft.com/azure/mysql/flexible-server/concepts-high-availability", + "name": "High availability concepts in Azure Database for MySQL - Flexible Server" } ], "recommendationControl": "High Availability", - "longDescription": "Strengthen the resiliency and recoverability of your workload by implementing self-preservation and self-healing measures. Self-healing capabilities help you avoid downtime by building in failure detection and automatic corrective actions to respond to different failure types.\n", + "longDescription": "Enable HA with zone redundancy on flexible server instances to deploy a standby replica in a different zone, offering automatic failover capability for improved reliability and disaster recovery.\n", "pgVerified": true, - "description": "RE:07 Implement self-preservation and self-healing measures", - "potentialBenefits": "Reduce the likelihood of outages", - "publishedToLearn": true, + "description": "Enable HA with zone redundancy", + "potentialBenefits": "Enhanced uptime and data protection", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Database for MySQL instances that are not zone redundant\nresources\n| where type == \"microsoft.dbformysql/flexibleservers\"\n| where properties.highAvailability.mode != \"ZoneRedundant\"\n| project recommendationId = \"88856605-53d8-4bbd-a75b-4a7b14939d32\", name, id, tags, param1 = \"ZoneRedundant: False\"\n" }, { "publishedToAdvisor": null, - "aprlGuid": "66ae4a5c-7f58-4293-bed8-5caa4f9f34e2", + "aprlGuid": "82a9a0f2-24ee-496f-9ad2-25f81710942d", 
"recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/handle-transient-faults", - "name": "RE:07 Transient faults" + "url": "https://learn.microsoft.com/azure/mysql/flexible-server/concepts-maintenance", + "name": "Scheduled maintenance in Azure Database for MySQL - Flexible Server" } ], - "recommendationControl": "High Availability", - "longDescription": "Build capabilities into the solution by using infrastructure-based reliability patterns and software-based design patterns to handle component failures and transient errors.\n", + "recommendationControl": "Scalability", + "longDescription": "Use custom maintenance schedule on flexible server instances to select a preferred time for service updates to be applied.\n", "pgVerified": true, - "description": "RE:07 Handle transient faults", - "potentialBenefits": "Reduce the likelihood of outages", - "publishedToLearn": true, + "description": "Enable custom maintenance schedule", + "potentialBenefits": "Control update timings", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Database for MySQL instances that do not have a custom maintenance window\nresources\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\n| where properties.maintenanceWindow.customWindow != \"Enabled\"\n| project recommendationId = \"82a9a0f2-24ee-496f-9ad2-25f81710942d\", name, id, tags, param1 = strcat(\"customWindow:\", properties['maintenanceWindow']['customWindow'])\n" }, { "publishedToAdvisor": null, - "aprlGuid": "7db74a6a-4062-46a8-a0cd-18684fb0ec08", + "aprlGuid": "5c96afc3-7d2e-46ff-a4c7-9c32850c441b", "recommendationTypeId": 
null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/testing-strategy", - "name": "RE:08 Testing" + "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-backup-restore", + "name": "Backup and restore in Azure Database for MySQL - Flexible Server" } ], - "recommendationControl": "Other Best Practices", - "longDescription": "Test resiliency and availability scenarios by applying the principles of chaos engineering in your test and production environments. Use testing to ensure that your graceful degradation implementation and scaling strategies are effective by performing active malfunction and simulated load testing.\n", + "recommendationControl": "Disaster Recovery", + "longDescription": "Configure GRS to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", "pgVerified": true, - "description": "RE:08 Design a reliability testing strategy", - "potentialBenefits": "Validate and optimize workload reliability", - "publishedToLearn": true, + "description": "Configure geo redundant backup storage", + "potentialBenefits": "Recover from regional failure and/or disaster", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Database for MySQL instances that do not have geo redundant backup storage enabled\nresources\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\n| where properties.backup.geoRedundantBackup != \"Enabled\"\n| project recommendationId = \"5c96afc3-7d2e-46ff-a4c7-9c32850c441b\", name, id, tags, param1 = strcat(\"geoRedundantBackup:\", properties['backup']['geoRedundantBackup'])\n" 
}, { "publishedToAdvisor": null, - "aprlGuid": "5f95df03-cae2-4761-90b7-7afd657ac124", + "aprlGuid": "b49a8653-cc43-48c9-8513-a2d2e3f14dd1", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/disaster-recovery", - "name": "RE:09 Disaster recovery" + "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-read-replicas", + "name": "Read replicas in Azure Database for MySQL - Flexible Server" } ], "recommendationControl": "Disaster Recovery", - "longDescription": "Implement structured, tested, and documented business continuity and disaster recovery (BCDR) plans that align with the recovery targets. Plans must cover all components and the system as a whole.\n", + "longDescription": "Configure one or more read replicas to ensure that your database meets its availability and durability targets even in the face of failures or disasters.\n", "pgVerified": true, - "description": "RE:09 Implement business continuity and disaster recovery plan", - "potentialBenefits": "Reliable disaster recovery", - "publishedToLearn": true, + "description": "Configure one or more read replicas", + "potentialBenefits": "Recover from regional failure and/or disaster", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Database for MySQL instances that do not have a read replica configured\nresources\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\n| where properties.replicationRole == \"None\"\n| project recommendationId = \"b49a8653-cc43-48c9-8513-a2d2e3f14dd1\", name, id, tags, param1 = strcat(\"replicationRole:\", properties['replicationRole'])\n" }, { 
"publishedToAdvisor": null, - "aprlGuid": "90adebf7-bc90-4939-9aa8-119c46bee0fc", + "aprlGuid": "8176a79d-8645-4e52-96be-a10fc0204fe5", "recommendationTypeId": null, "recommendationMetadataState": "Active", "learnMoreLink": [ { - "url": "https://learn.microsoft.com/azure/well-architected/reliability/monitoring-alerting-strategy", - "name": "RE:10 Monitoring and alerting" + "url": "https://learn.microsoft.com/en-us/azure/mysql/flexible-server/concepts-service-tiers-storage#storage-auto-grow", + "name": "Azure Database for MySQL - Flexible Server service tiers - Storage auto grow" } ], - "recommendationControl": "Monitoring and Alerting", - "longDescription": "Measure and publish the solution's health indicators. Continuously capture uptime and other reliability data from across the workload and also from individual components and key flows.\n", + "recommendationControl": "Scalability", + "longDescription": "Configure storage auto-grow to prevent the server from running out of storage and becoming read-only.\n", "pgVerified": true, - "description": "RE:10 Design a reliable monitoring and alerting strategy", - "potentialBenefits": "Observability into workload health", - "publishedToLearn": true, + "description": "Configure storage auto-grow", + "potentialBenefits": "Scale storage automatically to meet increasing demand", "tags": null, - "recommendationResourceType": "WellArchitected/Reliability", - "recommendationImpact": "Medium", - "automationAvailable": false, - "query": null + "recommendationResourceType": "Microsoft.DBforMySQL/flexibleServers", + "recommendationImpact": "High", + "automationAvailable": true, + "query": "// Azure Resource Graph Query\n// Find Database for MySQL instances that do not have a storage auto-grow\nresources\n| where type =~ \"microsoft.dbformysql/flexibleservers\"\n| where properties.storage.autoGrow != \"Enabled\"\n| project recommendationId = \"8176a79d-8645-4e52-96be-a10fc0204fe5\", name, id, tags, param1 = strcat(\"autoGrow:\", 
properties['storage']['autoGrow'])\n" } ]