diff --git a/bicep/modules/blade_service.bicep b/bicep/modules/blade_service.bicep index f61af74e..b3089fdc 100644 --- a/bicep/modules/blade_service.bicep +++ b/bicep/modules/blade_service.bicep @@ -81,13 +81,6 @@ param enableAdminUI bool = true @description('Specify the OSDU version.') param osduVersion string = 'master' -@allowed([ - 'Intel' - 'ARM' -]) -@description('Specify the server type.') -param serverType string = 'ARM' - @minLength(9) @maxLength(18) @description('The address range to use for services') @@ -147,8 +140,9 @@ var serviceLayerConfig = { // D4pds v5 with 4 vCPUs and 16 GiB of memory. Available in 22 regions starting from $88.18 per month. // D2s_v5 with 2 vCPUs and 8 GiB of memory. Available in 50 regions starting from $70.08 per month. // D4s_v5 with 4 vCPUs and 16 GiB of memory. Available in 50 regions starting from $140.16 per month. - vmSize: serverType == 'Intel' ? 'Standard_D4s_v5' : 'Standard_D4pds_v5' // Choose between Intel (D4s_v5 - 4 vCPUs/16GB) or ARM (D4pds_v5) - poolSize: serverType == 'Intel' ? 'Standard_D2s_v5' : 'Standard_D2pds_v5' // Choose between Intel (D2s_v5 - 2 vCPUs/8GB) or ARM (D2pds_v5) + vmSize: 'Standard_D4pds_v5' + poolSize: 'Standard_D2pds_v5' + defaultSize: 'Standard_D4s_v5' // Intel size for the default pool; OSDU Java services are assumed not to run on ARM (TODO: confirm) } gitops: { name: 'flux-system' @@ -392,7 +386,7 @@ module cluster './managed-cluster/main.bicep' = { { name: 'default' mode: 'User' - vmSize: empty(vmSize) ? serviceLayerConfig.cluster.vmSize : vmSize + vmSize: empty(vmSize) ? 
serviceLayerConfig.cluster.defaultSize : vmSize enableAutoScaling: true minCount: 4 maxCount: 20 diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 5b190a13..c2f57abc 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -79,6 +79,8 @@ markdown_extensions: - pymdownx.snippets - pymdownx.tabbed: alternate_style: true + - pymdownx.tasklist: + custom_checkbox: true - pymdownx.superfences: custom_fences: - name: mermaid @@ -141,6 +143,7 @@ nav: - experimental_adminui.md - Design: - design_architecture.md + - design_platform.md - design_infrastructure.md - design_software.md - Tutorials: diff --git a/docs/src/design_platform.md b/docs/src/design_platform.md new file mode 100644 index 00000000..54e0e50d --- /dev/null +++ b/docs/src/design_platform.md @@ -0,0 +1,192 @@ +# Platform + +The OSDU™ private instance solution implements industry-leading best practices for security and operational excellence on Azure Kubernetes Service (AKS). These practices are aligned with Microsoft's Secure Future Initiative and are designed to provide a robust, secure, and efficient platform while maintaining developer productivity. + +??? Tip "Learning Opportunity" + For more details on Microsoft's security focus, refer to the + [Microsoft Secure Future Initiative](https://www.microsoft.com/security/business/secure-future-initiative). + +This solution implements comprehensive best practices across security controls and operational excellence. The implemented controls and features help ensure: + +- Strong security posture through infrastructure and application security controls +- Operational efficiency through automation and DevOps practices +- Reliable performance through proper scaling and maintenance procedures +- Simplified maintenance through automated updates and proper backup strategies + +??? Tip "Learning Opportunity" + For more details on Microsoft's Cluster Best Practices, refer to the + [AKS Best Practices](https://learn.microsoft.com/en-us/azure/aks/best-practices). 
+ +## Security Controls + +### Infrastructure Security + +
+ +- :material-shield-check:{ .lg .middle } __Cluster Protection__ + + --- + + - [x] [Microsoft Defender for Containers](https://learn.microsoft.com/en-us/azure/defender-for-cloud/defender-for-containers-introduction) + + Comprehensive security monitoring and protection for containerized assets, including clusters, nodes, workloads, registries, and images. + + - [x] [Kubernetes RBAC and Microsoft Entra ID](https://learn.microsoft.com/en-us/azure/aks/concepts-identity) + + Granular access control by granting users, groups, and service accounts only the minimum required permissions through role-based policies and enhanced Azure authentication. + + - [x] [Node Resource Group Lockdown](https://learn.microsoft.com/en-us/azure/aks/node-resource-group-lockdown) + + Prevent unauthorized changes to node resource group resources using the NRGLockdownPreview feature. + +
+ +
+ +- :material-linux:{ .lg .middle } __Node Security__ + + --- + + - [x] [Azure Linux](https://learn.microsoft.com/en-us/azure/aks/use-azure-linux) + + Azure Linux Container Host, based on Microsoft's CBL-Mariner Linux distribution, is optimized for container workloads on AKS. + + - [x] [Disable SSH Access](https://learn.microsoft.com/en-us/azure/aks/disable-ssh-access) + + Improve security by disabling SSH access to nodes at both the cluster and node pool levels using the DisableSSHPreview feature. + +
+ +
+ +- :material-network:{ .lg .middle } __Network Security__ + + --- + + - [x] [CNI Overlay](https://learn.microsoft.com/en-us/azure/aks/azure-cni-overlay) + + Enhanced network security with overlay networking, providing logical separation between pod and node networks. + + - [x] [NAT Gateway](https://learn.microsoft.com/en-us/azure/aks/nat-gateway) + + Managed outbound internet connectivity with network isolation capabilities. + + - [x] [Service Mesh](https://learn.microsoft.com/en-us/azure/aks/istio-deploy-addon) + + Istio service mesh for secure service-to-service communication, traffic management, and observability. + +
+ +
+ +- :material-database:{ .lg .middle } __Storage Security__ + + --- + + - [x] [Managed Disks](https://learn.microsoft.com/en-us/azure/aks/azure-disk-customer-managed-keys) + + Secure block-level storage volumes with encryption and access controls. + +
+ +### Application Security + +
+ +- :material-docker:{ .lg .middle } __Container Security__ + + --- + + - [x] [Image Cleaner](https://learn.microsoft.com/en-us/azure/aks/image-cleaner) + + Automatic identification and removal of unused images to reduce vulnerability surface. + +
+ +
+ +- :material-shield-lock:{ .lg .middle } __Pod Security__ + + --- + + - [x] [Pod Security Context](https://learn.microsoft.com/en-us/azure/aks/developer-best-practices-pod-security) + + Limit access to processes and services through security context settings, implementing the principle of least privilege. + + - [x] [Workload Identity](https://learn.microsoft.com/en-us/azure/aks/workload-identity-overview) + + Enable pods to authenticate against Azure services using Microsoft Entra workload identities. + + - [x] [Secrets Management](https://learn.microsoft.com/en-us/azure/aks/csi-secrets-store-driver) + + Integrate Azure Key Vault with the Secrets Store CSI Driver for secure runtime secrets management. + + - [x] [Policy Controls](https://learn.microsoft.com/en-us/azure/aks/policy-reference) + + Enforce Kubernetes best practices through Azure Policy deployment safeguards. + +
+ +## Operational Excellence + +### Automation & DevOps + +
+ +- :material-cog:{ .lg .middle } __Deployment & Operations__ + + --- + + - [x] [GitOps](https://learn.microsoft.com/en-us/azure/azure-arc/kubernetes/tutorial-use-gitops-flux2) + + Git-based infrastructure and application deployment management. + + - [x] [Verified Modules](https://learn.microsoft.com/en-us/azure/verified-modules/overview) + + Pre-validated infrastructure modules for consistent and secure deployments. + + - [x] [App Configuration](https://learn.microsoft.com/en-us/azure/azure-app-configuration/overview) + + Managed service for feature flags and configuration management. + +
+ +### Scalability & Performance + +
+ +- :material-speedometer:{ .lg .middle } __Performance & Scaling__ + + --- + + - [x] [Node Auto Provisioning](https://learn.microsoft.com/en-us/azure/aks/cluster-node-auto-provisioning) + + Automatic node provisioning for optimal cluster sizing and cost efficiency. + + - [x] [KEDA](https://learn.microsoft.com/en-us/azure/aks/keda-about) + + Event-driven autoscaling for Kubernetes workloads. + + - [x] [Vertical Pod Autoscaler](https://learn.microsoft.com/en-us/azure/aks/vertical-pod-autoscaler) + + Automated resource allocation optimization for pods based on usage patterns. + +
+ +### Maintenance & Updates + +
+ +- :material-update:{ .lg .middle } __System Updates__ + + --- + + - [x] [Automatic Upgrades](https://learn.microsoft.com/en-us/azure/aks/auto-upgrade-cluster?tabs=azure-cli) + + Stay current on new features and bug fixes with automated Kubernetes version upgrades. + + - [x] [Node OS Updates](https://learn.microsoft.com/en-us/azure/aks/node-updates-kured) + + Linux nodes in AKS get security patches through their distro update channel nightly. + +
diff --git a/docs/src/getting_started.md b/docs/src/getting_started.md index 8e303447..73439520 100644 --- a/docs/src/getting_started.md +++ b/docs/src/getting_started.md @@ -4,19 +4,21 @@ Prerequisites and configuration steps for deploying personal OSDU™ instances i ## Subscription Quota -It is recommended to have at least 50 vCPUs in a region along with the ability to deploy Cosmos DB instances which can be resource constrained in some regions. Defaults can be increased by requesting a [quota increase](https://learn.microsoft.com/en-us/azure/quotas/regional-quota-requests). +It is recommended to have at least 50 vCPUs of quota in a region for each required VM family, along with the ability to deploy Cosmos DB instances, which can be resource constrained in some regions. Defaults can be increased by requesting a [quota increase](https://learn.microsoft.com/en-us/azure/quotas/regional-quota-requests). !!! note "Ensure Sufficient Quota" - The choice between BS and DS family vCPUs depends on your specific deployment requirements: + The deployment requires vCPU quota for the VM families of the following node sizes: - - Increase DS family vCPU quota if necessary. + - Standard_D4pds_v5 nodes for system workloads + - Standard_D2pds_v5 nodes for zonal workloads + - Standard_D4s_v5 nodes for default workloads | Quota Name | Minimum Quantity | |------------|------------------| | Total Regional vCPUs | 100 | | Standard DPDSv5 Family vCPUs | 50 | -| Standard DS Family vCPUs | 50 | +| Standard DSv5 Family vCPUs | 50 | !!! tip "Available Cosmos DB Regions"