diff --git a/.infracost/pricing.gob b/.infracost/pricing.gob index 3bde55f..4d6d936 100644 Binary files a/.infracost/pricing.gob and b/.infracost/pricing.gob differ diff --git a/02_kubernetes_pods/deploy-app.yaml b/02_kubernetes_pods/deploy-app.yaml index 66a0fd0..7a5cd18 100644 Binary files a/02_kubernetes_pods/deploy-app.yaml and b/02_kubernetes_pods/deploy-app.yaml differ diff --git a/60_coredns/commands.sh b/60_coredns/commands.sh index 69f0743..14b4ace 100644 --- a/60_coredns/commands.sh +++ b/60_coredns/commands.sh @@ -1,126 +1,39 @@ # Custom domain names using Kubernetes CoreDNS kubectl get pods -n kube-system -l=k8s-app=kube-dns -# NAME READY STATUS RESTARTS AGE -# coredns-77f75ff65d-sx9mf 1/1 Running 0 85m -# coredns-77f75ff65d-z7f52 1/1 Running 0 89m kubectl get configmap -n kube-system -l=k8s-app=kube-dns -# NAME DATA AGE -# coredns 1 88m -# coredns-autoscaler 1 83m -# coredns-custom 0 88m kubectl describe configmap coredns -n kube-system -# Name: coredns -# Namespace: kube-system -# Labels: addonmanager.kubernetes.io/mode=Reconcile -# k8s-app=kube-dns -# kubernetes.io/cluster-service=true -# Annotations: - -# Data -# ==== -# Corefile: -# ---- -# .:53 { -# errors -# ready -# health -# kubernetes cluster.local in-addr.arpa ip6.arpa { -# pods insecure -# fallthrough in-addr.arpa ip6.arpa -# } -# prometheus :9153 -# forward . 
/etc/resolv.conf -# cache 30 -# loop -# reload -# loadbalance -# import custom/*.override -# } -# import custom/*.server - - -# BinaryData -# ==== - -# Events: kubectl describe configmap coredns-custom -n kube-system -# Name: coredns-custom -# Namespace: kube-system -# Labels: addonmanager.kubernetes.io/mode=EnsureExists -# k8s-app=kube-dns -# kubernetes.io/cluster-service=true -# Annotations: - -# Data -# ==== - -# BinaryData -# ==== - -# Events: kubectl create deployment nginx --image=nginx --replicas=3 -# deployment.apps/nginx created kubectl expose deployment nginx --name nginx --port=80 -# service/nginx exposed kubectl get deploy,svc -# NAME READY UP-TO-DATE AVAILABLE AGE -# deployment.apps/nginx 3/3 3 3 36s - -# NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE -# service/kubernetes ClusterIP 10.0.0.1 443/TCP 7h30m -# service/nginx ClusterIP 10.0.235.219 80/TCP 16s kubectl apply -f custom-coredns.yaml -# configmap/coredns-custom configured kubectl run nginx --image=nginx -# pod/nginx created kubectl exec -it nginx -- curl http://nginx -# -# -# -# Welcome to nginx! -# ... kubectl exec -it nginx -- curl http://nginx.default.svc.cluster.local -# -# -# -# Welcome to nginx! -# ... # resolve the custom service name (but with namespace) -kubectl exec -it nginx -- curl http://nginx.default.aks.com -# -# -# -# Welcome to nginx! -# resolve the custom service name (but without namespace) +kubectl exec -it nginx -- curl http://nginx.default.aks.com -# replace `rewrite stop` block with the following: -# rewrite stop { -# name regex (.*)\.aks\.com\.$ {1}.default.svc.cluster.local. -# answer name (.*).\default\.svc\.cluster\.local\.$ {1}.aks.com. 
-# } +# apply the new custom CoreDNS configmap -# aply the new custom CoreDNS configmap kubectl apply -f custom-coredns.yaml # delete CoreDNS pods after updating the custom configmap to reload the new configmap + kubectl delete pod --namespace kube-system -l k8s-app=kube-dns # resolving with '.aks.com' + kubectl exec -it nginx -- curl http://nginx.aks.com -# -# -# -# Welcome to nginx! \ No newline at end of file diff --git a/60_coredns_logs/Readme.md b/60_coredns_logs/Readme.md new file mode 100644 index 0000000..d58adb0 --- /dev/null +++ b/60_coredns_logs/Readme.md @@ -0,0 +1,120 @@ +# Enabling logging in CoreDNS + +## Introduction + +`CoreDNS` is the DNS service discovery plugin for Kubernetes. +CoreDNS is the replacement for `kube-dns`, which was previously used in Kubernetes. +CoreDNS is pre-installed in the `kube-system` namespace. + + + +The objective of this lab is to enable logging in CoreDNS so that DNS queries are recorded. + +```sh +# create an AKS cluster + +$AKS_RG="rg-aks-cluster-dns-logs" +$AKS_NAME="aks-cluster" + +az group create --name $AKS_RG --location westeurope + +az aks create -g $AKS_RG -n $AKS_NAME --network-plugin azure --node-vm-size "Standard_B2als_v2" + +az aks get-credentials -g $AKS_RG -n $AKS_NAME --overwrite-existing + +# create demo application + +kubectl run nginx --image=nginx + +kubectl exec -it nginx -- apt update +kubectl exec -it nginx -- apt install dnsutils -y + +kubectl exec -it nginx -- nslookup microsoft.com + +# Did CoreDNS log this DNS request? + +# check CoreDNS logs + +kubectl get pods -n kube-system -l k8s-app=kube-dns +# NAME READY STATUS RESTARTS AGE +# coredns-789789675-5mq2l 1/1 Running 0 5m11s +# coredns-789789675-j55lz 1/1 Running 0 5m39s + +kubectl logs coredns-789789675-5mq2l -n kube-system + +kubectl logs coredns-789789675-j55lz -n kube-system + +# nothing was logged ! + +# Is logging enabled in CoreDNS ? 
+ +kubectl get configmap -n kube-system -l k8s-app=kube-dns +# NAME DATA AGE +# coredns 1 4m18s +# coredns-custom 0 4m25s + +kubectl describe configmap coredns -n kube-system +# Name: coredns +# Namespace: kube-system +# Labels: addonmanager.kubernetes.io/mode=Reconcile +# k8s-app=kube-dns +# kubernetes.io/cluster-service=true +# Annotations: + +# Data +# ==== +# Corefile: +# ---- +# .:53 { +# errors +# ready +# health { +# lameduck 5s +# } +# kubernetes cluster.local in-addr.arpa ip6.arpa { +# pods insecure +# fallthrough in-addr.arpa ip6.arpa +# ttl 30 +# } +# prometheus :9153 +# forward . /etc/resolv.conf +# cache 30 +# loop +# reload +# loadbalance +# import custom/*.override +# } +# import custom/*.server + + +# BinaryData +# ==== + +# Events: + +kubectl describe cm coredns-custom -n kube-system +# Data +# ==== + +# enable logging for CoreDNS + +code coredns-custom.yaml + +kubectl apply -f coredns-custom.yaml + +# Force CoreDNS to reload the ConfigMap + +kubectl -n kube-system rollout restart deployment coredns + +kubectl get pods -n kube-system -l k8s-app=kube-dns + +# create DNS query + +kubectl exec -it nginx -- nslookup microsoft.com + +# View the CoreDNS logs + +kubectl logs --namespace kube-system -l k8s-app=kube-dns +# [INFO] 10.224.0.10:47320 - 15830 "A IN microsoft.com. udp 31 false 512" NOERROR qr,rd,ra 176 0.001047529s +# [INFO] 10.224.0.10:47575 - 61320 "AAAA IN microsoft.com. 
udp 31 false 512" NOERROR qr,rd,ra 236 0.001028862s +``` diff --git a/60_coredns_logs/commands.sh b/60_coredns_logs/commands.sh new file mode 100644 index 0000000..5a99e7d --- /dev/null +++ b/60_coredns_logs/commands.sh @@ -0,0 +1,106 @@ +# create an AKS cluster + +$AKS_RG="rg-aks-cluster-dns-logs" +$AKS_NAME="aks-cluster" + +az group create --name $AKS_RG --location westeurope + +az aks create -g $AKS_RG -n $AKS_NAME --network-plugin azure --node-vm-size "Standard_B2als_v2" + +az aks get-credentials -g $AKS_RG -n $AKS_NAME --overwrite-existing + +# create demo application + +kubectl run nginx --image=nginx + +kubectl exec -it nginx -- apt update +kubectl exec -it nginx -- apt install dnsutils -y + +kubectl exec -it nginx -- nslookup microsoft.com + +# Did CoreDNS log this DNS request? + +# check CoreDNS logs + +kubectl get pods -n kube-system -l k8s-app=kube-dns +# NAME READY STATUS RESTARTS AGE +# coredns-789789675-5mq2l 1/1 Running 0 5m11s +# coredns-789789675-j55lz 1/1 Running 0 5m39s + +kubectl logs coredns-789789675-5mq2l -n kube-system + +kubectl logs coredns-789789675-j55lz -n kube-system + +# nothing was logged ! + +# Is logging enabled in CoreDNS ? + +kubectl get configmap -n kube-system -l k8s-app=kube-dns +# NAME DATA AGE +# coredns 1 4m18s +# coredns-custom 0 4m25s + +kubectl describe configmap coredns -n kube-system +# Name: coredns +# Namespace: kube-system +# Labels: addonmanager.kubernetes.io/mode=Reconcile +# k8s-app=kube-dns +# kubernetes.io/cluster-service=true +# Annotations: + +# Data +# ==== +# Corefile: +# ---- +# .:53 { +# errors +# ready +# health { +# lameduck 5s +# } +# kubernetes cluster.local in-addr.arpa ip6.arpa { +# pods insecure +# fallthrough in-addr.arpa ip6.arpa +# ttl 30 +# } +# prometheus :9153 +# forward . 
/etc/resolv.conf +# cache 30 +# loop +# reload +# loadbalance +# import custom/*.override +# } +# import custom/*.server + + +# BinaryData +# ==== + +# Events: + +kubectl describe cm coredns-custom -n kube-system +# Data +# ==== + +# enable logging for CoreDNS + +code coredns-custom.yaml + +kubectl apply -f coredns-custom.yaml + +# Force CoreDNS to reload the ConfigMap + +kubectl -n kube-system rollout restart deployment coredns + +kubectl get pods -n kube-system -l k8s-app=kube-dns + +# create DNS query + +kubectl exec -it nginx -- nslookup microsoft.com + +# View the CoreDNS logs + +kubectl logs --namespace kube-system -l k8s-app=kube-dns +# [INFO] 10.224.0.10:47320 - 15830 "A IN microsoft.com. udp 31 false 512" NOERROR qr,rd,ra 176 0.001047529s +# [INFO] 10.224.0.10:47575 - 61320 "AAAA IN microsoft.com. udp 31 false 512" NOERROR qr,rd,ra 236 0.001028862s \ No newline at end of file diff --git a/60_coredns_logs/coredns-custom.yaml b/60_coredns_logs/coredns-custom.yaml new file mode 100644 index 0000000..4691d84 --- /dev/null +++ b/60_coredns_logs/coredns-custom.yaml @@ -0,0 +1,10 @@ +# https://learn.microsoft.com/en-us/azure/aks/coredns-custom + +apiVersion: v1 +kind: ConfigMap +metadata: + name: coredns-custom + namespace: kube-system +data: + log.override: | # any name end with the .override extension + log \ No newline at end of file diff --git a/60_coredns_logs/images/60_coredns__customdns.png b/60_coredns_logs/images/60_coredns__customdns.png new file mode 100644 index 0000000..f1fc840 Binary files /dev/null and b/60_coredns_logs/images/60_coredns__customdns.png differ diff --git a/67-kube-egress-gateway/azure_config_msi.yaml b/67-kube-egress-gateway/azure_config_msi.yaml new file mode 100644 index 0000000..1fc68f9 Binary files /dev/null and b/67-kube-egress-gateway/azure_config_msi.yaml differ diff --git a/67-kube-egress-gateway/commands.ps1 b/67-kube-egress-gateway/commands.ps1 new file mode 100644 index 0000000..2bbd018 --- /dev/null +++ 
b/67-kube-egress-gateway/commands.ps1 @@ -0,0 +1,116 @@ +$AKS_NAME = "aks-cluster" +$AKS_RG = "rg-aks-cluster" +$NODEPOOL_NAME = "npegress" +$IDENTITY_NAME = "identity-egress-gateway" + +az group create --name $AKS_RG --location westeurope + +az aks create -g $AKS_RG -n $AKS_NAME --network-plugin azure -k "1.28.3" --zones 1 2 3 --node-vm-size "Standard_B2als_v2" + +az aks nodepool add -g $AKS_RG --cluster-name $AKS_NAME --name $NODEPOOL_NAME + +az aks get-credentials -g $AKS_RG -n $AKS_NAME + +kubectl taint nodes -l agentpool=$NODEPOOL_NAME kubeegressgateway.azure.com/mode=true:NoSchedule + +kubectl label nodes -l agentpool=$NODEPOOL_NAME kubeegressgateway.azure.com/mode=true + +az aks nodepool update -g $AKS_RG --cluster-name $AKS_NAME --name $NODEPOOL_NAME --disable-cluster-autoscaler + + +# Use UserAssigned Managed Identity + +# Create a UserAssigned managed identity. This identity can be created in any resource group as long as permissions are set correctly. + +az identity create -g $AKS_RG -n $IDENTITY_NAME + +# Retrieve the identityID and clientID from the identity you just created + +$IDENTITY_CLIENT_ID = $(az identity show -g $AKS_RG -n $IDENTITY_NAME -o tsv --query "clientId") +echo $IDENTITY_CLIENT_ID + +$IDENTITY_ID = $(az identity show -g $AKS_RG -n $IDENTITY_NAME -o tsv --query "id") +echo $IDENTITY_ID + +# Assign "Network Contributor" and "Virtual Machine Contributor" roles to the identity. kube-egress-gateway components need these two roles to configure Load Balancer, Public IP Prefix, and VMSS resources. 
+ +$AKS_NODE_RG = $(az aks show -g $AKS_RG -n $AKS_NAME --query "nodeResourceGroup" -o tsv) +echo $AKS_NODE_RG + +$AKS_NODE_RG_ID = $(az group show -g $AKS_NODE_RG --query id -o tsv) +echo $AKS_NODE_RG_ID +# get the VMSS ID of the "npegress" node pool (assumes it is the second VMSS, index [1], in the node resource group - verify with az vmss list) + +$VMSS_ID = $(az vmss list -g $AKS_NODE_RG --query [1].id -o tsv) +echo $VMSS_ID + +$VMSS_NAME = $(az vmss list -g $AKS_NODE_RG --query [1].name -o tsv) +echo $VMSS_NAME + +# assign Network Contributor role on scope networkResourceGroup and vmssResourceGroup to the identity +az role assignment create --role "Network Contributor" --assignee $IDENTITY_CLIENT_ID --scope $AKS_NODE_RG_ID +# az role assignment create --role "Network Contributor" --assignee $IDENTITY_CLIENT_ID --scope $vmssRGID + +# assign Virtual Machine Contributor role on scope gateway vmss to the identity +az role assignment create --role "Virtual Machine Contributor" --assignee $IDENTITY_CLIENT_ID --scope $VMSS_ID + +@" +config: + azureCloudConfig: + cloud: "AzurePublicCloud" + tenantId: "$(az account show --query tenantId -o tsv)" + subscriptionId: "$(az account show --query id -o tsv)" + useManagedIdentityExtension: true + userAssignedIdentityID: "$IDENTITY_ID" + userAgent: "kube-egress-gateway-controller" + resourceGroup: "$AKS_RG" + location: "westeurope" + gatewayLoadBalancerName: "kubeegressgateway-ilb" + loadBalancerResourceGroup: "$AKS_NODE_RG" + vnetName: "$(az network vnet list -g $AKS_NODE_RG --query [0].name -o tsv)" + vnetResourceGroup: "$AKS_NODE_RG" + subnetName: "aks-subnet" +"@ > azure_config_msi.yaml + +# Install kube-egress-gateway as Helm Chart + +git clone https://github.com/Azure/kube-egress-gateway.git + +# To install kube-egress-gateway, you may run below helm command: + +helm install ` + kube-egress-gateway ./kube-egress-gateway/helm/kube-egress-gateway ` + --namespace kube-egress-gateway-system ` + --create-namespace ` + --set common.imageRepository=mcr.microsoft.com/aks ` + --set common.imageTag=v0.0.5 ` + -f 
azure_config_msi.yaml + +# create public IP prefix + +az network public-ip prefix create -g $AKS_RG -n myPIPPrefix --length 31 +$IP_PREFIX_ID=$(az network public-ip prefix create -g $AKS_RG -n myPIPPrefix --length 31 --query id -o tsv) +echo $IP_PREFIX_ID + +@" +apiVersion: egressgateway.kubernetes.azure.com/v1alpha1 +kind: StaticGatewayConfiguration +metadata: + name: my-static-egress-gateway + namespace: default +spec: + gatewayVmssProfile: + vmssResourceGroup: $AKS_NODE_RG + vmssName: $VMSS_NAME + publicIpPrefixSize: 31 + provisionPublicIps: true + publicIpPrefixId: $IP_PREFIX_ID + defaultRoute: staticEgressGateway + excludeCidrs: + - 10.244.0.0/16 + - 10.245.0.0/16 +"@ > static_gateway_config.yaml + +kubectl apply -f static_gateway_config.yaml + +kubectl get staticgatewayconfigurations my-static-egress-gateway -n default -o yaml \ No newline at end of file diff --git a/67-kube-egress-gateway/deployment.yaml b/67-kube-egress-gateway/deployment.yaml new file mode 100644 index 0000000..b5811ee --- /dev/null +++ b/67-kube-egress-gateway/deployment.yaml @@ -0,0 +1,25 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nginx + annotations: + kubernetes.azure.com/static-gateway-configuration: myStaticEgressGateway # required +spec: + replicas: 10 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:latest + ports: + - containerPort: 80 + tolerations: + - key: kubeegressgateway.azure.com/mode + effect: NoSchedule + \ No newline at end of file diff --git a/67-kube-egress-gateway/kube-egress-gateway b/67-kube-egress-gateway/kube-egress-gateway new file mode 160000 index 0000000..68fba1c --- /dev/null +++ b/67-kube-egress-gateway/kube-egress-gateway @@ -0,0 +1 @@ +Subproject commit 68fba1cee933069e245afffcac855819872ae3aa diff --git a/67-kube-egress-gateway/sample_StaticGatewayConfiguration.yaml b/67-kube-egress-gateway/sample_StaticGatewayConfiguration.yaml new file mode 
100644 index 0000000..3bb553a --- /dev/null +++ b/67-kube-egress-gateway/sample_StaticGatewayConfiguration.yaml @@ -0,0 +1,16 @@ +apiVersion: egressgateway.kubernetes.azure.com/v1alpha1 +kind: StaticGatewayConfiguration +metadata: + name: myStaticEgressGateway + namespace: myNamespace +spec: + gatewayVmssProfile: + vmssResourceGroup: myResourceGroup + vmssName: myGatewayVMSS + publicIpPrefixSize: 31 + provisionPublicIps: true + publicIpPrefixId: /subscriptions/mySubscriptionID/resourcegroups/myResourceGroup/providers/Microsoft.Network/publicipprefixes/myPIPPrefix + defaultRoute: staticEgressGateway + excludeCidrs: + - 10.244.0.0/16 + - 10.245.0.0/16 \ No newline at end of file diff --git a/67-kube-egress-gateway/sample_azure_config_msi.yaml b/67-kube-egress-gateway/sample_azure_config_msi.yaml new file mode 100644 index 0000000..3a51c75 --- /dev/null +++ b/67-kube-egress-gateway/sample_azure_config_msi.yaml @@ -0,0 +1,15 @@ +config: + azureCloudConfig: + cloud: "AzurePublicCloud" + tenantId: "" + subscriptionId: "" + useManagedIdentityExtension: true + userAssignedIdentityID: "" + userAgent: "kube-egress-gateway-controller" + resourceGroup: "" + location: "" + gatewayLoadBalancerName: "kubeegressgateway-ilb" + loadBalancerResourceGroup: "" + vnetName: "" + vnetResourceGroup: "" + subnetName: "" \ No newline at end of file diff --git a/67-kube-egress-gateway/static_gateway_config.yaml b/67-kube-egress-gateway/static_gateway_config.yaml new file mode 100644 index 0000000..5ae8d1a Binary files /dev/null and b/67-kube-egress-gateway/static_gateway_config.yaml differ diff --git a/77_migrate_lb_to_natgateway_tf/natgateway.tf b/77_migrate_lb_to_natgateway_tf/natgateway.tf index c6a51fb..1dce5ab 100644 --- a/77_migrate_lb_to_natgateway_tf/natgateway.tf +++ b/77_migrate_lb_to_natgateway_tf/natgateway.tf @@ -20,3 +20,8 @@ resource "azurerm_nat_gateway_public_ip_association" "association" { nat_gateway_id = azurerm_nat_gateway.nat-gateway.id public_ip_address_id = 
azurerm_public_ip.pip.id } + +# resource "azurerm_subnet_nat_gateway_association" "subnet_natgw" { +# subnet_id = azurerm_subnet.subnet.id +# nat_gateway_id = azurerm_nat_gateway.nat-gateway.id +# } \ No newline at end of file diff --git a/77_migrate_lb_to_natgateway_tf/vm.tf b/77_migrate_lb_to_natgateway_tf/vm.tf index 02d4eea..f936169 100644 --- a/77_migrate_lb_to_natgateway_tf/vm.tf +++ b/77_migrate_lb_to_natgateway_tf/vm.tf @@ -50,7 +50,7 @@ resource "azurerm_virtual_network_peering" "direction1" { allow_gateway_transit = false } -resource "azurerm_virtual_network_peering" "vnet_peering_spoke_to_hub" { +resource "azurerm_virtual_network_peering" "direction2" { name = "direction2" virtual_network_name = azurerm_virtual_network.vnet.name resource_group_name = azurerm_resource_group.rg.name diff --git a/80_aks_backup/cmmands.ps1 b/80_aks_backup/cmmands.ps1 index d3c0d3d..e17506c 100644 --- a/80_aks_backup/cmmands.ps1 +++ b/80_aks_backup/cmmands.ps1 @@ -16,7 +16,7 @@ $AKS_RG_02="rg-aks-2" $VAULT_NAME="backup-vault" $VAULT_RG="rg-backup-vault" -$SA_NAME="storage4aks1backup1357" +$SA_NAME="storage4aks1backup13" $SA_RG="rg-backup-storage" $BLOB_CONTAINER_NAME="aks-backup" $SUBSCRIPTION_ID=$(az account list --query [?isDefault].id -o tsv) @@ -48,9 +48,11 @@ az storage container create ` # 4. 
Create first AKS cluster with CSI Disk Driver and Snapshot Controller +az aks get-versions -l westeurope -o table + az group create --name $AKS_RG_01 --location westeurope -az aks create -g $AKS_RG_01 -n $AKS_NAME_01 -k "1.25.5" --zones 1 2 3 +az aks create -g $AKS_RG_01 -n $AKS_NAME_01 -k "1.27.3" --zones 1 2 3 --node-vm-size "Standard_B2als_v2" # Verify that CSI Disk Driver and Snapshot Controller are installed @@ -63,7 +65,7 @@ az aks show -g $AKS_RG_01 -n $AKS_NAME_01 --query storageProfile az group create --name $AKS_RG_02 --location westeurope -az aks create -g $AKS_RG_02 -n $AKS_NAME_02 -k "1.25.5" --zones 1 2 3 +az aks create -g $AKS_RG_02 -n $AKS_NAME_02 -k "1.27.3" --zones 1 2 3 --node-vm-size "Standard_B2als_v2" # Verify that CSI Disk Driver and Snapshot Controller are installed diff --git a/86_log_analytics_custom_config/README.md b/86_log_analytics_custom_config/README.md new file mode 100644 index 0000000..d62d574 --- /dev/null +++ b/86_log_analytics_custom_config/README.md @@ -0,0 +1,50 @@ +# Enabling logging in kube-system namespace + +```sh +# create an AKS cluster + +$AKS_RG="rg-aks-cluster-log-analytics" +$AKS_NAME="aks-cluster" + +az group create --name $AKS_RG --location westeurope + +az aks create -g $AKS_RG -n $AKS_NAME --network-plugin azure --node-vm-size "Standard_B2als_v2" --enable-addons monitoring + +az aks get-credentials -g $AKS_RG -n $AKS_NAME --overwrite-existing + +# check log analytics pods called ama-logs + +kubectl get pods -n kube-system -l component=ama-logs-agent + +# check logs from kube-system namespace + +kubectl get pods -n kube-system + +kubectl logs -n kube-system -l k8s-app=kube-dns + +# check logs from kube-system namespace in Log Analytics + +# check the configuration of the log analytics agent + +kubectl get configmap container-azm-ms-aks-k8scluster -n kube-system -o yaml + +kubectl get configmap container-azm-ms-agentconfig -n kube-system -o yaml + +# apply custom configuration for the log analytics agent + +# 
sample configuration file: https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/kubernetes/container-azm-ms-agentconfig.yaml + +# remove "kube-system" from the list of excluded namespaces + +# apply the new configuration + +kubectl apply -f container-azm-ms-agentconfig.yaml + +# Then all Azure Monitor Agent pods in the cluster will restart. +# The restart is a rolling restart for all Azure Monitor Agent pods, so not all of them restart at the same time. + +kubectl get pods -n kube-system -l component=ama-logs-agent + +# check the logs from kube-system namespace in Log Analytics + +``` \ No newline at end of file diff --git a/86_log_analytics_custom_config/commands.sh b/86_log_analytics_custom_config/commands.sh new file mode 100644 index 0000000..e7d945a --- /dev/null +++ b/86_log_analytics_custom_config/commands.sh @@ -0,0 +1,48 @@ +# Enabling logging in kube-system namespace + +# create an AKS cluster + +$AKS_RG="rg-aks-cluster-log-analytics" +$AKS_NAME="aks-cluster" + +az group create --name $AKS_RG --location westeurope + +az aks create -g $AKS_RG -n $AKS_NAME --network-plugin azure --node-vm-size "Standard_B2als_v2" --enable-addons monitoring + +az aks get-credentials -g $AKS_RG -n $AKS_NAME --overwrite-existing + +# check log analytics pods called ama-logs + +kubectl get pods -n kube-system -l component=ama-logs-agent + +# check logs from kube-system namespace + +kubectl get pods -n kube-system + +kubectl logs -n kube-system -l k8s-app=kube-dns + +# check logs from kube-system namespace in Log Analytics + +# check the configuration of the log analytics agent + +kubectl get configmap container-azm-ms-aks-k8scluster -n kube-system -o yaml + +kubectl get configmap container-azm-ms-agentconfig -n kube-system -o yaml + +# apply custom configuration for the log analytics agent + +# sample configuration file: https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/kubernetes/container-azm-ms-agentconfig.yaml + +# remove "kube-system" from 
the list of excluded namespaces + +# apply the new configuration + +kubectl apply -f container-azm-ms-agentconfig.yaml + +# Then all Azure Monitor Agent pods in the cluster will restart. +# The restart is a rolling restart for all Azure Monitor Agent pods, so not all of them restart at the same time. + +kubectl get pods -n kube-system -l component=ama-logs-agent + +# check the logs from kube-system namespace in Log Analytics + diff --git a/86_log_analytics_custom_config/container-azm-ms-agentconfig.yaml b/86_log_analytics_custom_config/container-azm-ms-agentconfig.yaml new file mode 100644 index 0000000..84b135e --- /dev/null +++ b/86_log_analytics_custom_config/container-azm-ms-agentconfig.yaml @@ -0,0 +1,211 @@ +# src: https://raw.githubusercontent.com/microsoft/Docker-Provider/ci_prod/kubernetes/container-azm-ms-agentconfig.yaml +# doc: https://learn.microsoft.com/en-us/azure/azure-monitor/containers/container-insights-agent-config + +kind: ConfigMap +apiVersion: v1 +metadata: + name: container-azm-ms-agentconfig + namespace: kube-system +data: + schema-version: + #string.used by agent to parse config. supported versions are {v1}. Configs with other schema versions will be rejected by the agent. + v1 + config-version: + #string.used by customer to keep track of this config file's version in their source control/repository (max allowed 10 chars, other chars will be truncated) + ver1 + log-data-collection-settings: |- + # Log data collection settings + # Any errors related to config map settings can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to. + + [log_collection_settings] + [log_collection_settings.stdout] + # In the absense of this configmap, default value for enabled is true + enabled = true + # exclude_namespaces setting holds good only if enabled is set to true + # kube-system,gatekeeper-system log collection are disabled by default in the absence of 'log_collection_settings.stdout' setting. 
If you want to enable kube-system,gatekeeper-system, remove them from the following setting. + # If you want to continue to disable kube-system,gatekeeper-system log collection keep the namespaces in the following setting and add any other namespace you want to disable log collection to the array. + # In the absense of this configmap, default value for exclude_namespaces = ["kube-system","gatekeeper-system"] + exclude_namespaces = ["gatekeeper-system"] # ["kube-system","gatekeeper-system"] + + [log_collection_settings.stderr] + # Default value for enabled is true + enabled = true + # exclude_namespaces setting holds good only if enabled is set to true + # kube-system,gatekeeper-system log collection are disabled by default in the absence of 'log_collection_settings.stderr' setting. If you want to enable kube-system,gatekeeper-system, remove them from the following setting. + # If you want to continue to disable kube-system,gatekeeper-system log collection keep the namespaces in the following setting and add any other namespace you want to disable log collection to the array. 
+ # In the absense of this configmap, default value for exclude_namespaces = ["kube-system","gatekeeper-system"] + exclude_namespaces = ["gatekeeper-system"] # ["kube-system","gatekeeper-system"] + + [log_collection_settings.env_var] + # In the absense of this configmap, default value for enabled is true + enabled = true + [log_collection_settings.enrich_container_logs] + # In the absense of this configmap, default value for enrich_container_logs is false + enabled = true + # When this is enabled (enabled = true), every container log entry (both stdout & stderr) will be enriched with container Name & container Image + [log_collection_settings.collect_all_kube_events] + # In the absense of this configmap, default value for collect_all_kube_events is false + # When the setting is set to false, only the kube events with !normal event type will be collected + enabled = false + # When this is enabled (enabled = true), all kube events including normal events will be collected + [log_collection_settings.schema] + # In the absence of this configmap, default value for containerlog_schema_version is "v1" + # Supported values for this setting are "v1","v2" + # See documentation at https://aka.ms/ContainerLogv2 for benefits of v2 schema over v1 schema before opting for "v2" schema + containerlog_schema_version = "v2" + #[log_collection_settings.enable_multiline_logs] + # fluent-bit based multiline log collection for .NET, Go, Java, and Python stacktraces. + # if enabled will also stitch together container logs split by docker/cri due to size limits(16KB per log line) + # enabled = "false" + + + prometheus-data-collection-settings: |- + # Custom Prometheus metrics data collection settings + [prometheus_data_collection_settings.cluster] + # Cluster level scrape endpoint(s). 
These metrics will be scraped from agent's Replicaset (singleton) + # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to. + + #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h. + interval = "1m" + + ## Uncomment the following settings with valid string arrays for prometheus scraping + #fieldpass = ["metric_to_pass1", "metric_to_pass12"] + + #fielddrop = ["metric_to_drop"] + + # An array of urls to scrape metrics from. + # urls = ["http://myurl:9101/metrics"] + + # An array of Kubernetes services to scrape metrics from. + # kubernetes_services = ["http://my-service-dns.my-namespace:9102/metrics"] + + # When monitor_kubernetes_pods = true, replicaset will scrape Kubernetes pods for the following prometheus annotations: + # - prometheus.io/scrape: Enable scraping for this pod + # - prometheus.io/scheme: Default is http + # - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. + # - prometheus.io/port: If port is not 9102 use this annotation + monitor_kubernetes_pods = false + + ## Restricts Kubernetes monitoring to namespaces for pods that have annotations set and are scraped using the monitor_kubernetes_pods setting. 
+ ## This will take effect when monitor_kubernetes_pods is set to true + ## ex: monitor_kubernetes_pods_namespaces = ["default1", "default2", "default3"] + # monitor_kubernetes_pods_namespaces = ["default1"] + + ## Label selector to target pods which have the specified label + ## This will take effect when monitor_kubernetes_pods is set to true + ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors + # kubernetes_label_selector = "env=dev,app=nginx" + + ## Field selector to target pods which have the specified field + ## This will take effect when monitor_kubernetes_pods is set to true + ## Reference the docs at https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + ## eg. To scrape pods on a specific node + # kubernetes_field_selector = "spec.nodeName=$HOSTNAME" + + [prometheus_data_collection_settings.node] + # Node level scrape endpoint(s). These metrics will be scraped from agent's DaemonSet running in every node in the cluster + # Any errors related to prometheus scraping can be found in the KubeMonAgentEvents table in the Log Analytics workspace that the cluster is sending data to. + + #Interval specifying how often to scrape for metrics. This is duration of time and can be specified for supporting settings by combining an integer value and time unit as a string value. Valid time units are ns, us (or µs), ms, s, m, h. + interval = "1m" + + ## Uncomment the following settings with valid string arrays for prometheus scraping + + # An array of urls to scrape metrics from. 
$NODE_IP (all upper case) will substitute of running Node's IP address + # urls = ["http://$NODE_IP:9103/metrics"] + + #fieldpass = ["metric_to_pass1", "metric_to_pass12"] + + #fielddrop = ["metric_to_drop"] + + metric_collection_settings: |- + # Metrics collection settings for metrics sent to Log Analytics and MDM + [metric_collection_settings.collect_kube_system_pv_metrics] + # In the absense of this configmap, default value for collect_kube_system_pv_metrics is false + # When the setting is set to false, only the persistent volume metrics outside the kube-system namespace will be collected + enabled = false + # When this is enabled (enabled = true), persistent volume metrics including those in the kube-system namespace will be collected + + alertable-metrics-configuration-settings: |- + # Alertable metrics configuration settings for container resource utilization + [alertable_metrics_configuration_settings.container_resource_utilization_thresholds] + # The threshold(Type Float) will be rounded off to 2 decimal points + # Threshold for container cpu, metric will be sent only when cpu utilization exceeds or becomes equal to the following percentage + container_cpu_threshold_percentage = 95.0 + # Threshold for container memoryRss, metric will be sent only when memory rss exceeds or becomes equal to the following percentage + container_memory_rss_threshold_percentage = 95.0 + # Threshold for container memoryWorkingSet, metric will be sent only when memory working set exceeds or becomes equal to the following percentage + container_memory_working_set_threshold_percentage = 95.0 + + # Alertable metrics configuration settings for persistent volume utilization + [alertable_metrics_configuration_settings.pv_utilization_thresholds] + # Threshold for persistent volume usage bytes, metric will be sent only when persistent volume utilization exceeds or becomes equal to the following percentage + pv_usage_threshold_percentage = 60.0 + + # Alertable metrics configuration 
settings for completed jobs count + [alertable_metrics_configuration_settings.job_completion_threshold] + # Threshold for completed job count, metric will be sent only for those jobs which were completed earlier than the following threshold + job_completion_threshold_time_minutes = 360 + integrations: |- + [integrations.azure_network_policy_manager] + collect_basic_metrics = false + collect_advanced_metrics = false + [integrations.azure_subnet_ip_usage] + enabled = false + +# Doc - https://github.com/microsoft/Docker-Provider/blob/ci_prod/Documentation/AgentSettings/ReadMe.md + agent-settings: |- + # prometheus scrape fluent bit settings for high scale + # buffer size should be greater than or equal to chunk size else we set it to chunk size. + # settings scoped to prometheus sidecar container. all values in mb + [agent_settings.prometheus_fbit_settings] + tcp_listener_chunk_size = 10 + tcp_listener_buffer_size = 10 + tcp_listener_mem_buf_limit = 200 + + # prometheus scrape fluent bit settings for high scale + # buffer size should be greater than or equal to chunk size else we set it to chunk size. + # settings scoped to daemonset container. all values in mb + # [agent_settings.node_prometheus_fbit_settings] + # tcp_listener_chunk_size = 1 + # tcp_listener_buffer_size = 1 + # tcp_listener_mem_buf_limit = 10 + + # prometheus scrape fluent bit settings for high scale + # buffer size should be greater than or equal to chunk size else we set it to chunk size. + # settings scoped to replicaset container. all values in mb + # [agent_settings.cluster_prometheus_fbit_settings] + # tcp_listener_chunk_size = 1 + # tcp_listener_buffer_size = 1 + # tcp_listener_mem_buf_limit = 10 + + # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft. + # They increase the maximum stdout/stderr log collection rate but will also cause higher cpu/memory usage. 
+ ## Ref for more details about Ignore_Older - https://docs.fluentbit.io/manual/v/1.7/pipeline/inputs/tail + # [agent_settings.fbit_config] + # log_flush_interval_secs = "1" # default value is 15 + # tail_mem_buf_limit_megabytes = "10" # default value is 10 + # tail_buf_chunksize_megabytes = "1" # default value is 32kb (comment out this line for default) + # tail_buf_maxsize_megabytes = "1" # default value is 32kb (comment out this line for default) + # tail_ignore_older = "5m" # default value same as fluent-bit default i.e. 0m + + # On both AKS & Arc K8s environments, if the cluster is configured with a Forward Proxy then the proxy settings are automatically applied and used for the agent + # In certain configurations the proxy config should be ignored, for example a cluster with AMPLS + Proxy + # in such scenarios, use the following config to ignore proxy settings + # [agent_settings.proxy_config] + # ignore_proxy_settings = "true" # if this is not applied, default value is false + + # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft. + # Configuration settings for the waittime for the network listeners to be available + # [agent_settings.network_listener_waittime] + # tcp_port_25226 = 45 # Port 25226 is used for telegraf to fluent-bit data in ReplicaSet + # tcp_port_25228 = 60 # Port 25228 is used for telegraf to fluentd data + # tcp_port_25229 = 45 # Port 25229 is used for telegraf to fluent-bit data in DaemonSet + + # The following settings are "undocumented", we don't recommend uncommenting them unless directed by Microsoft. 
+ # [agent_settings.mdsd_config] + # monitoring_max_event_rate = "50000" # default 20K eps + # backpressure_memory_threshold_in_mb = "1500" # default 3500MB + # upload_max_size_in_mb = "20" # default 2MB + # upload_frequency_seconds = "1" # default 60 upload_frequency_seconds + # compression_level = "0" # supported levels 0 to 9 and 0 means no compression \ No newline at end of file diff --git a/86_log_analytics_custom_config/images/architecture.png b/86_log_analytics_custom_config/images/architecture.png new file mode 100644 index 0000000..01e09df Binary files /dev/null and b/86_log_analytics_custom_config/images/architecture.png differ