From b01da3a86336106408a891863c38a9761631f120 Mon Sep 17 00:00:00 2001
From: John Murret
Date: Tue, 30 Jul 2024 18:46:12 -0600
Subject: [PATCH] NET-10194 - dns proxy acceptance test for partitions (#4174)

* NET-10194 - dns proxy acceptance test for partitions

* making test more readable
---
 .../consul-dns/consul_dns_partitions_test.go  | 429 ++++++++++++++++++
 .../tests/consul-dns/consul_dns_test.go       |  96 ++--
 .../tests/consul-dns/coredns-original.yaml    |  27 ++
 .../tests/consul-dns/coredns-template.yaml    |  32 ++
 .../templates/dns-proxy-deployment.yaml       |   2 -
 5 files changed, 557 insertions(+), 29 deletions(-)
 create mode 100644 acceptance/tests/consul-dns/consul_dns_partitions_test.go
 create mode 100644 acceptance/tests/consul-dns/coredns-original.yaml
 create mode 100644 acceptance/tests/consul-dns/coredns-template.yaml

diff --git a/acceptance/tests/consul-dns/consul_dns_partitions_test.go b/acceptance/tests/consul-dns/consul_dns_partitions_test.go
new file mode 100644
index 0000000000..c7b51523c8
--- /dev/null
+++ b/acceptance/tests/consul-dns/consul_dns_partitions_test.go
@@ -0,0 +1,429 @@
+// Copyright (c) HashiCorp, Inc.
+// SPDX-License-Identifier: MPL-2.0
+
+package consuldns
+
+import (
+	"fmt"
+	"strconv"
+	"testing"
+	"time"
+
+	terratestk8s "github.com/gruntwork-io/terratest/modules/k8s"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/config"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/consul"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/environment"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/k8s"
+	"github.com/hashicorp/consul-k8s/acceptance/framework/logger"
+	"github.com/hashicorp/consul/api"
+	"github.com/hashicorp/consul/sdk/testutil/retry"
+	"github.com/stretchr/testify/require"
+)
+
+const (
+	staticServerName      = "static-server"
+	staticServerNamespace = "ns1"
+
+	defaultPartition   = "default"
+	secondaryPartition = "secondary"
+	defaultNamespace   = "default"
+)
+
+type dnsWithPartitionsTestCase struct {
+	name           string
+	secure         bool
+	enableDNSProxy bool
+}
+
+type dnsVerification struct {
+	name              string
+	requestingCtx     environment.TestContext
+	svcContext        environment.TestContext
+	svcName           string
+	shouldResolveDNS  bool
+	preProcessingFunc func(t *testing.T)
+}
+
+// TestConsulDNS_WithPartitionsAndCatalogSync verifies DNS queries for services across partitions
+// when the DNS proxy is enabled. It configures CoreDNS so that consul domain queries are
+// forwarded to the Consul DNS proxy. The test validates:
+// - returning the local partition's service when tenancy is not included in the DNS question.
+// - not resolving DNS for unexported services when ACLs are enabled.
+// - resolving DNS for exported services when ACLs are enabled.
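+// For example, static-server.service.secondary.ap.consul should fail to resolve from the
+// default partition while ACLs are enabled, and resolve again once the service is exported
+// from the secondary partition.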
+func TestConsulDNS_WithPartitionsAndCatalogSync(t *testing.T) {
+	env := suite.Environment()
+	cfg := suite.Config()
+
+	if cfg.EnableCNI {
+		t.Skipf("skipping because -enable-cni is set")
+	}
+	if !cfg.EnableEnterprise {
+		t.Skipf("skipping this test because -enable-enterprise is not set")
+	}
+
+	cases := []dnsWithPartitionsTestCase{
+		{
+			name:           "dns service / not secure - ACLs and auto-encrypt not enabled",
+			secure:         false,
+			enableDNSProxy: false,
+		},
+		{
+			name:           "dns service / secure - ACLs and auto-encrypt enabled",
+			secure:         true,
+			enableDNSProxy: false,
+		},
+		{
+			name:           "dns-proxy / not secure - ACLs and auto-encrypt not enabled",
+			secure:         false,
+			enableDNSProxy: true,
+		},
+		{
+			name:           "dns-proxy / secure - ACLs and auto-encrypt enabled",
+			secure:         true,
+			enableDNSProxy: true,
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			defaultClusterContext := env.DefaultContext(t)
+			secondaryClusterContext := env.Context(t, 1)
+
+			// Set up the clusters and the static service.
+			releaseName, consulClient, defaultPartitionQueryOpts, secondaryPartitionQueryOpts := setupClustersAndStaticService(t, cfg,
+				defaultClusterContext, secondaryClusterContext, c, secondaryPartition, defaultPartition)
+
+			// Update CoreDNS to forward consul domain queries to the Consul DNS service or proxy.
+			updateCoreDNSWithConsulDomain(t, defaultClusterContext, releaseName, c.enableDNSProxy)
+			updateCoreDNSWithConsulDomain(t, secondaryClusterContext, releaseName, c.enableDNSProxy)
+
+			podLabelSelector := "app=static-server"
+			// The index of the dnsUtils pod to use for the DNS queries so that each pod name is unique.
+			dnsUtilsPodIndex := 0
+
+			// When ACLs are enabled, an unexported service should not resolve across partitions.
+			shouldResolveUnexportedCrossPartitionDNSRecord := !c.secure
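+
+			// DNS questions issued below take the forms static-server.service.consul and
+			// static-server.service.<partition>.ap.consul; see getVerifications for the full list.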
+
+			// Verify that the service is in the catalog under each partition.
+			verifyServiceInCatalog(t, consulClient, defaultPartitionQueryOpts)
+			verifyServiceInCatalog(t, consulClient, secondaryPartitionQueryOpts)
+
+			logger.Log(t, "verify the service via DNS in each partition of the Consul catalog.")
+			for _, v := range getVerifications(defaultClusterContext, secondaryClusterContext,
+				shouldResolveUnexportedCrossPartitionDNSRecord, cfg) {
+				t.Run(v.name, func(t *testing.T) {
+					if v.preProcessingFunc != nil {
+						v.preProcessingFunc(t)
+					}
+					verifyDNS(t, releaseName, staticServerNamespace, v.requestingCtx, v.svcContext,
+						podLabelSelector, v.svcName, v.shouldResolveDNS, dnsUtilsPodIndex)
+					dnsUtilsPodIndex++
+				})
+			}
+
+			//if c.enableDNSProxy {
+			//	t.Run("restart dns-proxy and verify DNS queries for exported services across partitions", func(t *testing.T) {
+			//		restartDNSProxy(t, releaseName, defaultClusterContext)
+			//		verifyDNS(t, releaseName, staticServerNamespace, defaultClusterContext, secondaryClusterContext,
+			//			podLabelSelector, fmt.Sprintf("%s.service.%s.ap.consul", staticServerName, secondaryPartition), true,
+			//			dnsUtilsPodIndex)
+			//		dnsUtilsPodIndex++
+			//		restartDNSProxy(t, releaseName, secondaryClusterContext)
+			//		verifyDNS(t, releaseName, staticServerNamespace, secondaryClusterContext, defaultClusterContext,
+			//			podLabelSelector, fmt.Sprintf("%s.service.%s.ap.consul", staticServerName, defaultPartition), true,
+			//			dnsUtilsPodIndex)
+			//	})
+			//}
+		})
+	}
+}
+
+//func restartDNSProxy(t *testing.T, releaseName string, ctx environment.TestContext) {
+//	dnsDeploymentName := fmt.Sprintf("deployment/%s-consul-dns-proxy", releaseName)
+//	restartDNSProxyCommand := []string{"rollout", "restart", dnsDeploymentName}
+//	k8sOptions := ctx.KubectlOptions(t)
+//	logger.Log(t, fmt.Sprintf("restarting the dns-proxy deployment in %s k8s context", k8sOptions.ContextName))
+//	_, err := k8s.RunKubectlAndGetOutputE(t, k8sOptions, restartDNSProxyCommand...)
+//	require.NoError(t, err)
+//
+//	// Wait for restart to finish.
+//	out, err := k8s.RunKubectlAndGetOutputE(t, k8sOptions, "rollout", "status", "--timeout", "1m", "--watch", dnsDeploymentName)
+//	require.NoError(t, err, out, "rollout status command errored, this likely means the rollout didn't complete in time")
+//	logger.Log(t, fmt.Sprintf("dns-proxy deployment in %s k8s context has finished restarting", k8sOptions.ContextName))
+//}
+
+func getVerifications(defaultClusterContext environment.TestContext, secondaryClusterContext environment.TestContext,
+	shouldResolveUnexportedCrossPartitionDNSRecord bool, cfg *config.TestConfig) []dnsVerification {
+	serviceRequestWithNoPartition := fmt.Sprintf("%s.service.consul", staticServerName)
+	serviceRequestInDefaultPartition := fmt.Sprintf("%s.service.%s.ap.consul", staticServerName, defaultPartition)
+	serviceRequestInSecondaryPartition := fmt.Sprintf("%s.service.%s.ap.consul", staticServerName, secondaryPartition)
+	return []dnsVerification{
+		{
+			name:             "verify static-server.service.consul from the default partition resolves the default partition's IP address.",
+			requestingCtx:    defaultClusterContext,
+			svcContext:       defaultClusterContext,
+			svcName:          serviceRequestWithNoPartition,
+			shouldResolveDNS: true,
+		},
+		{
+			name:             "verify static-server.service.default.ap.consul resolves the default partition's IP address.",
+			requestingCtx:    defaultClusterContext,
+			svcContext:       defaultClusterContext,
+			svcName:          serviceRequestInDefaultPartition,
+			shouldResolveDNS: true,
+		},
+		{
+			name:             "verify the unexported static-server.service.secondary.ap.consul from the default partition. With ACLs turned on, this should not resolve. Otherwise, it will resolve.",
+			requestingCtx:    defaultClusterContext,
+			svcContext:       secondaryClusterContext,
+			svcName:          serviceRequestInSecondaryPartition,
+			shouldResolveDNS: shouldResolveUnexportedCrossPartitionDNSRecord,
+		},
+		{
+			name:             "verify static-server.service.secondary.ap.consul from the secondary partition.",
+			requestingCtx:    secondaryClusterContext,
+			svcContext:       secondaryClusterContext,
+			svcName:          serviceRequestInSecondaryPartition,
+			shouldResolveDNS: true,
+		},
+		{
+			name:             "verify static-server.service.consul from the secondary partition returns the IP in the secondary partition.",
+			requestingCtx:    secondaryClusterContext,
+			svcContext:       secondaryClusterContext,
+			svcName:          serviceRequestWithNoPartition,
+			shouldResolveDNS: true,
+		},
+		{
+			name:             "verify static-server.service.default.ap.consul from the secondary partition. With ACLs turned on, this should not resolve. Otherwise, it will resolve.",
+			requestingCtx:    secondaryClusterContext,
+			svcContext:       defaultClusterContext,
+			svcName:          serviceRequestInDefaultPartition,
+			shouldResolveDNS: shouldResolveUnexportedCrossPartitionDNSRecord,
+		},
+		{
+			name:             "verify static-server.service.secondary.ap.consul from the default partition once the service is exported.",
+			requestingCtx:    defaultClusterContext,
+			svcContext:       secondaryClusterContext,
+			svcName:          serviceRequestInSecondaryPartition,
+			shouldResolveDNS: true,
+			preProcessingFunc: func(t *testing.T) {
+				k8s.KubectlApplyK(t, secondaryClusterContext.KubectlOptions(t), "../fixtures/cases/crd-partitions/secondary-partition-default")
+				helpers.Cleanup(t, cfg.NoCleanupOnFailure, cfg.NoCleanup, func() {
+					k8s.KubectlDeleteK(t, secondaryClusterContext.KubectlOptions(t), "../fixtures/cases/crd-partitions/secondary-partition-default")
+				})
+			},
+		},
+		{
+			name:             "verify static-server.service.default.ap.consul from the secondary partition once the service is exported.",
+			requestingCtx:    secondaryClusterContext,
+			svcContext:       defaultClusterContext,
+			svcName:          serviceRequestInDefaultPartition,
+			shouldResolveDNS: true,
+			preProcessingFunc: func(t *testing.T) {
+				k8s.KubectlApplyK(t, defaultClusterContext.KubectlOptions(t), "../fixtures/cases/crd-partitions/default-partition-default")
+				helpers.Cleanup(t, cfg.NoCleanupOnFailure, cfg.NoCleanup, func() {
+					k8s.KubectlDeleteK(t, defaultClusterContext.KubectlOptions(t), "../fixtures/cases/crd-partitions/default-partition-default")
+				})
+			},
+		},
+	}
+}
+
+func verifyServiceInCatalog(t *testing.T, consulClient *api.Client, queryOpts *api.QueryOptions) {
+	logger.Log(t, fmt.Sprintf("verify the service in the %q partition of the Consul catalog.", queryOpts.Partition))
+	svc, _, err := consulClient.Catalog().Service(staticServerName, "", queryOpts)
+	require.NoError(t, err)
+	require.Len(t, svc, 1)
+	require.Equal(t, []string{"k8s"}, svc[0].ServiceTags)
+}
+
+func setupClustersAndStaticService(t *testing.T, cfg *config.TestConfig, defaultClusterContext environment.TestContext,
+	secondaryClusterContext environment.TestContext, c dnsWithPartitionsTestCase, secondaryPartition string,
+	defaultPartition string) (string, *api.Client, *api.QueryOptions, *api.QueryOptions) {
+	commonHelmValues := map[string]string{
+		"global.adminPartitions.enabled": "true",
+		"global.enableConsulNamespaces":  "true",
+
+		"global.tls.enabled":   "true",
+		"global.tls.httpsOnly": strconv.FormatBool(c.secure),
+
+		"global.acls.manageSystemACLs": strconv.FormatBool(c.secure),
+
+		"syncCatalog.enabled": "true",
+		// When mirroringK8S is set, this setting is ignored.
+		"syncCatalog.consulNamespaces.consulDestinationNamespace": defaultNamespace,
+		"syncCatalog.consulNamespaces.mirroringK8S":               "false",
+		"syncCatalog.addK8SNamespaceSuffix":                       "false",
+
+		"dns.enabled":           "true",
+		"dns.proxy.enabled":     strconv.FormatBool(c.enableDNSProxy),
+		"dns.enableRedirection": strconv.FormatBool(cfg.EnableTransparentProxy),
+	}
+
+	serverHelmValues := map[string]string{
+		"server.exposeGossipAndRPCPorts": "true",
+		"server.extraConfig":             `"{\"log_level\": \"TRACE\"}"`,
+	}
+
+	if cfg.UseKind {
+		serverHelmValues["server.exposeService.type"] = "NodePort"
+		serverHelmValues["server.exposeService.nodePort.https"] = "30000"
+	}
+
+	releaseName := helpers.RandomName()
+
+	helpers.MergeMaps(serverHelmValues, commonHelmValues)
+
+	// Install the consul cluster with servers in the default kubernetes context.
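+	// This release runs the Consul servers in the default partition; the secondary
+	// partition reaches them through the <release>-consul-expose-servers service
+	// referenced below.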
+	defaultConsulCluster := consul.NewHelmCluster(t, serverHelmValues, defaultClusterContext, cfg, releaseName)
+	defaultConsulCluster.Create(t)
+
+	// Get the TLS CA certificate and key secrets from the server cluster and apply them to the client cluster.
+	caCertSecretName := fmt.Sprintf("%s-consul-ca-cert", releaseName)
+	caKeySecretName := fmt.Sprintf("%s-consul-ca-key", releaseName)
+
+	logger.Logf(t, "retrieving ca cert secret %s from the server cluster and applying to the client cluster", caCertSecretName)
+	k8s.CopySecret(t, defaultClusterContext, secondaryClusterContext, caCertSecretName)
+
+	if !c.secure {
+		// When auto-encrypt is disabled, both the CA cert and the CA key must be available
+		// in the clients cluster to generate client certificates and keys.
+		logger.Logf(t, "retrieving ca key secret %s from the server cluster and applying to the client cluster", caKeySecretName)
+		k8s.CopySecret(t, defaultClusterContext, secondaryClusterContext, caKeySecretName)
+	}
+
+	partitionToken := fmt.Sprintf("%s-consul-partitions-acl-token", releaseName)
+	if c.secure {
+		logger.Logf(t, "retrieving partition token secret %s from the server cluster and applying to the client cluster", partitionToken)
+		k8s.CopySecret(t, defaultClusterContext, secondaryClusterContext, partitionToken)
+	}
+
+	partitionServiceName := fmt.Sprintf("%s-consul-expose-servers", releaseName)
+	partitionSvcAddress := k8s.ServiceHost(t, cfg, defaultClusterContext, partitionServiceName)
+
+	k8sAuthMethodHost := k8s.KubernetesAPIServerHost(t, cfg, secondaryClusterContext)
+
+	// Create the client cluster.
+	clientHelmValues := map[string]string{
+		"global.enabled": "false",
+
+		"global.adminPartitions.name": secondaryPartition,
+
+		"global.tls.caCert.secretName": caCertSecretName,
+		"global.tls.caCert.secretKey":  "tls.crt",
+
+		"externalServers.enabled":       "true",
+		"externalServers.hosts[0]":      partitionSvcAddress,
+		"externalServers.tlsServerName": "server.dc1.consul",
+	}
+
+	if c.secure {
+		// Set up the partition token and the auth method host when ACLs are enabled.
+		clientHelmValues["global.acls.bootstrapToken.secretName"] = partitionToken
+		clientHelmValues["global.acls.bootstrapToken.secretKey"] = "token"
+		clientHelmValues["externalServers.k8sAuthMethodHost"] = k8sAuthMethodHost
+	} else {
+		// Provide the CA key when auto-encrypt is disabled.
+		clientHelmValues["global.tls.caKey.secretName"] = caKeySecretName
+		clientHelmValues["global.tls.caKey.secretKey"] = "tls.key"
+	}
+
+	if cfg.UseKind {
+		clientHelmValues["externalServers.httpsPort"] = "30000"
+	}
+
+	helpers.MergeMaps(clientHelmValues, commonHelmValues)
+
+	// Install the consul cluster without servers in the client cluster kubernetes context.
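+	// With global.enabled=false, this release deploys only the non-server components
+	// (catalog sync and DNS) into the secondary partition and points them at the
+	// external servers configured above.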
+	secondaryConsulCluster := consul.NewHelmCluster(t, clientHelmValues, secondaryClusterContext, cfg, releaseName)
+	secondaryConsulCluster.Create(t)
+
+	defaultStaticServerOpts := &terratestk8s.KubectlOptions{
+		ContextName: defaultClusterContext.KubectlOptions(t).ContextName,
+		ConfigPath:  defaultClusterContext.KubectlOptions(t).ConfigPath,
+		Namespace:   staticServerNamespace,
+	}
+	secondaryStaticServerOpts := &terratestk8s.KubectlOptions{
+		ContextName: secondaryClusterContext.KubectlOptions(t).ContextName,
+		ConfigPath:  secondaryClusterContext.KubectlOptions(t).ConfigPath,
+		Namespace:   staticServerNamespace,
+	}
+
+	logger.Logf(t, "creating namespace %s in the servers cluster", staticServerNamespace)
+	k8s.RunKubectl(t, defaultClusterContext.KubectlOptions(t), "create", "ns", staticServerNamespace)
+	helpers.Cleanup(t, cfg.NoCleanupOnFailure, cfg.NoCleanup, func() {
+		k8s.RunKubectl(t, defaultClusterContext.KubectlOptions(t), "delete", "ns", staticServerNamespace)
+	})
+
+	logger.Logf(t, "creating namespace %s in the clients cluster", staticServerNamespace)
+	k8s.RunKubectl(t, secondaryClusterContext.KubectlOptions(t), "create", "ns", staticServerNamespace)
+	helpers.Cleanup(t, cfg.NoCleanupOnFailure, cfg.NoCleanup, func() {
+		k8s.RunKubectl(t, secondaryClusterContext.KubectlOptions(t), "delete", "ns", staticServerNamespace)
+	})
+
+	consulClient, _ := defaultConsulCluster.SetupConsulClient(t, c.secure)
+
+	defaultPartitionQueryOpts := &api.QueryOptions{Namespace: defaultNamespace, Partition: defaultPartition}
+	secondaryPartitionQueryOpts := &api.QueryOptions{Namespace: defaultNamespace, Partition: secondaryPartition}
+
+	// Check that the static-server ACL tokens are deleted on cleanup.
+	if c.secure {
+		// We need to register the cleanup function before we create the deployments
+		// because golang executes them in reverse order, i.e. the last registered
+		// cleanup function is executed first.
+		t.Cleanup(func() {
+			retry.Run(t, func(r *retry.R) {
+				tokens, _, err := consulClient.ACL().TokenList(defaultPartitionQueryOpts)
+				require.NoError(r, err)
+				for _, token := range tokens {
+					require.NotContains(r, token.Description, staticServerName)
+				}
+
+				tokens, _, err = consulClient.ACL().TokenList(secondaryPartitionQueryOpts)
+				require.NoError(r, err)
+				for _, token := range tokens {
+					require.NotContains(r, token.Description, staticServerName)
+				}
+			})
+		})
+	}
+
+	logger.Log(t, "creating a static-server with a service")
+	// Create the service in the default partition.
+	k8s.DeployKustomize(t, defaultStaticServerOpts, cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/bases/static-server")
+	// Create the service in the secondary partition.
+	k8s.DeployKustomize(t, secondaryStaticServerOpts, cfg.NoCleanupOnFailure, cfg.NoCleanup, cfg.DebugDirectory, "../fixtures/bases/static-server")
+
+	logger.Log(t, "checking that the service has been synced to Consul")
+	var services map[string][]string
+	counter := &retry.Counter{Count: 30, Wait: 30 * time.Second}
+	retry.RunWith(counter, t, func(r *retry.R) {
+		var err error
+		// List services in the default partition catalog.
+		services, _, err = consulClient.Catalog().Services(defaultPartitionQueryOpts)
+		require.NoError(r, err)
+		require.Contains(r, services, staticServerName,
+			"service %q is not in Consul's list of services %s in the default partition", staticServerName, services)
+		// List services in the secondary partition catalog.
+		services, _, err = consulClient.Catalog().Services(secondaryPartitionQueryOpts)
+		require.NoError(r, err)
+		require.Contains(r, services, staticServerName,
+			"service %q is not in Consul's list of services %s in the secondary partition", staticServerName, services)
+	})
+
+	logger.Log(t, "verify the service in the default partition of the Consul catalog.")
+	service, _, err := consulClient.Catalog().Service(staticServerName, "", defaultPartitionQueryOpts)
+	require.NoError(t, err)
+	require.Len(t, service, 1)
+	require.Equal(t, []string{"k8s"}, service[0].ServiceTags)
+
+	return releaseName, consulClient, defaultPartitionQueryOpts, secondaryPartitionQueryOpts
+}
diff --git a/acceptance/tests/consul-dns/consul_dns_test.go b/acceptance/tests/consul-dns/consul_dns_test.go
index 04e3d2c6ee..c70b8d2fae 100644
--- a/acceptance/tests/consul-dns/consul_dns_test.go
+++ b/acceptance/tests/consul-dns/consul_dns_test.go
@@ -7,9 +7,11 @@ import (
 	"context"
 	"fmt"
 	"github.com/hashicorp/consul-k8s/acceptance/framework/environment"
+	"os"
 	"strconv"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/hashicorp/consul-k8s/acceptance/framework/consul"
 	"github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
@@ -20,6 +22,10 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
+// TestConsulDNS configures CoreDNS so that consul domain queries are forwarded to the
+// Consul DNS service or the Consul DNS proxy, depending on the test case. The test
+// validates that DNS queries for .consul services are resolved in secure and
+// non-secure modes.
 func TestConsulDNS(t *testing.T) {
 	cfg := suite.Config()
 	if cfg.EnableCNI {
@@ -52,43 +58,70 @@ func TestConsulDNS(t *testing.T) {
 			"global.acls.manageSystemACLs": strconv.FormatBool(c.secure),
 			"dns.proxy.enabled":            strconv.FormatBool(c.enableDNSProxy),
 		}
+
 		cluster := consul.NewHelmCluster(t, helmValues, ctx, suite.Config(), releaseName)
 		cluster.Create(t)
 
-		contextNamespace := ctx.KubectlOptions(t).Namespace
-
-		verifyDNS(t, releaseName, c.enableDNSProxy, contextNamespace, ctx, ctx, "app=consul,component=server",
+		updateCoreDNSWithConsulDomain(t, ctx, releaseName, c.enableDNSProxy)
+		verifyDNS(t, releaseName, ctx.KubectlOptions(t).Namespace, ctx, ctx, "app=consul,component=server",
 			"consul.service.consul", true, 0)
-
 	})
 }
 
-func verifyDNS(t *testing.T, releaseName string, enableDNSProxy bool, svcNamespace string, requestingCtx, svcContext environment.TestContext,
-	podLabelSelector, svcName string, shouldResolveDNSRecord bool, dnsUtilsPodIndex int) {
-	logger.Log(t, "get the in cluster dns service or proxy.")
-	dnsSvcName := fmt.Sprintf("%s-consul-dns", releaseName)
-	if enableDNSProxy {
-		dnsSvcName += "-proxy"
+func updateCoreDNSWithConsulDomain(t *testing.T, ctx environment.TestContext, releaseName string, enableDNSProxy bool) {
+	updateCoreDNSFile(t, ctx, releaseName, enableDNSProxy, "coredns-custom.yaml")
+	updateCoreDNS(t, ctx, "coredns-custom.yaml")
+
+	t.Cleanup(func() {
+		updateCoreDNS(t, ctx, "coredns-original.yaml")
+		time.Sleep(5 * time.Second)
+	})
+}
+
+func updateCoreDNSFile(t *testing.T, ctx environment.TestContext, releaseName string,
+	enableDNSProxy bool, dnsFileName string) {
+	dnsIP, err := getDNSServiceClusterIP(t, ctx, releaseName, enableDNSProxy)
+	require.NoError(t, err)
+
+	input, err := os.ReadFile("coredns-template.yaml")
+	require.NoError(t, err)
+	newContents := strings.Replace(string(input), "{{CONSUL_DNS_IP}}", dnsIP, -1)
+	err = os.WriteFile(dnsFileName, []byte(newContents), os.FileMode(0644))
+	require.NoError(t, err)
+}
+
+func updateCoreDNS(t *testing.T, ctx environment.TestContext, coreDNSConfigFile string) {
+	coreDNSCommand := []string{
+		"replace", "-n", "kube-system", "-f", coreDNSConfigFile,
 	}
-	dnsService, err := requestingCtx.KubernetesClient(t).CoreV1().Services(requestingCtx.KubectlOptions(t).Namespace).Get(context.Background(), dnsSvcName, metav1.GetOptions{})
+	logs, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), coreDNSCommand...)
+	require.NoError(t, err)
+	require.Contains(t, logs, "configmap/coredns replaced")
+	restartCoreDNSCommand := []string{"rollout", "restart", "deployment/coredns", "-n", "kube-system"}
+	_, err = k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), restartCoreDNSCommand...)
 	require.NoError(t, err)
-	dnsIP := dnsService.Spec.ClusterIP
+	// Wait for the restart to finish.
+	out, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "rollout", "status", "--timeout", "1m", "--watch", "deployment/coredns", "-n", "kube-system")
+	require.NoError(t, err, out, "rollout status command errored, this likely means the rollout didn't complete in time")
+}
 
+func verifyDNS(t *testing.T, releaseName string, svcNamespace string, requestingCtx, svcContext environment.TestContext,
+	podLabelSelector, svcName string, shouldResolveDNSRecord bool, dnsUtilsPodIndex int) {
 	podList, err := svcContext.KubernetesClient(t).CoreV1().Pods(svcNamespace).List(context.Background(), metav1.ListOptions{
 		LabelSelector: podLabelSelector,
 	})
 	require.NoError(t, err)
 	servicePodIPs := make([]string, len(podList.Items))
-	for _, serverPod := range podList.Items {
-		servicePodIPs = append(servicePodIPs, serverPod.Status.PodIP)
+	for i, serverPod := range podList.Items {
+		servicePodIPs[i] = serverPod.Status.PodIP
 	}
 
 	logger.Log(t, "launch a pod to test the dns resolution.")
 	dnsUtilsPod := fmt.Sprintf("%s-dns-utils-pod-%d", releaseName, dnsUtilsPodIndex)
 	dnsTestPodArgs := []string{
-		"run", "-it", dnsUtilsPod, "--restart", "Never", "--image", "anubhavmishra/tiny-tools", "--", "dig", fmt.Sprintf("@%s", dnsSvcName), svcName,
+		"run", "-it", dnsUtilsPod, "--restart", "Never", "--image", "anubhavmishra/tiny-tools", "--", "dig", svcName,
 	}
 
 	helpers.Cleanup(t, suite.Config().NoCleanupOnFailure, suite.Config().NoCleanup, func() {
@@ -117,24 +150,33 @@ func verifyDNS(t *testing.T, releaseName string, enableDNSProxy bool, svcNamespa
-		// We assert on the existence of the ANSWER SECTION, The consul-server IPs being present in the ANSWER SECTION and the the DNS IP mentioned in the SERVER: field
+		// We assert on the existence of the ANSWER SECTION and the consul-server IPs being present in the ANSWER SECTION.
 		logger.Log(t, "verify the DNS results.")
-		require.Contains(r, logs, fmt.Sprintf("SERVER: %s", dnsIP))
 		// strip logs of tabs, newlines and spaces to make it easier to assert on the content when there is a DNS match
 		strippedLogs := strings.Replace(logs, "\t", "", -1)
 		strippedLogs = strings.Replace(strippedLogs, "\n", "", -1)
 		strippedLogs = strings.Replace(strippedLogs, " ", "", -1)
 		for _, ip := range servicePodIPs {
-			if ip != "" {
-				aRecordPattern := "%s.0INA%s"
-				if shouldResolveDNSRecord {
-					require.Contains(r, logs, "ANSWER SECTION:")
-					require.Contains(r, strippedLogs, fmt.Sprintf(aRecordPattern, svcName, ip))
-				} else {
-					require.NotContains(r, logs, "ANSWER SECTION:")
-					require.NotContains(r, strippedLogs, fmt.Sprintf(aRecordPattern, svcName, ip))
-					require.Contains(r, logs, "status: NXDOMAIN")
-					require.Contains(r, logs, "AUTHORITY SECTION:\nconsul.\t\t\t0\tIN\tSOA\tns.consul. hostmaster.consul.")
-				}
+			aRecordPattern := "%s.5INA%s"
+			aRecord := fmt.Sprintf(aRecordPattern, svcName, ip)
+			if shouldResolveDNSRecord {
+				require.Contains(r, logs, "ANSWER SECTION:")
+				require.Contains(r, strippedLogs, aRecord)
+			} else {
+				require.NotContains(r, logs, "ANSWER SECTION:")
+				require.NotContains(r, strippedLogs, aRecord)
+				require.Contains(r, logs, "status: NXDOMAIN")
+				require.Contains(r, logs, "AUTHORITY SECTION:\nconsul.\t\t\t5\tIN\tSOA\tns.consul. hostmaster.consul.")
+			}
 			}
 		}
 	})
 }
+
+func getDNSServiceClusterIP(t *testing.T, requestingCtx environment.TestContext, releaseName string, enableDNSProxy bool) (string, error) {
+	logger.Log(t, "get the in cluster dns service or proxy.")
+	dnsSvcName := fmt.Sprintf("%s-consul-dns", releaseName)
+	if enableDNSProxy {
+		dnsSvcName += "-proxy"
+	}
+	dnsService, err := requestingCtx.KubernetesClient(t).CoreV1().Services(requestingCtx.KubectlOptions(t).Namespace).Get(context.Background(), dnsSvcName, metav1.GetOptions{})
+	require.NoError(t, err)
+	return dnsService.Spec.ClusterIP, nil
+}
diff --git a/acceptance/tests/consul-dns/coredns-original.yaml b/acceptance/tests/consul-dns/coredns-original.yaml
new file mode 100644
index 0000000000..ba59c03471
--- /dev/null
+++ b/acceptance/tests/consul-dns/coredns-original.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+data:
+  Corefile: |
+    .:53 {
+        errors
+        health {
+           lameduck 5s
+        }
+        ready
+        kubernetes cluster.local in-addr.arpa ip6.arpa {
+           pods insecure
+           fallthrough in-addr.arpa ip6.arpa
+           ttl 30
+        }
+        prometheus :9153
+        forward . /etc/resolv.conf {
+           max_concurrent 1000
+        }
+        cache 30
+        loop
+        reload
+        loadbalance
+    }
+kind: ConfigMap
+metadata:
+  name: coredns
+  namespace: kube-system
\ No newline at end of file
diff --git a/acceptance/tests/consul-dns/coredns-template.yaml b/acceptance/tests/consul-dns/coredns-template.yaml
new file mode 100644
index 0000000000..1a00674704
--- /dev/null
+++ b/acceptance/tests/consul-dns/coredns-template.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+data:
+  Corefile: |
+    .:53 {
+        errors
+        health {
+           lameduck 5s
+        }
+        ready
+        kubernetes cluster.local in-addr.arpa ip6.arpa {
+           pods insecure
+           fallthrough in-addr.arpa ip6.arpa
+           ttl 30
+        }
+        prometheus :9153
+        forward . /etc/resolv.conf {
+           max_concurrent 1000
+        }
+        cache 30
+        loop
+        reload
+        loadbalance
+    }
+    consul:53 {
+        errors
+        cache 30
+        forward . {{CONSUL_DNS_IP}}
+    }
+kind: ConfigMap
+metadata:
+  name: coredns
+  namespace: kube-system
\ No newline at end of file
diff --git a/charts/consul/templates/dns-proxy-deployment.yaml b/charts/consul/templates/dns-proxy-deployment.yaml
index 6af6fdfa28..c4cad4332e 100644
--- a/charts/consul/templates/dns-proxy-deployment.yaml
+++ b/charts/consul/templates/dns-proxy-deployment.yaml
@@ -188,11 +188,9 @@ spec:
             timeoutSeconds: 5
           ports:
           - containerPort: {{ .Values.dns.proxy.port }}
-            hostPort: {{ .Values.dns.proxy.port }}
             protocol: "TCP"
             name: dns-tcp
          - containerPort: {{ .Values.dns.proxy.port }}
-            hostPort: {{ .Values.dns.proxy.port }}
             protocol: "UDP"
             name: dns-udp
 {{- end }}
\ No newline at end of file
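
For one-off debugging of the CoreDNS forwarding set up by this patch, the same lookup the dnsUtils pod performs with dig can be approximated from a Go program pointed directly at the DNS service or proxy. This is a minimal sketch, not part of the patch; the ClusterIP below is a hypothetical placeholder for the value that getDNSServiceClusterIP returns, and the queried name is one of the names exercised in getVerifications.

package main

import (
	"context"
	"fmt"
	"net"
	"time"
)

func main() {
	// Hypothetical ClusterIP of the <release>-consul-dns-proxy (or <release>-consul-dns)
	// service; in the acceptance test this value comes from getDNSServiceClusterIP.
	consulDNSIP := "10.96.0.10"

	r := &net.Resolver{
		PreferGo: true,
		Dial: func(ctx context.Context, network, address string) (net.Conn, error) {
			d := net.Dialer{Timeout: 2 * time.Second}
			// Send every query to the Consul DNS endpoint instead of /etc/resolv.conf,
			// mimicking the consul:53 forward block in coredns-template.yaml.
			return d.DialContext(ctx, network, net.JoinHostPort(consulDNSIP, "53"))
		},
	}

	ips, err := r.LookupIPAddr(context.Background(), "static-server.service.secondary.ap.consul")
	if err != nil {
		// Unexported cross-partition names return NXDOMAIN when ACLs are enabled.
		fmt.Println("lookup failed:", err)
		return
	}
	for _, ip := range ips {
		fmt.Println(ip.IP)
	}
}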