From b56829652ca9b9f40316bc7b16a420a9d0a8075c Mon Sep 17 00:00:00 2001 From: Nell Jerram Date: Thu, 22 Jun 2023 12:40:43 +0100 Subject: [PATCH 1/3] Set DNS trusted servers using FelixConfig instead of FELIX_ env var For Rancher and OpenShift the operator needs to set a non-default value for DNS trusted servers, and it was previously doing that by setting a FELIX_ env var in the node daemonset. But that makes it impossible for any further tweaks to that using FelixConfiguration - because FelixConfiguration settings are always overridden by FELIX_ env vars. To allow later tweaks, change the operator to create or patch FelixConfiguration instead. References: https://tigera.atlassian.net/browse/RS-897 https://tigera.atlassian.net/browse/EV-3193 --- .../crd.projectcalico.org/v1/felixconfig.go | 8 ++ .../applicationlayer_controller.go | 83 ++++++++----------- .../egressgateway/egressgateway_controller.go | 28 ++----- .../installation/core_controller.go | 58 +++++++------ .../installation/core_controller_test.go | 14 ++++ pkg/controller/utils/felix_configuration.go | 54 ++++++++++++ pkg/render/node.go | 11 +-- pkg/render/node_test.go | 2 - 8 files changed, 151 insertions(+), 107 deletions(-) create mode 100644 pkg/controller/utils/felix_configuration.go diff --git a/pkg/apis/crd.projectcalico.org/v1/felixconfig.go b/pkg/apis/crd.projectcalico.org/v1/felixconfig.go index 25a6797443..1bb04d0993 100644 --- a/pkg/apis/crd.projectcalico.org/v1/felixconfig.go +++ b/pkg/apis/crd.projectcalico.org/v1/felixconfig.go @@ -379,6 +379,14 @@ type FelixConfigurationSpec struct { EgressIPVXLANPort *int `json:"egressIPVXLANPort,omitempty"` // EgressIPVXLANVNI is the VNI ID of vxlan tunnel device for egress traffic. [Default: 4097] EgressIPVXLANVNI *int `json:"egressIPVXLANVNI,omitempty"` + + // The DNS servers that Felix should trust. Each entry here must be `[:]` - indicating an + // explicit DNS server IP - or `k8s-service:[/][:port]` - indicating a Kubernetes DNS + // service. `` defaults to the first service port, or 53 for an IP, and `` to + // `kube-system`. An IPv6 address with a port must use the square brackets convention, for example + // `[fd00:83a6::12]:5353`.Note that Felix (calico-node) will need RBAC permission to read the details of + // each service specified by a `k8s-service:...` form. [Default: "k8s-service:kube-dns"]. + DNSTrustedServers *[]string `json:"dnsTrustedServers,omitempty"` } type RouteTableRange struct { diff --git a/pkg/controller/applicationlayer/applicationlayer_controller.go b/pkg/controller/applicationlayer/applicationlayer_controller.go index 88cd3f208c..e051e689a4 100644 --- a/pkg/controller/applicationlayer/applicationlayer_controller.go +++ b/pkg/controller/applicationlayer/applicationlayer_controller.go @@ -521,58 +521,47 @@ func (r *ReconcileApplicationLayer) getTProxyMode(al *operatorv1.ApplicationLaye // patchFelixConfiguration takes all application layer specs as arguments and patches felix config. // If at least one of the specs requires TPROXYMode as "Enabled" it'll be patched as "Enabled" otherwise it is "Disabled". func (r *ReconcileApplicationLayer) patchFelixConfiguration(ctx context.Context, al *operatorv1.ApplicationLayer) error { - // Fetch any existing default FelixConfiguration object. - fc := &crdv1.FelixConfiguration{} - err := r.client.Get(ctx, types.NamespacedName{Name: "default"}, fc) - if err != nil && !apierrors.IsNotFound(err) { - r.status.SetDegraded(operatorv1.ResourceReadError, "Unable to read FelixConfiguration", err, log) - return err - } + _, err := utils.PatchFelixConfiguration(ctx, r.client, func(fc *crdv1.FelixConfiguration) bool { + var tproxyMode crdv1.TPROXYModeOption + if ok, v := r.getTProxyMode(al); ok { + tproxyMode = v + } else { + if fc.Spec.TPROXYMode == nil { + // Workaround: we'd like to always force the value to be the correct one, matching the operator's + // configuration. However, during an upgrade from a version that predates the TPROXYMode option, + // Felix hits a bug and gets confused by the new config parameter, which in turn triggers a restart. + // Work around that by relying on Disabled being the default value for the field instead. + // + // The felix bug was fixed in v3.16, v3.15.1 and v3.14.4; it should be safe to set new config fields + // once we know we're only upgrading from those versions and above. + return false + } - patchFrom := client.MergeFrom(fc.DeepCopy()) - - var tproxyMode crdv1.TPROXYModeOption - if ok, v := r.getTProxyMode(al); ok { - tproxyMode = v - } else { - if fc.Spec.TPROXYMode == nil { - // Workaround: we'd like to always force the value to be the correct one, matching the operator's - // configuration. However, during an upgrade from a version that predates the TPROXYMode option, - // Felix hits a bug and gets confused by the new config parameter, which in turn triggers a restart. - // Work around that by relying on Disabled being the default value for the field instead. - // - // The felix bug was fixed in v3.16, v3.15.1 and v3.14.4; it should be safe to set new config fields - // once we know we're only upgrading from those versions and above. - return nil + // If the mode is already set, fall through to the normal logic, it's safe to force-set the field now. + // This also avoids churning the config if a previous version of the operator set it to Disabled already, + // we avoid setting it back to nil. + tproxyMode = crdv1.TPROXYModeOptionDisabled } - // If the mode is already set, fall through to the normal logic, it's safe to force-set the field now. - // This also avoids churning the config if a previous version of the operator set it to Disabled already, - // we avoid setting it back to nil. - tproxyMode = crdv1.TPROXYModeOptionDisabled - } - - policySyncPrefix := r.getPolicySyncPathPrefix(&fc.Spec, al) - policySyncPrefixSetDesired := fc.Spec.PolicySyncPathPrefix == policySyncPrefix - tproxyModeSetDesired := fc.Spec.TPROXYMode != nil && *fc.Spec.TPROXYMode == tproxyMode + policySyncPrefix := r.getPolicySyncPathPrefix(&fc.Spec, al) + policySyncPrefixSetDesired := fc.Spec.PolicySyncPathPrefix == policySyncPrefix + tproxyModeSetDesired := fc.Spec.TPROXYMode != nil && *fc.Spec.TPROXYMode == tproxyMode - // If tproxy mode is already set to desired state return nil. - if policySyncPrefixSetDesired && tproxyModeSetDesired { - return nil - } - - fc.Spec.TPROXYMode = &tproxyMode - fc.Spec.PolicySyncPathPrefix = policySyncPrefix + // If tproxy mode is already set to desired state return false to indicate patch not needed. + if policySyncPrefixSetDesired && tproxyModeSetDesired { + return false + } - log.Info( - "Patching FelixConfiguration: ", - "policySyncPathPrefix", fc.Spec.PolicySyncPathPrefix, - "tproxyMode", string(tproxyMode), - ) + fc.Spec.TPROXYMode = &tproxyMode + fc.Spec.PolicySyncPathPrefix = policySyncPrefix - if err := r.client.Patch(ctx, fc, patchFrom); err != nil { - return err - } + log.Info( + "Patching FelixConfiguration: ", + "policySyncPathPrefix", fc.Spec.PolicySyncPathPrefix, + "tproxyMode", string(tproxyMode), + ) + return true + }) - return nil + return err } diff --git a/pkg/controller/egressgateway/egressgateway_controller.go b/pkg/controller/egressgateway/egressgateway_controller.go index a1fff5a0c4..205fe86db0 100644 --- a/pkg/controller/egressgateway/egressgateway_controller.go +++ b/pkg/controller/egressgateway/egressgateway_controller.go @@ -309,7 +309,13 @@ func (r *ReconcileEgressGateway) Reconcile(ctx context.Context, request reconcil } // patch and get the felix configuration - fc, err := r.patchFelixConfig(ctx) + fc, err := utils.PatchFelixConfiguration(ctx, r.client, func(fc *crdv1.FelixConfiguration) bool { + if fc.Spec.PolicySyncPathPrefix != "" { + return false // don't proceed with the patch + } + fc.Spec.PolicySyncPathPrefix = "/var/run/nodeagent" + return true // proceed with this patch + }) if err != nil { reqLogger.Error(err, "Error patching felix configuration") r.status.SetDegraded(operatorv1.ResourcePatchError, "Error patching felix configuration", err, reqLogger) @@ -430,26 +436,6 @@ func (r *ReconcileEgressGateway) reconcileEgressGateway(ctx context.Context, egw return nil } -func (r *ReconcileEgressGateway) patchFelixConfig(ctx context.Context) (*crdv1.FelixConfiguration, error) { - // Fetch any existing default FelixConfiguration object. - fc := &crdv1.FelixConfiguration{} - err := r.client.Get(ctx, types.NamespacedName{Name: "default"}, fc) - if err != nil && !errors.IsNotFound(err) { - r.status.SetDegraded(operatorv1.ResourceReadError, "Unable to read FelixConfiguration", err, log) - return nil, err - } - - patchFrom := client.MergeFrom(fc.DeepCopy()) - if fc.Spec.PolicySyncPathPrefix != "" { - return fc, nil - } - fc.Spec.PolicySyncPathPrefix = "/var/run/nodeagent" - if err := r.client.Patch(ctx, fc, patchFrom); err != nil { - return nil, err - } - return fc, nil -} - // getRequestedEgressGateway returns the namespaced EgressGateway instance. func getRequestedEgressGateway(egws []operatorv1.EgressGateway, request reconcile.Request) (*operatorv1.EgressGateway, int) { for index, egw := range egws { diff --git a/pkg/controller/installation/core_controller.go b/pkg/controller/installation/core_controller.go index b5c18f2353..e9a8a9c7a5 100644 --- a/pkg/controller/installation/core_controller.go +++ b/pkg/controller/installation/core_controller.go @@ -1133,15 +1133,11 @@ func (r *ReconcileInstallation) Reconcile(ctx context.Context, request reconcile } } - // Fetch any existing default FelixConfiguration object. - felixConfiguration := &crdv1.FelixConfiguration{} - err = r.client.Get(ctx, types.NamespacedName{Name: "default"}, felixConfiguration) - if err != nil && !apierrors.IsNotFound(err) { - r.status.SetDegraded(operator.ResourceReadError, "Unable to read FelixConfiguration", err, reqLogger) - return reconcile.Result{}, err - } - - if err = r.setDefaultsOnFelixConfiguration(ctx, instance, felixConfiguration, reqLogger); err != nil { + // Set any non-default FelixConfiguration values that we need. + felixConfiguration, err := utils.PatchFelixConfiguration(ctx, r.client, func(fc *crdv1.FelixConfiguration) bool { + return r.setDefaultsOnFelixConfiguration(instance, fc) + }) + if err != nil { return reconcile.Result{}, err } @@ -1618,11 +1614,8 @@ func GetOrCreateTyphaNodeTLSConfig(cli client.Client, certificateManager certifi } // setDefaultOnFelixConfiguration will take the passed in fc and add any defaulting needed -// based on the install config. If the FelixConfig ResourceVersion is empty, -// then the FelixConfig default will be created, otherwise a patch will be performed. -func (r *ReconcileInstallation) setDefaultsOnFelixConfiguration(ctx context.Context, install *operator.Installation, fc *crdv1.FelixConfiguration, log logr.Logger) error { - patchFrom := client.MergeFrom(fc.DeepCopy()) - fc.ObjectMeta.Name = "default" +// based on the install config. +func (r *ReconcileInstallation) setDefaultsOnFelixConfiguration(install *operator.Installation, fc *crdv1.FelixConfiguration) bool { updated := false switch install.Spec.CNI.Type { @@ -1665,21 +1658,32 @@ func (r *ReconcileInstallation) setDefaultsOnFelixConfiguration(ctx context.Cont updated = true } - if !updated { - return nil - } - if fc.ResourceVersion == "" { - if err := r.client.Create(ctx, fc); err != nil { - r.status.SetDegraded(operator.ResourceCreateError, "Unable to Create default FelixConfiguration", err, log) - return err - } - } else { - if err := r.client.Patch(ctx, fc, patchFrom); err != nil { - r.status.SetDegraded(operator.ResourcePatchError, "Unable to Patch default FelixConfiguration", err, log) - return err + if install.Spec.Variant == operator.TigeraSecureEnterprise { + // Some platforms need a different default setting for dnsTrustedServers, because their DNS service is not named "kube-dns". + dnsService := "" + switch install.Spec.KubernetesProvider { + case operator.ProviderOpenShift: + dnsService = "openshift-dns/dns-default" + case operator.ProviderRKE2: + dnsService = "kube-system/rke2-coredns-rke2-coredns" + } + if dnsService != "" { + felixDefault := "k8s-service:kube-dns" + trustedServers := []string{"k8s-service:" + dnsService} + // Keep any other values that are already configured, excepting the kube-dns + // default. + if fc.Spec.DNSTrustedServers != nil { + for _, server := range *(fc.Spec.DNSTrustedServers) { + if server != felixDefault { + trustedServers = append(trustedServers, server) + } + } + } + fc.Spec.DNSTrustedServers = &trustedServers + updated = true } } - return nil + return updated } var osExitOverride = os.Exit diff --git a/pkg/controller/installation/core_controller_test.go b/pkg/controller/installation/core_controller_test.go index 127f62a63e..9f77a3540b 100644 --- a/pkg/controller/installation/core_controller_test.go +++ b/pkg/controller/installation/core_controller_test.go @@ -1073,6 +1073,20 @@ var _ = Describe("Testing core-controller installation", func() { Expect(fc.Spec.RouteTableRange).To(BeNil()) }) + It("generates FelixConfiguration with correct DNS service for Rancher", func() { + cr.Spec.KubernetesProvider = operator.ProviderRKE2 + Expect(c.Create(ctx, cr)).NotTo(HaveOccurred()) + _, err := r.Reconcile(ctx, reconcile.Request{}) + Expect(err).ShouldNot(HaveOccurred()) + + // We should get a felix configuration with Rancher's DNS service. + fc := &crdv1.FelixConfiguration{} + err = c.Get(ctx, types.NamespacedName{Name: "default"}, fc) + Expect(err).ShouldNot(HaveOccurred()) + Expect(fc.Spec.DNSTrustedServers).NotTo(BeNil()) + Expect(*fc.Spec.DNSTrustedServers).To(ConsistOf("k8s-service:kube-system/rke2-coredns-rke2-coredns")) + }) + It("should Reconcile with AWS CNI config", func() { cr.Spec.CNI = &operator.CNISpec{Type: operator.PluginAmazonVPC} Expect(c.Create(ctx, cr)).NotTo(HaveOccurred()) diff --git a/pkg/controller/utils/felix_configuration.go b/pkg/controller/utils/felix_configuration.go new file mode 100644 index 0000000000..ff05d1fcfe --- /dev/null +++ b/pkg/controller/utils/felix_configuration.go @@ -0,0 +1,54 @@ +// Copyright (c) 2023 Tigera, Inc. All rights reserved. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "context" + "fmt" + + crdv1 "github.com/tigera/operator/pkg/apis/crd.projectcalico.org/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func PatchFelixConfiguration(ctx context.Context, c client.Client, patchFn func(fc *crdv1.FelixConfiguration) bool) (*crdv1.FelixConfiguration, error) { + // Fetch any existing default FelixConfiguration object. + fc := &crdv1.FelixConfiguration{} + err := c.Get(ctx, types.NamespacedName{Name: "default"}, fc) + if err != nil && !errors.IsNotFound(err) { + return nil, fmt.Errorf("unable to read FelixConfiguration: %w", err) + } + + // Create a base state for the upcoming patch operation. + patchFrom := client.MergeFrom(fc.DeepCopy()) + + // Apply desired changes to the FelixConfiguration. + if patchFn(fc) { + // Apply the patch. + if fc.ResourceVersion == "" { + fc.ObjectMeta.Name = "default" + if err := c.Create(ctx, fc); err != nil { + return nil, err + } + } else { + if err := c.Patch(ctx, fc, patchFrom); err != nil { + return nil, err + } + } + } + + return fc, nil +} diff --git a/pkg/render/node.go b/pkg/render/node.go index 49ac708102..2195a19c73 100644 --- a/pkg/render/node.go +++ b/pkg/render/node.go @@ -727,7 +727,7 @@ func (c *nodeComponent) nodeCNIConfigMap() *corev1.ConfigMap { config := fmt.Sprintf(`{ "name": "k8s-pod-network", "cniVersion": "0.3.1", - "plugins": %s + "plugins": %s }`, string(pluginsArray)) return &corev1.ConfigMap{ @@ -1650,15 +1650,6 @@ func (c *nodeComponent) nodeEnvVars() []corev1.EnvVar { case operatorv1.ProviderOpenShift: // For Openshift, we need special configuration since our default port is already in use. nodeEnv = append(nodeEnv, corev1.EnvVar{Name: "FELIX_HEALTHPORT", Value: "9199"}) - if c.cfg.Installation.Variant == operatorv1.TigeraSecureEnterprise { - // We also need to configure a non-default trusted DNS server, since there's no kube-dns. - nodeEnv = append(nodeEnv, corev1.EnvVar{Name: "FELIX_DNSTRUSTEDSERVERS", Value: "k8s-service:openshift-dns/dns-default"}) - } - case operatorv1.ProviderRKE2: - // For RKE2, configure a non-default trusted DNS server, as the DNS service is not named "kube-dns". - if c.cfg.Installation.Variant == operatorv1.TigeraSecureEnterprise { - nodeEnv = append(nodeEnv, corev1.EnvVar{Name: "FELIX_DNSTRUSTEDSERVERS", Value: "k8s-service:kube-system/rke2-coredns-rke2-coredns"}) - } // For AKS/AzureVNET and EKS/VPCCNI, we must explicitly ask felix to add host IP's to wireguard ifaces case operatorv1.ProviderAKS: if c.cfg.Installation.CNI.Type == operatorv1.PluginAzureVNET { diff --git a/pkg/render/node_test.go b/pkg/render/node_test.go index 1461bab222..9546aad00b 100644 --- a/pkg/render/node_test.go +++ b/pkg/render/node_test.go @@ -1951,7 +1951,6 @@ var _ = Describe("Node rendering tests", func() { // The OpenShift envvar overrides. {Name: "FELIX_HEALTHPORT", Value: "9199"}, {Name: "MULTI_INTERFACE_MODE", Value: operatorv1.MultiInterfaceModeNone.Value()}, - {Name: "FELIX_DNSTRUSTEDSERVERS", Value: "k8s-service:openshift-dns/dns-default"}, {Name: "FIPS_MODE_ENABLED", Value: "false"}, } expectedNodeEnv = configureExpectedNodeEnvIPVersions(expectedNodeEnv, defaultInstance, enableIPv4, enableIPv6) @@ -2046,7 +2045,6 @@ var _ = Describe("Node rendering tests", func() { // The RKE2 envvar overrides. {Name: "MULTI_INTERFACE_MODE", Value: operatorv1.MultiInterfaceModeNone.Value()}, - {Name: "FELIX_DNSTRUSTEDSERVERS", Value: "k8s-service:kube-system/rke2-coredns-rke2-coredns"}, {Name: "FIPS_MODE_ENABLED", Value: "false"}, } expectedNodeEnv = configureExpectedNodeEnvIPVersions(expectedNodeEnv, defaultInstance, enableIPv4, enableIPv6) From 2a1a245e44bb3b20f5de12d42978e34701d3e0a7 Mon Sep 17 00:00:00 2001 From: Nell Jerram Date: Thu, 22 Jun 2023 17:43:46 +0100 Subject: [PATCH 2/3] make generate --- .../crd.projectcalico.org/v1/zz_generated.deepcopy.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/apis/crd.projectcalico.org/v1/zz_generated.deepcopy.go b/pkg/apis/crd.projectcalico.org/v1/zz_generated.deepcopy.go index c0e9be7ab7..335ac34d4c 100644 --- a/pkg/apis/crd.projectcalico.org/v1/zz_generated.deepcopy.go +++ b/pkg/apis/crd.projectcalico.org/v1/zz_generated.deepcopy.go @@ -689,6 +689,15 @@ func (in *FelixConfigurationSpec) DeepCopyInto(out *FelixConfigurationSpec) { *out = new(int) **out = **in } + if in.DNSTrustedServers != nil { + in, out := &in.DNSTrustedServers, &out.DNSTrustedServers + *out = new([]string) + if **in != nil { + in, out := *in, *out + *out = make([]string, len(*in)) + copy(*out, *in) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FelixConfigurationSpec. From 46c38ea3bf4640924969833533f904021c93edc0 Mon Sep 17 00:00:00 2001 From: Nell Jerram Date: Mon, 26 Jun 2023 13:37:22 +0100 Subject: [PATCH 3/3] Don't keep adding same DNS server, and don't change FelixConfig when not needed --- .../installation/core_controller.go | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pkg/controller/installation/core_controller.go b/pkg/controller/installation/core_controller.go index e9a8a9c7a5..4eca9ee9e0 100644 --- a/pkg/controller/installation/core_controller.go +++ b/pkg/controller/installation/core_controller.go @@ -1663,24 +1663,29 @@ func (r *ReconcileInstallation) setDefaultsOnFelixConfiguration(install *operato dnsService := "" switch install.Spec.KubernetesProvider { case operator.ProviderOpenShift: - dnsService = "openshift-dns/dns-default" + dnsService = "k8s-service:openshift-dns/dns-default" case operator.ProviderRKE2: - dnsService = "kube-system/rke2-coredns-rke2-coredns" + dnsService = "k8s-service:kube-system/rke2-coredns-rke2-coredns" } if dnsService != "" { felixDefault := "k8s-service:kube-dns" - trustedServers := []string{"k8s-service:" + dnsService} - // Keep any other values that are already configured, excepting the kube-dns - // default. + trustedServers := []string{dnsService} + // Keep any other values that are already configured, excepting the value + // that we are setting and the kube-dns default. + existingSetting := "" if fc.Spec.DNSTrustedServers != nil { + existingSetting = strings.Join(*(fc.Spec.DNSTrustedServers), ",") for _, server := range *(fc.Spec.DNSTrustedServers) { - if server != felixDefault { + if server != felixDefault && server != dnsService { trustedServers = append(trustedServers, server) } } } - fc.Spec.DNSTrustedServers = &trustedServers - updated = true + newSetting := strings.Join(trustedServers, ",") + if newSetting != existingSetting { + fc.Spec.DNSTrustedServers = &trustedServers + updated = true + } } } return updated