From 36438973dd45620e49a1dc9f498322c097925875 Mon Sep 17 00:00:00 2001
From: Yuvaraj Kakaraparthi <kyuvarajbr@gmail.com>
Date: Tue, 7 Jan 2025 14:26:01 -0800
Subject: [PATCH] sync machine labels

---
 controllers/alias.go                          |  4 ++
 .../src/reference/api/metadata-propagation.md | 12 ++--
 ...27-label-sync-between-machine-and-nodes.md | 14 ++--
 .../controllers/machine/machine_controller.go |  3 +
 .../machine/machine_controller_noderef.go     | 14 +++-
 .../machine_controller_noderef_test.go        | 70 ++++++++++++++++---
 internal/controllers/machine/suite_test.go    |  1 +
 main.go                                       | 22 ++++++
 8 files changed, 118 insertions(+), 22 deletions(-)

diff --git a/controllers/alias.go b/controllers/alias.go
index 5789dd07c7b4..99944390b524 100644
--- a/controllers/alias.go
+++ b/controllers/alias.go
@@ -18,6 +18,7 @@ package controllers
 
 import (
 	"context"
+	"regexp"
 	"time"
 
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -72,6 +73,8 @@ type MachineReconciler struct {
 	WatchFilterValue string
 
 	RemoteConditionsGracePeriod time.Duration
+
+	SyncMachineLabels []*regexp.Regexp
 }
 
 func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
@@ -81,6 +84,7 @@ func (r *MachineReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manag
 		ClusterCache:                r.ClusterCache,
 		WatchFilterValue:            r.WatchFilterValue,
 		RemoteConditionsGracePeriod: r.RemoteConditionsGracePeriod,
+		SyncMachineLabels:           r.SyncMachineLabels,
 	}).SetupWithManager(ctx, mgr, options)
 }
 
diff --git a/docs/book/src/reference/api/metadata-propagation.md b/docs/book/src/reference/api/metadata-propagation.md
index ca20c25dcee2..9c7966ef747a 100644
--- a/docs/book/src/reference/api/metadata-propagation.md
+++ b/docs/book/src/reference/api/metadata-propagation.md
@@ -63,7 +63,11 @@ Top-level labels that meet a specific cretria are propagated to the Node labels
 - `.labels.[label-meets-criteria]` => `Node.labels`
 - `.annotations` => Not propagated.
 
-Label should meet one of the following criteria to propagate to Node: 
-- Has `node-role.kubernetes.io` as prefix.
-- Belongs to `node-restriction.kubernetes.io` domain.
-- Belongs to `node.cluster.x-k8s.io` domain.  
+The following default set of labels on the Machines is always synced to the Nodes:
+- `node-role.kubernetes.io`
+- `node-restriction.kubernetes.io`
+- `*.node-restriction.kubernetes.io`
+- `node.cluster.x-k8s.io`
+- `*.node.cluster.x-k8s.io`
+
+In addition, any labels that match at least one of the regexes provided by the `--additional-sync-machine-labels` flag on the manager will be synced from the Machine to the Node.
diff --git a/docs/proposals/20220927-label-sync-between-machine-and-nodes.md b/docs/proposals/20220927-label-sync-between-machine-and-nodes.md
index cbdbe08e5329..66ffde1d418e 100644
--- a/docs/proposals/20220927-label-sync-between-machine-and-nodes.md
+++ b/docs/proposals/20220927-label-sync-between-machine-and-nodes.md
@@ -66,8 +66,9 @@ With the "divide and conquer" principle in mind this proposal aims to address th
 
 ### Goals
 
-- Support label sync from Machine to the linked Kubernetes node, limited to `node-role.kubernetes.io/` prefix and the `node-restriction.kubernetes.io` domain.
+- Support label sync from Machine to the linked Kubernetes node.
 - Support syncing labels from Machine to the linked Kubernetes node for the Cluster API owned `node.cluster.x-k8s.io` domain.
+- Support to configure the domains of labels to be synced from the Machine to the Node.
 
 ### Non-Goals
 
@@ -98,14 +99,16 @@ While designing a solution for syncing labels between Machine and underlying Kub
 
 ### Label domains & prefixes
 
-The idea of scoping synchronization to a well defined set of labels is a first answer to security/concurrency concerns; labels to be managed by Cluster API have been selected based on following criteria:
+A default list of labels would always be synced from the Machines to the Nodes. An additional list of labels can be synced from the Machine to the Node by providing a list of regexes as a flag to the manager. 
 
-- The `node-role.kubernetes.io` label has been used widely in the past to identify the role of a Kubernetes Node (e.g. `node-role.kubernetes.io/worker=''`). For example, `kubectl get node` looks for this specific label when displaying the role to the user.
+The following is the default list of label domains that would always be synced from Machines to Nodes:
 
-- The `node-restriction.kubernetes.io/` domain is recommended in the [Kubernetes docs](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction) for things such as workload placement; please note that in this case
+- `node-role.kubernetes.io`: The `node-role.kubernetes.io` label has been used widely in the past to identify the role of a Kubernetes Node (e.g. `node-role.kubernetes.io/worker=''`). For example, `kubectl get node` looks for this specific label when displaying the role to the user.
+
+- `node-restriction.kubernetes.io`, `*.node-restriction.kubernetes.io`:  The `node-restriction.kubernetes.io/` domain is recommended in the [Kubernetes docs](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/#noderestriction) for things such as workload placement; please note that in this case
   we are considering the entire domain, thus also labels like `example.node-restriction.kubernetes.io/fips=true` fall in this category.
 
-- Cluster API owns a specific domain: `node.cluster.x-k8s.io`.
+- `node.cluster.x-k8s.io`, `*.node.cluster.x-k8s.io`: Cluster API owns a specific domain: `node.cluster.x-k8s.io`.
 
 #### Synchronization of CAPI Labels
 
@@ -163,3 +166,4 @@ Users could also implement their own label synchronizer in their tooling, but th
 
 - [ ] 09/27/2022: First Draft of this document
 - [ ] 09/28/2022: First Draft of this document presented in the Cluster API office hours meeting
+- [ ] 01/09/2025: Update to support configurable label syncing. Ref: [11657](https://github.com/kubernetes-sigs/cluster-api/issues/11657)
diff --git a/internal/controllers/machine/machine_controller.go b/internal/controllers/machine/machine_controller.go
index a296365786d4..4cdf5562f073 100644
--- a/internal/controllers/machine/machine_controller.go
+++ b/internal/controllers/machine/machine_controller.go
@@ -19,6 +19,7 @@ package machine
 import (
 	"context"
 	"fmt"
+	"regexp"
 	"slices"
 	"strings"
 	"time"
@@ -100,6 +101,8 @@ type Reconciler struct {
 
 	RemoteConditionsGracePeriod time.Duration
 
+	SyncMachineLabels []*regexp.Regexp
+
 	controller      controller.Controller
 	recorder        record.EventRecorder
 	externalTracker external.ObjectTracker
diff --git a/internal/controllers/machine/machine_controller_noderef.go b/internal/controllers/machine/machine_controller_noderef.go
index 683468242433..dce9c31554b5 100644
--- a/internal/controllers/machine/machine_controller_noderef.go
+++ b/internal/controllers/machine/machine_controller_noderef.go
@@ -127,7 +127,7 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
 	// Compute labels to be propagated from Machines to nodes.
 	// NOTE: CAPI should manage only a subset of node labels, everything else should be preserved.
 	// NOTE: Once we reconcile node labels for the first time, the NodeUninitializedTaint is removed from the node.
-	nodeLabels := getManagedLabels(machine.Labels)
+	nodeLabels := r.getManagedLabels(machine.Labels)
 
 	// Get interruptible instance status from the infrastructure provider and set the interruptible label on the node.
 	interruptible := false
@@ -178,9 +178,10 @@ func (r *Reconciler) reconcileNode(ctx context.Context, s *scope) (ctrl.Result,
 
 // getManagedLabels gets a map[string]string and returns another map[string]string
 // filtering out labels not managed by CAPI.
-func getManagedLabels(labels map[string]string) map[string]string {
+func (r *Reconciler) getManagedLabels(labels map[string]string) map[string]string {
 	managedLabels := make(map[string]string)
 	for key, value := range labels {
+		// Always sync the default set of labels.
 		dnsSubdomainOrName := strings.Split(key, "/")[0]
 		if dnsSubdomainOrName == clusterv1.NodeRoleLabelPrefix {
 			managedLabels[key] = value
@@ -191,8 +192,15 @@ func getManagedLabels(labels map[string]string) map[string]string {
 		if dnsSubdomainOrName == clusterv1.ManagedNodeLabelDomain || strings.HasSuffix(dnsSubdomainOrName, "."+clusterv1.ManagedNodeLabelDomain) {
 			managedLabels[key] = value
 		}
-	}
 
+		// Sync the label if it matches at least one user-provided regex.
+		for _, regex := range r.SyncMachineLabels {
+			if regex.MatchString(key) {
+				managedLabels[key] = value
+				break
+			}
+		}
+	}
 	return managedLabels
 }
 
diff --git a/internal/controllers/machine/machine_controller_noderef_test.go b/internal/controllers/machine/machine_controller_noderef_test.go
index ef56f947ebc6..730b36f20701 100644
--- a/internal/controllers/machine/machine_controller_noderef_test.go
+++ b/internal/controllers/machine/machine_controller_noderef_test.go
@@ -19,6 +19,7 @@ package machine
 import (
 	"context"
 	"fmt"
+	"regexp"
 	"testing"
 	"time"
 
@@ -700,8 +701,7 @@ func TestSummarizeNodeConditions(t *testing.T) {
 }
 
 func TestGetManagedLabels(t *testing.T) {
-	// Create managedLabels map from known managed prefixes.
-	managedLabels := map[string]string{
+	defaultLabels := map[string]string{
 		clusterv1.NodeRoleLabelPrefix + "/anyRole": "",
 
 		clusterv1.ManagedNodeLabelDomain:                                  "",
@@ -715,22 +715,72 @@ func TestGetManagedLabels(t *testing.T) {
 		"custom-prefix." + clusterv1.NodeRestrictionLabelDomain + "/anything": "",
 	}
 
-	// Append arbitrary labels.
-	allLabels := map[string]string{
-		"foo":                               "",
-		"bar":                               "",
+	additionalLabels := map[string]string{
+		"foo":                               "bar",
+		"bar":                               "baz",
 		"company.xyz/node.cluster.x-k8s.io": "not-managed",
 		"gpu-node.cluster.x-k8s.io":         "not-managed",
 		"company.xyz/node-restriction.kubernetes.io": "not-managed",
 		"gpu-node-restriction.kubernetes.io":         "not-managed",
+		"wrong.test.foo.com":                         "",
 	}
-	for k, v := range managedLabels {
+
+	exampleRegex := regexp.MustCompile(`foo`)
+	defaultAndRegexLabels := map[string]string{}
+	for k, v := range defaultLabels {
+		defaultAndRegexLabels[k] = v
+	}
+	defaultAndRegexLabels["foo"] = "bar"
+	defaultAndRegexLabels["wrong.test.foo.com"] = ""
+
+	allLabels := map[string]string{}
+	for k, v := range defaultLabels {
+		allLabels[k] = v
+	}
+	for k, v := range additionalLabels {
 		allLabels[k] = v
 	}
 
-	g := NewWithT(t)
-	got := getManagedLabels(allLabels)
-	g.Expect(got).To(BeEquivalentTo(managedLabels))
+	tests := []struct {
+		name              string
+		syncMachineLabels []*regexp.Regexp
+		allLabels         map[string]string
+		managedLabels     map[string]string
+	}{
+		{
+			name:              "always sync default labels",
+			syncMachineLabels: nil,
+			allLabels:         allLabels,
+			managedLabels:     defaultLabels,
+		},
+		{
+			name: "sync additional defined labels",
+			syncMachineLabels: []*regexp.Regexp{
+				exampleRegex,
+			},
+			allLabels:     allLabels,
+			managedLabels: defaultAndRegexLabels,
+		},
+		{
+			name: "sync all labels",
+			syncMachineLabels: []*regexp.Regexp{
+				regexp.MustCompile(`.*`),
+			},
+			allLabels:     allLabels,
+			managedLabels: allLabels,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+			r := &Reconciler{
+				SyncMachineLabels: tt.syncMachineLabels,
+			}
+			got := r.getManagedLabels(tt.allLabels)
+			g.Expect(got).To(BeEquivalentTo(tt.managedLabels))
+		})
+	}
 }
 
 func TestPatchNode(t *testing.T) {
diff --git a/internal/controllers/machine/suite_test.go b/internal/controllers/machine/suite_test.go
index b6e00091e0d6..4804c8d83a5f 100644
--- a/internal/controllers/machine/suite_test.go
+++ b/internal/controllers/machine/suite_test.go
@@ -94,6 +94,7 @@ func TestMain(m *testing.M) {
 			APIReader:                   mgr.GetAPIReader(),
 			ClusterCache:                clusterCache,
 			RemoteConditionsGracePeriod: 5 * time.Minute,
+			SyncMachineLabels:           nil,
 		}).SetupWithManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: 1}); err != nil {
 			panic(fmt.Sprintf("Failed to start MachineReconciler: %v", err))
 		}
diff --git a/main.go b/main.go
index 59b7e3f9ce56..40c470fec167 100644
--- a/main.go
+++ b/main.go
@@ -22,6 +22,7 @@ import (
 	"flag"
 	"fmt"
 	"os"
+	"regexp"
 	goruntime "runtime"
 	"time"
 
@@ -35,6 +36,7 @@ import (
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/selection"
+	kerrors "k8s.io/apimachinery/pkg/util/errors"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/tools/leaderelection/resourcelock"
 	cliflag "k8s.io/component-base/cli/flag"
@@ -124,6 +126,7 @@ var (
 	clusterResourceSetConcurrency   int
 	machineHealthCheckConcurrency   int
 	useDeprecatedInfraMachineNaming bool
+	syncMachineLabels               []string
 )
 
 func init() {
@@ -250,6 +253,9 @@ func InitFlags(fs *pflag.FlagSet) {
 	fs.StringVar(&healthAddr, "health-addr", ":9440",
 		"The address the health endpoint binds to.")
 
+	fs.StringArrayVar(&syncMachineLabels, "additional-sync-machine-labels", []string{},
+		"List of regexes to select the additional set of labels to sync from the Machine to the Node. A label will be synced as long as it matches at least one of the regexes.")
+
 	fs.BoolVar(&useDeprecatedInfraMachineNaming, "use-deprecated-infra-machine-naming", false,
 		"Use deprecated infrastructure machine naming")
 	_ = fs.MarkDeprecated("use-deprecated-infra-machine-naming", "This flag will be removed in v1.9.")
@@ -559,12 +565,28 @@ func setupReconcilers(ctx context.Context, mgr ctrl.Manager, watchNamespaces map
 		setupLog.Error(err, "Unable to create controller", "controller", "Cluster")
 		os.Exit(1)
 	}
+
+	var errs []error
+	var syncMachineLabelRegexes []*regexp.Regexp
+	for _, re := range syncMachineLabels {
+		reg, err := regexp.Compile(re)
+		if err != nil {
+			errs = append(errs, err)
+		} else {
+			syncMachineLabelRegexes = append(syncMachineLabelRegexes, reg)
+		}
+	}
+	if len(errs) > 0 {
+		setupLog.Error(fmt.Errorf("at least one of --additional-sync-machine-labels regexes is invalid: %w", kerrors.NewAggregate(errs)), "Unable to start manager")
+		os.Exit(1)
+	}
 	if err := (&controllers.MachineReconciler{
 		Client:                      mgr.GetClient(),
 		APIReader:                   mgr.GetAPIReader(),
 		ClusterCache:                clusterCache,
 		WatchFilterValue:            watchFilterValue,
 		RemoteConditionsGracePeriod: remoteConditionsGracePeriod,
+		SyncMachineLabels:           syncMachineLabelRegexes,
 	}).SetupWithManager(ctx, mgr, concurrency(machineConcurrency)); err != nil {
 		setupLog.Error(err, "Unable to create controller", "controller", "Machine")
 		os.Exit(1)