Skip to content

Commit

Permalink
Fix subctl diagnose hostname mismatch issue
Browse files Browse the repository at this point in the history
Submariner Endpoint stores the hostname info as part of the endpoint object.
In most of the K8s clusters, the hostname matches with the nodeName, but on
some clusters, it was seen that nodeName does not match. This PR fixes this
issue.

Also, when more than a single node is labelled as Gateway node, the current
code was not handling it properly, this PR fixes it.

Fixes issue: #1471
Signed-Off-by: Sridhar Gaddam <[email protected]>
(cherry picked from commit 9d125f0)
  • Loading branch information
sridhargaddam authored and dfarrell07 committed Jul 27, 2021
1 parent 0040ebf commit 80e1a96
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 17 deletions.
48 changes: 39 additions & 9 deletions pkg/subctl/cmd/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@ import (
"bytes"
"fmt"
"io/ioutil"
"strings"

k8sV1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
v1opts "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
Expand Down Expand Up @@ -102,7 +100,7 @@ func getSubmarinerResource(config *rest.Config) *v1alpha1.Submariner {
return submariner
}

func getEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint {
func getLocalEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint {
submarinerClient, err := subClientsetv1.NewForConfig(config)
exitOnError("Unable to get the Submariner client", err)

Expand All @@ -127,18 +125,50 @@ func getActiveGatewayNodeName(clientSet *kubernetes.Clientset, hostname string)
}

for _, node := range nodes.Items {
for _, addr := range node.Status.Addresses {
if addr.Type == k8sV1.NodeHostName {
if strings.HasPrefix(addr.Address, hostname) {
return node.Name
}
}
if node.Name == hostname {
return hostname
}

// On some platforms, the nodeName does not match with the hostname.
// Submariner Endpoint stores the hostname info in the endpoint and not the nodeName. So, we spawn a
// tiny pod to read the hostname and return the corresponding node.
sPod, err := spawnSnifferPodOnNode(clientSet, node.Name, "default", "hostname")
if err != nil {
return ""
}

defer sPod.DeletePod()

if err = sPod.AwaitPodCompletion(); err != nil {
return ""
}

if sPod.PodOutput[:len(sPod.PodOutput)-1] == hostname {
return node.Name
}
}

return ""
}

func getAnyRemoteEndpointResource(config *rest.Config, clusterID string) *submarinerv1.Endpoint {
submarinerClient, err := subClientsetv1.NewForConfig(config)
exitOnError("Unable to get the Submariner client", err)

endpoints, err := submarinerClient.SubmarinerV1().Endpoints(OperatorNamespace).List(v1opts.ListOptions{})
if err != nil {
return nil
}

for _, endpoint := range endpoints.Items {
if endpoint.Spec.ClusterID != clusterID {
return &endpoint
}
}

return nil
}

func getGatewaysResource(config *rest.Config) *submarinerv1.GatewayList {
submarinerClient, err := subClientsetv1.NewForConfig(config)
exitOnError("Unable to get the Submariner client", err)
Expand Down
18 changes: 11 additions & 7 deletions pkg/subctl/cmd/validate_fw_vxlan.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,27 +91,31 @@ func validateFWConfigWithinCluster(config *rest.Config, submariner *v1alpha1.Sub
return false
}

gateways := getGatewaysResource(config)
if gateways == nil || len(gateways.Items) == 0 {
status.QueueWarningMessage("There are no gateways detected on the cluster.")
localEndpoint := getLocalEndpointResource(config, submariner.Spec.ClusterID)
if localEndpoint == nil {
return false
}

if len(gateways.Items[0].Status.Connections) == 0 {
status.QueueWarningMessage("There are no active connections to remote clusters.")
remoteEndpoint := getAnyRemoteEndpointResource(config, submariner.Spec.ClusterID)
if remoteEndpoint == nil {
return false
}

gwNodeName := getActiveGatewayNodeName(clientSet, localEndpoint.Spec.Hostname)
if gwNodeName == "" {
return false
}

podCommand := fmt.Sprintf("timeout %d %s", validationTimeout, TCPSniffVxLANCommand)
sPod, err := spawnSnifferPodOnGatewayNode(clientSet, namespace, podCommand)
sPod, err := spawnSnifferPodOnNode(clientSet, gwNodeName, namespace, podCommand)
if err != nil {
message := fmt.Sprintf("Error while spawning the sniffer pod on the GatewayNode: %v", err)
status.QueueFailureMessage(message)
return false
}

defer sPod.DeletePod()
remoteClusterIP := strings.Split(gateways.Items[0].Status.Connections[0].Endpoint.Subnets[0], "/")[0]
remoteClusterIP := strings.Split(remoteEndpoint.Spec.Subnets[0], "/")[0]
podCommand = fmt.Sprintf("nc -w %d %s 8080", validationTimeout/2, remoteClusterIP)
cPod, err := spawnClientPodOnNonGatewayNode(clientSet, namespace, podCommand)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/subctl/cmd/validate_tunnel.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func validateTunnelConfigAcrossClusters(localCfg, remoteCfg *rest.Config) bool {
status.Start(fmt.Sprintf("Checking if tunnels can be setup on Gateway node of cluster %q.",
submariner.Spec.ClusterID))

localEndpoint := getEndpointResource(localCfg, submariner.Spec.ClusterID)
localEndpoint := getLocalEndpointResource(localCfg, submariner.Spec.ClusterID)
if localEndpoint == nil {
status.QueueWarningMessage("Could not find the local cluster Endpoint")
return false
Expand Down

0 comments on commit 80e1a96

Please sign in to comment.