Skip to content

Commit

Permalink
Detect IPConflicting and gatewayReachable in ipam without coordinator
Browse files Browse the repository at this point in the history
Signed-off-by: Cyclinder Kuo <[email protected]>
  • Loading branch information
cyclinder committed Jan 15, 2025
1 parent 068a5cc commit 392a3f7
Show file tree
Hide file tree
Showing 21 changed files with 332 additions and 325 deletions.
6 changes: 0 additions & 6 deletions api/v1/agent/models/coordinator_config.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions api/v1/agent/models/ip_config.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions api/v1/agent/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ definitions:
type: string
vlan:
type: integer
enableGatewayDetection:
type: boolean
enableIPConflictDetection:
type: boolean
required:
- version
- address
Expand Down Expand Up @@ -340,10 +344,6 @@ definitions:
type: integer
txQueueLen:
type: integer
detectIPConflict:
type: boolean
detectGateway:
type: boolean
vethLinkAddress:
type: string
required:
Expand Down
24 changes: 12 additions & 12 deletions api/v1/agent/server/embedded_spec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 15 additions & 66 deletions cmd/coordinator/cmd/cni_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"path/filepath"
"regexp"
"strings"
"time"

"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/cni/pkg/version"
Expand Down Expand Up @@ -44,24 +43,21 @@ const (

type Config struct {
types.NetConf
DetectGateway *bool `json:"detectGateway,omitempty"`
VethLinkAddress string `json:"vethLinkAddress,omitempty"`
MacPrefix string `json:"podMACPrefix,omitempty"`
MultusNicPrefix string `json:"multusNicPrefix,omitempty"`
PodDefaultCniNic string `json:"podDefaultCniNic,omitempty"`
OverlayPodCIDR []string `json:"overlayPodCIDR,omitempty"`
ServiceCIDR []string `json:"serviceCIDR,omitempty"`
HijackCIDR []string `json:"hijackCIDR,omitempty"`
TunePodRoutes *bool `json:"tunePodRoutes,omitempty"`
PodDefaultRouteNIC string `json:"podDefaultRouteNic,omitempty"`
Mode Mode `json:"mode,omitempty"`
HostRuleTable *int64 `json:"hostRuleTable,omitempty"`
HostRPFilter *int32 `json:"hostRPFilter,omitempty" `
PodRPFilter *int32 `json:"podRPFilter,omitempty" `
TxQueueLen *int64 `json:"txQueueLen,omitempty"`
IPConflict *bool `json:"detectIPConflict,omitempty"`
DetectOptions *DetectOptions `json:"detectOptions,omitempty"`
LogOptions *LogOptions `json:"logOptions,omitempty"`
VethLinkAddress string `json:"vethLinkAddress,omitempty"`
MacPrefix string `json:"podMACPrefix,omitempty"`
MultusNicPrefix string `json:"multusNicPrefix,omitempty"`
PodDefaultCniNic string `json:"podDefaultCniNic,omitempty"`
OverlayPodCIDR []string `json:"overlayPodCIDR,omitempty"`
ServiceCIDR []string `json:"serviceCIDR,omitempty"`
HijackCIDR []string `json:"hijackCIDR,omitempty"`
TunePodRoutes *bool `json:"tunePodRoutes,omitempty"`
PodDefaultRouteNIC string `json:"podDefaultRouteNic,omitempty"`
Mode Mode `json:"mode,omitempty"`
HostRuleTable *int64 `json:"hostRuleTable,omitempty"`
HostRPFilter *int32 `json:"hostRPFilter,omitempty" `
PodRPFilter *int32 `json:"podRPFilter,omitempty" `
TxQueueLen *int64 `json:"txQueueLen,omitempty"`
LogOptions *LogOptions `json:"logOptions,omitempty"`
}

// DetectOptions enable ip conflicting check for pod's ip
Expand Down Expand Up @@ -142,15 +138,6 @@ func ParseConfig(stdin []byte, coordinatorConfig *models.CoordinatorConfig) (*Co
return nil, err
}

if conf.IPConflict == nil && coordinatorConfig.DetectIPConflict {
conf.IPConflict = ptr.To(true)
}

conf.DetectOptions, err = ValidateDelectOptions(conf.DetectOptions)
if err != nil {
return nil, err
}

if conf.HostRuleTable == nil && coordinatorConfig.HostRuleTable > 0 {
conf.HostRuleTable = ptr.To(coordinatorConfig.HostRuleTable)
}
Expand All @@ -163,10 +150,6 @@ func ParseConfig(stdin []byte, coordinatorConfig *models.CoordinatorConfig) (*Co
conf.HostRuleTable = ptr.To(int64(500))
}

if conf.DetectGateway == nil {
conf.DetectGateway = ptr.To(coordinatorConfig.DetectGateway)
}

if conf.TunePodRoutes == nil {
conf.TunePodRoutes = coordinatorConfig.TunePodRoutes
}
Expand Down Expand Up @@ -270,37 +253,3 @@ func validateRPFilterConfig(rpfilter *int32, coordinatorConfig int64) (*int32, e
}
return rpfilter, nil
}

func ValidateDelectOptions(config *DetectOptions) (*DetectOptions, error) {
if config == nil {
return &DetectOptions{
Interval: "10ms",
TimeOut: "100ms",
Retry: 3,
}, nil
}

if config.Retry == 0 {
config.Retry = 3
}

if config.Interval == "" {
config.Interval = "10ms"
}

if config.TimeOut == "" {
config.TimeOut = "500ms"
}

_, err := time.ParseDuration(config.Interval)
if err != nil {
return nil, fmt.Errorf("invalid detectOptions.interval %s: %v, input like: 1s or 1m", config.Interval, err)
}

_, err = time.ParseDuration(config.TimeOut)
if err != nil {
return nil, fmt.Errorf("invalid detectOptions.timeout %s: %v, input like: 1s or 1m", config.TimeOut, err)
}

return config, nil
}
105 changes: 1 addition & 104 deletions cmd/coordinator/cmd/command_add.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,21 @@
package cmd

import (
"context"
"errors"
"fmt"
"net"
"time"

"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
current "github.com/containernetworking/cni/pkg/types/100"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/vishvananda/netlink"
"go.uber.org/multierr"
"go.uber.org/zap"

"github.com/spidernet-io/spiderpool/api/v1/agent/client/daemonset"
"github.com/spidernet-io/spiderpool/api/v1/agent/models"
plugincmd "github.com/spidernet-io/spiderpool/cmd/spiderpool/cmd"
"github.com/spidernet-io/spiderpool/pkg/constant"
"github.com/spidernet-io/spiderpool/pkg/errgroup"
"github.com/spidernet-io/spiderpool/pkg/logutils"
"github.com/spidernet-io/spiderpool/pkg/networking/gwconnection"
"github.com/spidernet-io/spiderpool/pkg/networking/ipchecking"
"github.com/spidernet-io/spiderpool/pkg/networking/networking"
"github.com/spidernet-io/spiderpool/pkg/networking/sysctl"
"github.com/spidernet-io/spiderpool/pkg/openapi"
Expand Down Expand Up @@ -118,6 +111,7 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
if err != nil {
return fmt.Errorf("failed to get current netns: %v", err)
}
defer c.hostNs.Close()
logger.Sugar().Debugf("Get current host netns: %v", c.hostNs.Path())

// checking if the nic is in up state
Expand Down Expand Up @@ -189,44 +183,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {

logger.Sugar().Infof("Get coordinator config: %+v", c)

errgConflict := errgroup.Group{}
// IP conflict detection must precede gateway detection, which avoids the
// possibility that gateway detection may update arp table entries first and cause
// communication problems when IP conflict detection fails
// see https://github.com/spidernet-io/spiderpool/issues/4475
var ipc *ipchecking.IPChecker
if conf.IPConflict != nil && *conf.IPConflict {
logger.Debug("Try to detect ip conflict")
ipc, err = ipchecking.NewIPChecker(conf.DetectOptions.Retry, conf.DetectOptions.Interval, conf.DetectOptions.TimeOut, c.hostNs, c.netns, logger)
if err != nil {
return fmt.Errorf("failed to run NewIPChecker: %w", err)
}
ipc.DoIPConflictChecking(prevResult.IPs, c.currentInterface, &errgConflict)
} else {
logger.Debug("disable detect ip conflict")
}

if err = errgConflict.Wait(); err != nil {
logger.Error("failed to detect ip conflict", zap.Error(err))
if errors.Is(err, constant.ErrIPConflict) {
logger.Info("ip conflict detected, clean up conflict IPs")
_, innerErr := client.Daemonset.DeleteIpamIps(daemonset.NewDeleteIpamIpsParams().WithContext(context.TODO()).WithIpamBatchDelArgs(
&models.IpamBatchDelArgs{
ContainerID: &args.ContainerID,
NetNamespace: args.Netns,
PodName: (*string)(&k8sArgs.K8S_POD_NAME),
PodNamespace: (*string)(&k8sArgs.K8S_POD_NAMESPACE),
PodUID: (*string)(&k8sArgs.K8S_POD_UID),
},
))
if innerErr != nil {
logger.Sugar().Errorf("failed to clean up conflict IPs, error: %v", innerErr)
return multierr.Append(err, innerErr)
}
}
return err
}

// Fixed Mac addresses must come after IP conflict detection, otherwise the switch learns to communicate
// with the wrong Mac address when IP conflict detection fails
if len(conf.MacPrefix) != 0 {
Expand All @@ -237,65 +193,6 @@ func CmdAdd(args *skel.CmdArgs) (err error) {
logger.Info("Fix mac address successfully", zap.String("interface", args.IfName), zap.String("macAddress", hwAddr))
}

// we do detect gateway connection lastly
// Finally, there is gateway detection, which updates the correct arp table entries
// once there are no IP address conflicts and fixed Mac addresses
errgGateway := errgroup.Group{}
if conf.DetectGateway != nil && *conf.DetectGateway {
logger.Debug("Try to detect gateway")

var gws []net.IP
err = c.netns.Do(func(netNS ns.NetNS) error {
gws, err = networking.GetDefaultGatewayByName(c.currentInterface, c.ipFamily)
if err != nil {
logger.Error("failed to GetDefaultGatewayByName", zap.Error(err))
return fmt.Errorf("failed to GetDefaultGatewayByName: %v", err)
}
logger.Debug("Get GetDefaultGatewayByName", zap.Any("Gws", gws))
p, err := gwconnection.New(conf.DetectOptions.Retry, conf.DetectOptions.Interval, conf.DetectOptions.TimeOut, c.currentInterface, logger)
if err != nil {
return fmt.Errorf("failed to init the gateway client: %v", err)
}
p.ParseAddrFromPreresult(prevResult.IPs)
for _, gw := range gws {
if gw.To4() != nil {
p.V4Gw = gw
errgGateway.Go(c.hostNs, c.netns, p.ArpingOverIface)
} else {
p.V6Gw = gw
errgGateway.Go(c.hostNs, c.netns, p.NDPingOverIface)
}
}
return nil
})
if err != nil {
return err
}
} else {
logger.Debug("disable detect gateway")
}

if err = errgGateway.Wait(); err != nil {
logger.Error("failed to detect gateway reachable", zap.Error(err))
if errors.Is(err, constant.ErrGatewayUnreachable) {
logger.Info("gateway unreachable detected, clean up conflict IPs")
_, innerErr := client.Daemonset.DeleteIpamIps(daemonset.NewDeleteIpamIpsParams().WithContext(context.TODO()).WithIpamBatchDelArgs(
&models.IpamBatchDelArgs{
ContainerID: &args.ContainerID,
NetNamespace: args.Netns,
PodName: (*string)(&k8sArgs.K8S_POD_NAME),
PodNamespace: (*string)(&k8sArgs.K8S_POD_NAMESPACE),
PodUID: (*string)(&k8sArgs.K8S_POD_UID),
},
))
if innerErr != nil {
logger.Sugar().Errorf("failed to clean up conflict IPs, error: %v", innerErr)
return multierr.Append(err, innerErr)
}
}
return err
}

// set txqueuelen
if conf.TxQueueLen != nil && *conf.TxQueueLen > 0 {
if err = networking.LinkSetTxqueueLen(args.IfName, int(*conf.TxQueueLen)); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions cmd/coordinator/cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -703,3 +703,7 @@ OUTER2:

return finalNodeIpList, nil
}

func AnnouncePodIPs() error {
return nil
}
Loading

0 comments on commit 392a3f7

Please sign in to comment.