Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NSM datapath monitoring on proxy and TAPA NSC #522

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/proxy/internal/client/fullmesh.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/google/uuid"
"github.com/networkservicemesh/api/pkg/api/networkservice"
"github.com/networkservicemesh/api/pkg/api/registry"
"github.com/networkservicemesh/sdk/pkg/networkservice/common/heal"
registryrefresh "github.com/networkservicemesh/sdk/pkg/registry/common/refresh"
registrysendfd "github.com/networkservicemesh/sdk/pkg/registry/common/sendfd"
registrychain "github.com/networkservicemesh/sdk/pkg/registry/core/chain"
Expand Down Expand Up @@ -237,9 +238,9 @@ func (fmnsc *FullMeshNetworkServiceClient) prepareQuery() *registry.NetworkServi
// monitoring Network Service Endpoints belonging to the Network Service of the request.
// Connects to each new Network Service Endpoint, and closes connection when a known
// endpoint disappears.
func NewFullMeshNetworkServiceClient(ctx context.Context, config *Config, additionalFunctionality ...networkservice.NetworkServiceClient) NetworkServiceClient {
func NewFullMeshNetworkServiceClient(ctx context.Context, config *Config, healOptions []heal.Option, additionalFunctionality ...networkservice.NetworkServiceClient) NetworkServiceClient {
// create base client relying on NSM's client.NewClient API
client := newClient(ctx, config.Name, config.APIClient, additionalFunctionality...)
client := newClient(ctx, config.Name, config.APIClient, healOptions, additionalFunctionality...)

fullMeshNetworkServiceClient := &FullMeshNetworkServiceClient{
networkServiceClients: make(map[string]NetworkServiceClient),
Expand Down
4 changes: 2 additions & 2 deletions cmd/proxy/internal/client/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ func expirationTimeIsNull(expirationTime *timestamppb.Timestamp) bool {
// Refresh Client comes from the NSM sdk version used. (In case of NSM v1.1.1 the built-in
// refresh might lead to connection issues if the different path segments have different
// maxTokenLifetime configured (unless the NSC side has the lowest maxtokenlifetime)).
func newClient(ctx context.Context, name string, nsmAPIClient *nsm.APIClient, additionalFunctionality ...networkservice.NetworkServiceClient) networkservice.NetworkServiceClient {
func newClient(ctx context.Context, name string, nsmAPIClient *nsm.APIClient, healOptions []heal.Option, additionalFunctionality ...networkservice.NetworkServiceClient) networkservice.NetworkServiceClient {
additionalFunctionality = append(additionalFunctionality,
sendfd.NewClient(),
)

return client.NewClient(ctx,
client.WithClientURL(&nsmAPIClient.Config.ConnectTo),
client.WithName(name),
client.WithHealClient(heal.NewClient(ctx)),
client.WithHealClient(heal.NewClient(ctx, healOptions...)),
client.WithAdditionalFunctionality(additionalFunctionality...),
client.WithDialTimeout(nsmAPIClient.Config.DialTimeout),
client.WithDialOptions(nsmAPIClient.GRPCDialOption...),
Expand Down
45 changes: 24 additions & 21 deletions cmd/proxy/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,30 @@ import (

// Config for the proxy
type Config struct {
Name string `default:"proxy" desc:"Pod Name"`
ServiceName string `default:"proxy" desc:"Name of the Network Service" split_words:"true"`
ConnectTo url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"url to connect to NSM" split_words:"true"`
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
RequestTimeout time.Duration `default:"15s" desc:"timeout to request NSE" split_words:"true"`
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
IPAMService string `default:"ipam-service:7777" desc:"IP (or domain) and port of the IPAM Service" split_words:"true"`
Host string `default:"" desc:"Host name the proxy is running on" split_words:"true"`
NetworkServiceName string `default:"load-balancer" desc:"Name of the network service the proxy request the connection" split_words:"true"`
Namespace string `default:"default" desc:"Namespace the pod is running on" split_words:"true"`
Trench string `default:"default" desc:"Trench the pod is running on" split_words:"true"`
Conduit string `default:"load-balancer" desc:"Name of the conduit" split_words:"true"`
NSPServiceName string `default:"nsp-service" desc:"IP (or domain) of the NSP Service" split_words:"true"`
NSPServicePort int `default:"7778" desc:"port of the NSP Service" split_words:"true"`
IPFamily string `default:"dualstack" desc:"ip family" envconfig:"ip_family"`
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
MTU int `default:"1500" desc:"Conduit MTU considered by local NSCs and NSE composing the network mesh" split_words:"true"`
GRPCKeepaliveTime time.Duration `default:"30s" desc:"gRPC keepalive timeout"`
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
IPReleaseDelay time.Duration `default:"20s" desc:"delay releasing IP address of NSM connection" envconfig:"ip_release_delay"`
Name string `default:"proxy" desc:"Pod Name"`
ServiceName string `default:"proxy" desc:"Name of the Network Service" split_words:"true"`
ConnectTo url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"url to connect to NSM" split_words:"true"`
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
RequestTimeout time.Duration `default:"15s" desc:"timeout to request NSE" split_words:"true"`
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
IPAMService string `default:"ipam-service:7777" desc:"IP (or domain) and port of the IPAM Service" split_words:"true"`
Host string `default:"" desc:"Host name the proxy is running on" split_words:"true"`
NetworkServiceName string `default:"load-balancer" desc:"Name of the network service the proxy request the connection" split_words:"true"`
Namespace string `default:"default" desc:"Namespace the pod is running on" split_words:"true"`
Trench string `default:"default" desc:"Trench the pod is running on" split_words:"true"`
Conduit string `default:"load-balancer" desc:"Name of the conduit" split_words:"true"`
NSPServiceName string `default:"nsp-service" desc:"IP (or domain) of the NSP Service" split_words:"true"`
NSPServicePort int `default:"7778" desc:"port of the NSP Service" split_words:"true"`
IPFamily string `default:"dualstack" desc:"ip family" envconfig:"ip_family"`
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
MTU int `default:"1500" desc:"Conduit MTU considered by local NSCs and NSE composing the network mesh" split_words:"true"`
GRPCKeepaliveTime time.Duration `default:"30s" desc:"gRPC keepalive timeout"`
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
IPReleaseDelay time.Duration `default:"20s" desc:"delay releasing IP address of NSM connection" envconfig:"ip_release_delay"`
LivenessCheckInterval time.Duration `default:"2s" desc:"Dataplane liveness check interval" split_words:"true"`
LivenessCheckTimeout time.Duration `default:"1s" desc:"Dataplane liveness check timeout" split_words:"true"`
LivenessCheckEnabled bool `default:"false" desc:"Dataplane liveness check enabled/disabled" split_words:"true"`
}

// IsValid checks if the configuration is valid
Expand Down
4 changes: 3 additions & 1 deletion cmd/proxy/internal/service/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/networkservicemesh/api/pkg/api/networkservice/payload"
"github.com/networkservicemesh/sdk-sriov/pkg/networkservice/common/mechanisms/vfio"
sriovtoken "github.com/networkservicemesh/sdk-sriov/pkg/networkservice/common/token"
"github.com/networkservicemesh/sdk/pkg/networkservice/common/heal"
"github.com/networkservicemesh/sdk/pkg/networkservice/common/mechanisms"
"github.com/networkservicemesh/sdk/pkg/networkservice/common/mechanisms/kernel"
"github.com/networkservicemesh/sdk/pkg/networkservice/core/chain"
Expand All @@ -46,6 +47,7 @@ func GetNSC(ctx context.Context,
config *config.Config,
nsmAPIClient *nsm.APIClient,
p *proxy.Proxy,
healOptions []heal.Option,
interfaceMonitorClient networkservice.NetworkServiceClient) client.NetworkServiceClient {

logger := log.FromContextOrGlobal(ctx).WithValues("func", "GetNSC")
Expand All @@ -70,7 +72,7 @@ func GetNSC(ctx context.Context,
proxyHealth.NewClient(),
fullmeshtracker.NewClient(),
)
fullMeshClient := client.NewFullMeshNetworkServiceClient(ctx, clientConfig, additionalFunctionality)
fullMeshClient := client.NewFullMeshNetworkServiceClient(ctx, clientConfig, healOptions, additionalFunctionality)

return fullMeshClient
}
Expand Down
13 changes: 12 additions & 1 deletion cmd/proxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (

"github.com/kelseyhightower/envconfig"
"github.com/networkservicemesh/api/pkg/api/networkservice"
"github.com/networkservicemesh/sdk/pkg/networkservice/common/heal"
"github.com/networkservicemesh/sdk/pkg/tools/grpcutils"
nsmlog "github.com/networkservicemesh/sdk/pkg/tools/log"
ipamAPI "github.com/nordix/meridio/api/ipam/v1"
Expand All @@ -41,6 +42,7 @@ import (
"github.com/nordix/meridio/pkg/health/probe"
linuxKernel "github.com/nordix/meridio/pkg/kernel"
"github.com/nordix/meridio/pkg/nsm"
kernelheal "github.com/nordix/meridio/pkg/nsm/heal"
"github.com/nordix/meridio/pkg/nsm/interfacemonitor"
nsmmonitor "github.com/nordix/meridio/pkg/nsm/monitor"
"github.com/nordix/meridio/pkg/nsp"
Expand Down Expand Up @@ -236,9 +238,18 @@ func main() {
monitorClient := networkservice.NewMonitorConnectionClient(cc)
go nsmmonitor.ConnectionMonitor(ctx, config.Name, monitorClient)

healOptions := []heal.Option{}
if config.LivenessCheckEnabled {
healOptions = []heal.Option{
heal.WithLivenessCheckInterval(config.LivenessCheckInterval),
heal.WithLivenessCheckTimeout(config.LivenessCheckTimeout),
heal.WithLivenessCheck(kernelheal.KernelLivenessCheck),
}
}

// create and start NSC that connects all remote NSE belonging to the right service
interfaceMonitorClient := interfacemonitor.NewClient(interfaceMonitor, p, netUtils)
nsmClient := service.GetNSC(ctx, &config, nsmAPIClient, p, interfaceMonitorClient)
nsmClient := service.GetNSC(ctx, &config, nsmAPIClient, p, healOptions, interfaceMonitorClient)
defer nsmClient.Close()
go func() {
service.StartNSC(nsmClient, config.NetworkServiceName)
Expand Down
31 changes: 17 additions & 14 deletions cmd/tapa/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,23 @@ import (

// Config for the TAPA
type Config struct {
Name string `default:"nsc" desc:"Name of the target"`
Node string `default:"" desc:"Node name the target is running on" split_words:"true"`
Namespace string `default:"default" desc:"Namespace the trenches to connect to are running on" split_words:"true"`
Socket string `default:"/ambassador.sock" desc:"Path of the socket file of the TAPA" split_words:"true"`
NSMSocket url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"Path of the socket file of NSM" envconfig:"nsm_socket"`
NSPServiceName string `default:"nsp-service" desc:"Domain name of the NSP Service" envconfig:"nsp_service_name"`
NSPServicePort int `default:"7778" desc:"port of the NSP Service" envconfig:"nsp_service_port"`
Timeout time.Duration `default:"15s" desc:"timeout of NSM request/close, NSP register/unregister..." split_words:"true"`
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
NSPEntryTimeout time.Duration `default:"30s" desc:"Timeout of the entries" envconfig:"nsp_entry_timeout"`
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
Name string `default:"nsc" desc:"Name of the target"`
Node string `default:"" desc:"Node name the target is running on" split_words:"true"`
Namespace string `default:"default" desc:"Namespace the trenches to connect to are running on" split_words:"true"`
Socket string `default:"/ambassador.sock" desc:"Path of the socket file of the TAPA" split_words:"true"`
NSMSocket url.URL `default:"unix:///var/lib/networkservicemesh/nsm.io.sock" desc:"Path of the socket file of NSM" envconfig:"nsm_socket"`
NSPServiceName string `default:"nsp-service" desc:"Domain name of the NSP Service" envconfig:"nsp_service_name"`
NSPServicePort int `default:"7778" desc:"port of the NSP Service" envconfig:"nsp_service_port"`
Timeout time.Duration `default:"15s" desc:"timeout of NSM request/close, NSP register/unregister..." split_words:"true"`
DialTimeout time.Duration `default:"5s" desc:"timeout to dial NSMgr" split_words:"true"`
MaxTokenLifetime time.Duration `default:"24h" desc:"maximum lifetime of tokens" split_words:"true"`
LogLevel string `default:"DEBUG" desc:"Log level" split_words:"true"`
NSPEntryTimeout time.Duration `default:"30s" desc:"Timeout of the entries" envconfig:"nsp_entry_timeout"`
GRPCMaxBackoff time.Duration `default:"5s" desc:"Upper bound on gRPC connection backoff delay" envconfig:"grpc_max_backoff"`
GRPCProbeRPCTimeout time.Duration `default:"1s" desc:"RPC timeout of internal gRPC health probe" envconfig:"grpc_probe_rpc_timeout"`
LivenessCheckInterval time.Duration `default:"2s" desc:"Dataplane liveness check interval" split_words:"true"`
LivenessCheckTimeout time.Duration `default:"1s" desc:"Dataplane liveness check timeout" split_words:"true"`
LivenessCheckEnabled bool `default:"false" desc:"Dataplane liveness check enabled/disabled" split_words:"true"`
}

// IsValid checks if the configuration is valid
Expand Down
12 changes: 11 additions & 1 deletion cmd/tapa/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ import (
linuxKernel "github.com/nordix/meridio/pkg/kernel"
"github.com/nordix/meridio/pkg/log"
"github.com/nordix/meridio/pkg/nsm"
kernelheal "github.com/nordix/meridio/pkg/nsm/heal"
"github.com/nordix/meridio/pkg/nsm/interfacename"
"github.com/sirupsen/logrus"
"google.golang.org/grpc"
Expand Down Expand Up @@ -155,10 +156,19 @@ func main() {
sendfd.NewClient(),
}

healOptions := []heal.Option{}
if config.LivenessCheckEnabled {
healOptions = []heal.Option{
heal.WithLivenessCheckInterval(config.LivenessCheckInterval),
heal.WithLivenessCheckTimeout(config.LivenessCheckTimeout),
heal.WithLivenessCheck(kernelheal.KernelLivenessCheck),
}
}

networkServiceClient := client.NewClient(ctx,
client.WithClientURL(&nsmAPIClient.Config.ConnectTo),
client.WithName(config.Name),
client.WithHealClient(heal.NewClient(ctx)),
client.WithHealClient(heal.NewClient(ctx, healOptions...)),
client.WithAdditionalFunctionality(additionalFunctionality...),
client.WithDialTimeout(nsmAPIClient.Config.DialTimeout),
client.WithDialOptions(nsmAPIClient.GRPCDialOption...),
Expand Down
8 changes: 8 additions & 0 deletions docs/components/proxy.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ NSM_NSP_SERVICE_NAME | string | IP (or domain) of the NSP Service | nsp-service
NSM_NSP_SERVICE_PORT | int | port of the NSP Service | 7778
NSM_IP_FAMILY | string | ip family | dualstack
NSM_LOG_LEVEL | string | Log level | DEBUG
NSM_MTU | int | Conduit MTU considered by local NSCs and NSE composing the network mesh | 1500
NSM_GRPC_KEEPALIVE_TIME | time.Duration | gRPC keepalive timeout | 30s
NSM_GRPC_PROBE_RPC_TIMEOUT | time.Duration | RPC timeout of internal gRPC health probe | 1s
NSM_GRPC_MAX_BACKOFF | time.Duration | Upper bound on gRPC connection backoff delay | 5s
NSM_IP_RELEASE_DELAY | time.Duration | delay releasing IP address of NSM connection | 20s
NSM_LIVENESS_CHECK_INTERVAL | time.Duration | Dataplane liveness check interval | 2s
NSM_LIVENESS_CHECK_TIMEOUT | time.Duration | Dataplane liveness check timeout | 1s
NSM_LIVENESS_CHECK_ENABLED | bool | Dataplane liveness check enabled/disabled | false

## Command Line

Expand Down
3 changes: 3 additions & 0 deletions docs/components/tapa.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ MERIDIO_LOG_LEVEL | string | Log level | DEBUG
MERIDIO_NSP_ENTRY_TIMEOUT | time.Duration | Timeout of the entries registered in NSP | 30s
MERIDIO_GRPC_MAX_BACKOFF | time.Duration | Upper bound on gRPC connection backoff delay | 5s
MERIDIO_GRPC_PROBE_RPC_TIMEOUT | time.Duration | RPC timeout of internal gRPC health probes if any | 1s
MERIDIO_LIVENESS_CHECK_INTERVAL | time.Duration | Dataplane liveness check interval | 2s
MERIDIO_LIVENESS_CHECK_TIMEOUT | time.Duration | Dataplane liveness check timeout | 1s
MERIDIO_LIVENESS_CHECK_ENABLED | bool | Dataplane liveness check enabled/disabled | false

## Command Line

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/faisal-memon/sviddisk v0.0.0-20211007205134-77ccea0b9271
github.com/go-logr/logr v1.4.1
github.com/go-logr/zapr v1.3.0
github.com/go-ping/ping v1.0.0
github.com/golang/mock v1.6.0
github.com/google/nftables v0.1.0
github.com/google/uuid v1.3.1
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2Kv
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-ping/ping v1.0.0 h1:34GZiqLDqqIHEeL5NZIz7jSnMluK7/p0qDB436yO6H0=
github.com/go-ping/ping v1.0.0/go.mod h1:35JbSyV/BYqHwwRA6Zr1uVDm1637YlNOU61wI797NPI=
github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls=
Expand Down Expand Up @@ -412,6 +414,7 @@ golang.org/x/net v0.0.0-20191007182048-72f939374954/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201010224723-4f7140c49acb/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
Expand Down
3 changes: 2 additions & 1 deletion pkg/ambassador/tap/conduit/conduit.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@ func (c *Conduit) SetVIPs(ctx context.Context, vips []string) error {
Labels: c.connection.GetLabels(),
Payload: c.connection.GetPayload(),
Context: &networkservice.ConnectionContext{
IpContext: c.connection.GetContext().GetIpContext(),
ExtraContext: c.connection.GetContext().GetExtraContext(),
IpContext: c.connection.GetContext().GetIpContext(),
},
},
}
Expand Down
Loading