From ef0591a9201b83b408ad6c94226e42a70a1e5947 Mon Sep 17 00:00:00 2001 From: sushrk Date: Wed, 25 Dec 2024 00:01:37 +0000 Subject: [PATCH] Centralized leaked ENI cleanup- refactor periodic cleanup & add node termination cleaner --- main.go | 16 +- .../pkg/aws/ec2/api/mock_ec2_apihelper.go | 14 + .../pkg/aws/ec2/api/mock_ec2_wrapper.go | 29 ++ .../pkg/provider/branch/trunk/mock_trunk.go | 12 - pkg/aws/ec2/api/cleanup/eni_cleanup.go | 221 +++++++++++++++ pkg/aws/ec2/api/cleanup/eni_cleanup_test.go | 246 ++++++++++++++++ pkg/aws/ec2/api/cleanup/node_cleanup.go | 50 ++++ pkg/aws/ec2/api/cleanup/resource_cleaner.go | 19 ++ pkg/aws/ec2/api/eni_cleanup.go | 204 -------------- pkg/aws/ec2/api/eni_cleanup_test.go | 124 -------- pkg/aws/ec2/api/helper.go | 16 +- pkg/aws/ec2/api/helper_test.go | 96 +++---- pkg/aws/ec2/api/wrapper.go | 115 +++++++- pkg/config/type.go | 3 + pkg/provider/branch/provider.go | 10 +- pkg/provider/branch/trunk/trunk.go | 59 ++-- pkg/provider/branch/trunk/trunk_test.go | 264 ++++++++++++------ 17 files changed, 956 insertions(+), 542 deletions(-) create mode 100644 pkg/aws/ec2/api/cleanup/eni_cleanup.go create mode 100644 pkg/aws/ec2/api/cleanup/eni_cleanup_test.go create mode 100644 pkg/aws/ec2/api/cleanup/node_cleanup.go create mode 100644 pkg/aws/ec2/api/cleanup/resource_cleaner.go delete mode 100644 pkg/aws/ec2/api/eni_cleanup.go delete mode 100644 pkg/aws/ec2/api/eni_cleanup_test.go diff --git a/main.go b/main.go index 8ef148d9..2c615eea 100644 --- a/main.go +++ b/main.go @@ -28,6 +28,7 @@ import ( corecontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/api" ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + eniCleaner "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" @@ -362,12 +363,17 @@ func main() { os.Exit(1) } - if err := (&ec2API.ENICleaner{ - EC2Wrapper: ec2Wrapper, + cleaner := &eniCleaner.ClusterENICleaner{ ClusterName: clusterName, - Log: ctrl.Log.WithName("eni cleaner"), - VPCID: vpcID, - }).SetupWithManager(ctx, mgr, healthzHandler); err != nil { + } + cleaner.ENICleaner = &eniCleaner.ENICleaner{ + EC2Wrapper: ec2Wrapper, + Manager: cleaner, + VpcId: vpcID, + Log: ctrl.Log.WithName("eniCleaner").WithName("cluster"), + } + + if err := cleaner.SetupWithManager(ctx, mgr, healthzHandler); err != nil { setupLog.Error(err, "unable to start eni cleaner") os.Exit(1) } diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go index 19f7e104..6bd94eb3 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_apihelper.go @@ -195,6 +195,20 @@ func (mr *MockEC2APIHelperMockRecorder) DetachNetworkInterfaceFromInstance(arg0 return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterfaceFromInstance", reflect.TypeOf((*MockEC2APIHelper)(nil).DetachNetworkInterfaceFromInstance), arg0) } +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2APIHelper) DisassociateTrunkInterface(arg0 *string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2APIHelperMockRecorder) DisassociateTrunkInterface(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2APIHelper)(nil).DisassociateTrunkInterface), arg0) +} + // GetBranchNetworkInterface mocks base method. func (m *MockEC2APIHelper) GetBranchNetworkInterface(arg0, arg1 *string) ([]*ec2.NetworkInterface, error) { m.ctrl.T.Helper() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go index f40d94c6..53515c5d 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api/mock_ec2_wrapper.go @@ -182,6 +182,21 @@ func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfaces(arg0 interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfaces", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfaces), arg0) } +// DescribeNetworkInterfacesPages mocks base method. +func (m *MockEC2Wrapper) DescribeNetworkInterfacesPages(arg0 *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DescribeNetworkInterfacesPages", arg0) + ret0, _ := ret[0].([]*ec2.NetworkInterface) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DescribeNetworkInterfacesPages indicates an expected call of DescribeNetworkInterfacesPages. +func (mr *MockEC2WrapperMockRecorder) DescribeNetworkInterfacesPages(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DescribeNetworkInterfacesPages", reflect.TypeOf((*MockEC2Wrapper)(nil).DescribeNetworkInterfacesPages), arg0) +} + // DescribeSubnets mocks base method. func (m *MockEC2Wrapper) DescribeSubnets(arg0 *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) { m.ctrl.T.Helper() @@ -227,6 +242,20 @@ func (mr *MockEC2WrapperMockRecorder) DetachNetworkInterface(arg0 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DetachNetworkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DetachNetworkInterface), arg0) } +// DisassociateTrunkInterface mocks base method. +func (m *MockEC2Wrapper) DisassociateTrunkInterface(arg0 *ec2.DisassociateTrunkInterfaceInput) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DisassociateTrunkInterface", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// DisassociateTrunkInterface indicates an expected call of DisassociateTrunkInterface. +func (mr *MockEC2WrapperMockRecorder) DisassociateTrunkInterface(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DisassociateTrunkInterface", reflect.TypeOf((*MockEC2Wrapper)(nil).DisassociateTrunkInterface), arg0) +} + // ModifyNetworkInterfaceAttribute mocks base method. func (m *MockEC2Wrapper) ModifyNetworkInterfaceAttribute(arg0 *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) { m.ctrl.T.Helper() diff --git a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go index ac4b1c73..59aae85c 100644 --- a/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go +++ b/mocks/amazon-vcp-resource-controller-k8s/pkg/provider/branch/trunk/mock_trunk.go @@ -64,18 +64,6 @@ func (mr *MockTrunkENIMockRecorder) CreateAndAssociateBranchENIs(arg0, arg1, arg return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateAndAssociateBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).CreateAndAssociateBranchENIs), arg0, arg1, arg2) } -// DeleteAllBranchENIs mocks base method. -func (m *MockTrunkENI) DeleteAllBranchENIs() { - m.ctrl.T.Helper() - m.ctrl.Call(m, "DeleteAllBranchENIs") -} - -// DeleteAllBranchENIs indicates an expected call of DeleteAllBranchENIs. -func (mr *MockTrunkENIMockRecorder) DeleteAllBranchENIs() *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAllBranchENIs", reflect.TypeOf((*MockTrunkENI)(nil).DeleteAllBranchENIs)) -} - // DeleteCooledDownENIs mocks base method. func (m *MockTrunkENI) DeleteCooledDownENIs() { m.ctrl.T.Helper() diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup.go b/pkg/aws/ec2/api/cleanup/eni_cleanup.go new file mode 100644 index 00000000..8b4d4ef7 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup.go @@ -0,0 +1,221 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + + ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/go-logr/logr" + kerrors "k8s.io/apimachinery/pkg/util/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" +) + +// NetworkInterfaceManager interface allows to define the ENI filters and checks if ENI should be deleted for different callers like in the periodic cleanup routine or +// during node termination +type NetworkInterfaceManager interface { + GetENITagFilters() []*ec2.Filter + ShouldDeleteENI(eniID *string) bool + UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) + UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) +} + +type ENICleaner struct { + EC2Wrapper api.EC2Wrapper + Manager NetworkInterfaceManager + VpcId string + Log logr.Logger +} + +// common filters for describing network interfaces +var CommonNetworkInterfaceFilters = []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, +} + +// ClusterENICleaner periodically deletes leaked network interfaces(provisioned by the controller or VPC-CNI) in the cluster +type ClusterENICleaner struct { + ClusterName string + shutdown bool + ctx context.Context + availableENIs map[string]struct{} + *ENICleaner +} + +func (e *ClusterENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { + e.ctx = ctx + e.availableENIs = make(map[string]struct{}) + healthzHandler.AddControllersHealthCheckers( + map[string]healthz.Checker{ + "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), + }, + ) + + return mgr.Add(e) +} + +// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller +func (e *ClusterENICleaner) Start(ctx context.Context) error { + e.Log.Info("starting eni clean up routine") + + // Start routine to listen for shut down signal, on receiving the signal it set shutdown to true + go func() { + <-ctx.Done() + e.shutdown = true + }() + // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown + // signal + for !e.shutdown { + e.DeleteLeakedResources() + time.Sleep(config.ENICleanUpInterval) + } + + return nil +} + +// DeleteLeakedResources describes all the network interfaces in available status that are created by the controller or VPC-CNI +// This is called by periodically by ClusterENICleaner which deletes available ENIs cluster-wide, and by the NodeTermination cleaner on node termination +// The available ENIs are deleted if ShouldDeleteENI is true, defined in the respective cleaners +// The function also updates metrics for the periodic cleanup routine and the node termination cleanup +func (e *ENICleaner) DeleteLeakedResources() error { + var errors []error + availableENIs := make(map[string]struct{}) + vpcrcAvailableCount := 0 + vpccniAvailableCount := 0 + leakedENICount := 0 + + filters := CommonNetworkInterfaceFilters + // Append the VPC-ID deep filter for the paginated call + filters = append(filters, []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(e.VpcId)}, + }, + }...) + // get cleaner specific filters + filters = append(filters, e.Manager.GetENITagFilters()...) + describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ + Filters: filters, + } + for { + describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) + if err != nil { + e.Log.Error(err, "failed to describe network interfaces, cleanup will be retried in next cycle") + return err + } + for _, nwInterface := range describeNetworkInterfaceOp.NetworkInterfaces { + if e.Manager.ShouldDeleteENI(nwInterface.NetworkInterfaceId) { + tagMap := utils.GetTagKeyValueMap(nwInterface.TagSet) + if val, ok := tagMap[config.NetworkInterfaceOwnerTagKey]; ok { + // Increment promethues metrics for number of leaked ENIs cleaned up + switch val { + case config.NetworkInterfaceOwnerTagValue: + vpcrcAvailableCount += 1 + case config.NetworkInterfaceOwnerVPCCNITagValue: + vpccniAvailableCount += 1 + default: + // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found + e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *nwInterface.NetworkInterfaceId) + continue + } + } + _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + }) + if err != nil { + if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error + // append err and continue, we will retry deletion in the next period/reconcile + leakedENICount += 1 + errors = append(errors, fmt.Errorf("failed to delete leaked network interface %v:%v", *nwInterface.NetworkInterfaceId, err)) + e.Log.Error(err, "failed to delete the leaked network interface", + "id", *nwInterface.NetworkInterfaceId) + } + continue + } + e.Log.Info("deleted leaked ENI successfully", "eni id", nwInterface.NetworkInterfaceId) + } else { + // Seeing the ENI for the first time, add it to the new list of available network interfaces + availableENIs[*nwInterface.NetworkInterfaceId] = struct{}{} + e.Log.Info("adding eni to to the map of available ENIs, will be removed if present in "+ + "next run too", "id", *nwInterface.NetworkInterfaceId) + } + } + + if describeNetworkInterfaceOp.NextToken == nil { + break + } + describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken + } + e.Manager.UpdateCleanupMetrics(vpcrcAvailableCount, vpccniAvailableCount, leakedENICount) + e.Manager.UpdateAvailableENIsIfNeeded(&availableENIs) + return kerrors.NewAggregate(errors) +} + +func (e *ClusterENICleaner) GetENITagFilters() []*ec2.Filter { + clusterNameTagKey := fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) + return []*ec2.Filter{ + { + Name: aws.String("tag:" + clusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + } +} + +// ShouldDeleteENI returns true if the ENI should be deleted. +func (e *ClusterENICleaner) ShouldDeleteENI(eniID *string) bool { + if _, exists := e.availableENIs[*eniID]; exists { + return true + } + return false +} + +// Set the available ENIs to the list of ENIs seen in the current cycle +// This adds ENIs that should not be deleted in the current cleanup cycle to the internal cache so it can be deleted in next cycle +// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to +// an instance or associated with a trunk interface in the periodic cleanup routine + +// Example +// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 +// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 +// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time +// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was +// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. +func (e *ClusterENICleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + e.availableENIs = *eniMap +} + +// Update cluster cleanup metrics for the current cleanup cycle +func (e *ClusterENICleaner) UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) { + api.VpcRcAvailableClusterENICnt.Set(float64(vpcrcAvailableCount)) + api.VpcCniAvailableClusterENICnt.Set(float64(vpccniAvailableCount)) + api.LeakedENIClusterCleanupCnt.Set(float64(leakedENICount)) +} diff --git a/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go new file mode 100644 index 00000000..dc3a25da --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/eni_cleanup_test.go @@ -0,0 +1,246 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "context" + "fmt" + "reflect" + "testing" + + mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/assert" +) + +var ( + mockClusterName = "cluster-name" + mockNodeName = "node-name" + mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) + + mockNetworkInterfaceId1 = "eni-000000000000000" + mockNetworkInterfaceId2 = "eni-000000000000001" + mockNetworkInterfaceId3 = "eni-000000000000002" + + mockVpcId = "vpc-0000000000000000" + + mockClusterTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVpcId)}, + }, + { + Name: aws.String("tag:" + mockClusterNameTagKey), + Values: []*string{aws.String(config.ClusterNameTagValue)}, + }, + }, + } + + mockNodenameTagInput = &ec2.DescribeNetworkInterfacesInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("status"), + Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), + Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, + config.NetworkInterfaceOwnerVPCCNITagValue}), + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(mockVpcId)}, + }, + { + Name: aws.String("tag:" + config.NetworkInterfaceNodenameKey), + Values: []*string{aws.String(mockNodeName)}, + }, + }, + } + + mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId2}, + }, + } + + mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{ + {NetworkInterfaceId: &mockNetworkInterfaceId1}, + {NetworkInterfaceId: &mockNetworkInterfaceId3}, + }, + } +) + +func getMockClusterENICleaner(ctrl *gomock.Controller) (*ClusterENICleaner, *mock_api.MockEC2Wrapper) { + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + mockclusterENICleaner := ClusterENICleaner{ + availableENIs: map[string]struct{}{}, + ctx: context.Background(), + ClusterName: mockClusterName, + } + mockclusterENICleaner.ENICleaner = &ENICleaner{ + EC2Wrapper: mockEC2Wrapper, + Manager: &mockclusterENICleaner, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + VpcId: mockVpcId, + } + return &mockclusterENICleaner, mockEC2Wrapper +} + +func TestENICleaner_DeleteLeakedResources(t *testing.T) { + type fields struct { + mockEC2Wrapper *mock_api.MockEC2Wrapper + clusterENICleaner *ClusterENICleaner + } + testENICleaner_DeleteLeakedResources := []struct { + name string + getENICleaner func() (*ENICleaner, *ClusterENICleaner) + prepare func(f *fields) + assertFirstCall func(f *fields) + assertSecondCall func(f *fields) + }{ + { + name: "ClusterENICleaner, verifies leaked ENIs are deleted in the periodic cleanup routine and availableENI is updated", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mockClusterENICleaner := &ClusterENICleaner{ + ClusterName: mockClusterName, + ctx: context.Background(), + availableENIs: map[string]struct{}{}, + } + mockClusterENICleaner.ENICleaner = &ENICleaner{ + Manager: mockClusterENICleaner, + VpcId: mockVpcId, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mockClusterENICleaner.ENICleaner, mockClusterENICleaner + }, + prepare: func(f *fields) { + gomock.InOrder( + // Return network interface 1 and 2 in first cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockClusterTagInput). + Return(mockDescribeInterfaceOpWith1And2, nil), + // Return network interface 1 and 3 in the second cycle + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockClusterTagInput). + Return(mockDescribeInterfaceOpWith1And3, nil), + // Expect to delete the network interface 1 + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + ) + + }, + assertFirstCall: func(f *fields) { + // After first call, network interface 1 and 2 should be added to the map of available ENIs + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, f.clusterENICleaner.availableENIs)) + + }, + assertSecondCall: func(f *fields) { + // After second call, network interface 1 should be deleted and network interface 3 added to list + assert.True(t, reflect.DeepEqual( + map[string]struct{}{mockNetworkInterfaceId3: {}}, f.clusterENICleaner.availableENIs)) + }, + }, + { + name: "NodeTerminationENICleaner, verifies ENIs are deleted on node termination", + getENICleaner: func() (*ENICleaner, *ClusterENICleaner) { + mocknodeCleaner := &NodeTerminationCleaner{ + NodeName: mockNodeName, + } + mocknodeCleaner.ENICleaner = &ENICleaner{ + Manager: mocknodeCleaner, + VpcId: mockVpcId, + Log: zap.New(zap.UseDevMode(true)).WithName("cluster eni cleaner test"), + } + return mocknodeCleaner.ENICleaner, nil + }, + prepare: func(f *fields) { + gomock.InOrder( + + // Return network interface 1 and 2 in first cycle, expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockNodenameTagInput). + Return(mockDescribeInterfaceOpWith1And2, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId2}).Return(nil, nil), + // Return network interface 1 and 3 in the second cycle, again expect to call delete on both + f.mockEC2Wrapper.EXPECT().DescribeNetworkInterfaces(mockNodenameTagInput). + Return(mockDescribeInterfaceOpWith1And3, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), + f.mockEC2Wrapper.EXPECT().DeleteNetworkInterface( + &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId3}).Return(nil, nil), + ) + }, + }, + } + + for _, tt := range testENICleaner_DeleteLeakedResources { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) + var mockENICleaner *ENICleaner + var mockClusterENICleaner *ClusterENICleaner + if tt.getENICleaner != nil { + mockENICleaner, mockClusterENICleaner = tt.getENICleaner() + } + mockENICleaner.EC2Wrapper = mockEC2Wrapper + f := fields{ + mockEC2Wrapper: mockEC2Wrapper, + clusterENICleaner: mockClusterENICleaner, + } + if tt.prepare != nil { + tt.prepare(&f) + } + + err := mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertFirstCall != nil { + tt.assertFirstCall(&f) + } + + err = mockENICleaner.DeleteLeakedResources() + assert.NoError(t, err) + if tt.assertSecondCall != nil { + tt.assertSecondCall(&f) + } + } +} + +// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. +func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { + ctrl := gomock.NewController(t) + eniCleaner, _ := getMockClusterENICleaner(ctrl) + + eniCleaner.shutdown = true + + eniCleaner.Start(context.TODO()) +} diff --git a/pkg/aws/ec2/api/cleanup/node_cleanup.go b/pkg/aws/ec2/api/cleanup/node_cleanup.go new file mode 100644 index 00000000..12c7c0ac --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/node_cleanup.go @@ -0,0 +1,50 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +import ( + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" +) + +// NodeTerminationCleanerto handle resource cleanup at node termination +type NodeTerminationCleaner struct { + NodeName string + *ENICleaner +} + +func (n *NodeTerminationCleaner) GetENITagFilters() []*ec2.Filter { + return []*ec2.Filter{ + { + Name: aws.String("tag:" + config.NetworkInterfaceNodenameKey), + Values: []*string{aws.String(n.NodeName)}, + }, + } +} + +// Return true. As the node is terminating all available ENIs need to be deleted +func (n *NodeTerminationCleaner) ShouldDeleteENI(eniID *string) bool { + return true +} + +func (n *NodeTerminationCleaner) UpdateAvailableENIsIfNeeded(eniMap *map[string]struct{}) { + // Nothing to do for the node termination cleaner + return +} + +// Updating node termination metrics does not make much sense as it will be updated on each node deletion and does not give us much info +func (n *NodeTerminationCleaner) UpdateCleanupMetrics(vpcrcAvailableCount int, vpccniAvailableCount int, leakedENICount int) { + return +} diff --git a/pkg/aws/ec2/api/cleanup/resource_cleaner.go b/pkg/aws/ec2/api/cleanup/resource_cleaner.go new file mode 100644 index 00000000..d5c7e6d1 --- /dev/null +++ b/pkg/aws/ec2/api/cleanup/resource_cleaner.go @@ -0,0 +1,19 @@ +// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package cleanup + +// ResourceCleaner interface should be implemented by components that need to delete leaked AWS resources +type ResourceCleaner interface { + DeleteLeakedResources() error +} diff --git a/pkg/aws/ec2/api/eni_cleanup.go b/pkg/aws/ec2/api/eni_cleanup.go deleted file mode 100644 index 583529a8..00000000 --- a/pkg/aws/ec2/api/eni_cleanup.go +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "strings" - "time" - - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" - "github.com/prometheus/client_golang/prometheus" - "golang.org/x/exp/slices" - - ec2Errors "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/errors" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/go-logr/logr" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/healthz" -) - -type ENICleaner struct { - EC2Wrapper EC2Wrapper - ClusterName string - Log logr.Logger - VPCID string - - availableENIs map[string]struct{} - shutdown bool - clusterNameTagKey string - ctx context.Context -} - -var ( - vpccniAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_cni_created_available_eni_count", - Help: "The number of available ENIs created by VPC-CNI that controller will try to delete in each cleanup cycle", - }, - ) - vpcrcAvailableENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "vpc_rc_created_available_eni_count", - Help: "The number of available ENIs created by VPC-RC that controller will try to delete in each cleanup cycle", - }, - ) - leakedENICnt = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "leaked_eni_count", - Help: "The number of available ENIs that failed to be deleted by the controller in each cleanup cycle", - }, - ) -) - -func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { - e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) - e.availableENIs = make(map[string]struct{}) - e.ctx = ctx - - healthzHandler.AddControllersHealthCheckers( - map[string]healthz.Checker{ - "health-interface-cleaner": rcHealthz.SimplePing("interface cleanup", e.Log), - }, - ) - - return mgr.Add(e) -} - -// StartENICleaner starts the ENI Cleaner routine that cleans up dangling ENIs created by the controller -func (e *ENICleaner) Start(ctx context.Context) error { - e.Log.Info("starting eni clean up routine") - // Start routine to listen for shut down signal, on receiving the signal it set shutdown to true - go func() { - <-ctx.Done() - e.shutdown = true - }() - // Perform ENI cleanup after fixed time intervals till shut down variable is set to true on receiving the shutdown - // signal - for !e.shutdown { - e.cleanUpAvailableENIs() - time.Sleep(config.ENICleanUpInterval) - } - - return nil -} - -// cleanUpAvailableENIs describes all the network interfaces in available status that are created by the controller, -// on seeing the a network interface for the first time, it is added to the map of available network interfaces, on -// seeing the network interface for the second time the network interface is deleted. This ensures that we are deleting -// the network interfaces that have been in available for upto the time interval between running the clean up routine. -// This prevents the clean up routine to remove ENIs that are created by another routines and are yet not attached to -// an instance or associated with a trunk interface -// Example, -// 1st cycle, Describe Available NetworkInterface Result - Interface 1, Interface 2, Interface 3 -// 2nd cycle, Describe Available NetworkInterface Result - Interface 2, Interface 3 -// In the second cycle we can conclude that Interface 2 and 3 are leaked because they have been sitting for the time -// interval between cycle 1 and 2 and hence can be safely deleted. And we can also conclude that Interface 1 was -// created but not attached at the the time when 1st cycle ran and hence it should not be deleted. -func (e *ENICleaner) cleanUpAvailableENIs() { - vpcrcAvailableCount := 0 - vpccniAvailableCount := 0 - leakedENICount := 0 - - describeNetworkInterfaceIp := &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + e.clusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(e.VPCID)}, - }, - }, - } - - availableENIs := make(map[string]struct{}) - - for { - describeNetworkInterfaceOp, err := e.EC2Wrapper.DescribeNetworkInterfaces(describeNetworkInterfaceIp) - if err != nil { - e.Log.Error(err, "failed to describe network interfaces, will retry") - return - } - - for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces { - if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { - // Increment promethues metrics for number of leaked ENIs cleaned up - if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { - return *tag.Key == config.NetworkInterfaceOwnerTagKey - }); tagIdx != -1 { - switch *networkInterface.TagSet[tagIdx].Value { - case config.NetworkInterfaceOwnerTagValue: - vpcrcAvailableCount += 1 - case config.NetworkInterfaceOwnerVPCCNITagValue: - vpccniAvailableCount += 1 - default: - // We should not hit this case as we only filter for relevant tag values, log error and continue if unexpected ENIs found - e.Log.Error(fmt.Errorf("found available ENI not created by VPC-CNI/VPC-RC"), "eniID", *networkInterface.NetworkInterfaceId) - continue - } - } - - // The ENI in available state has been sitting for at least the eni clean up interval and it should - // be removed - _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ - NetworkInterfaceId: networkInterface.NetworkInterfaceId, - }) - if err != nil { - if !strings.Contains(err.Error(), ec2Errors.NotFoundInterfaceID) { // ignore InvalidNetworkInterfaceID.NotFound error - // append err and continue, we will retry deletion in the next period/reconcile - leakedENICount += 1 - - e.Log.Error(err, "failed to delete the dangling network interface", - "id", *networkInterface.NetworkInterfaceId) - } - continue - } - e.Log.Info("deleted dangling ENI successfully", - "eni id", networkInterface.NetworkInterfaceId) - } else { - // Seeing the ENI for the first time, add it to the new list of available network interfaces - availableENIs[*networkInterface.NetworkInterfaceId] = struct{}{} - e.Log.V(1).Info("adding eni to to the map of available ENIs, will be removed if present in "+ - "next run too", "id", *networkInterface.NetworkInterfaceId) - } - } - - if describeNetworkInterfaceOp.NextToken == nil { - break - } - - describeNetworkInterfaceIp.NextToken = describeNetworkInterfaceOp.NextToken - } - - // Update leaked ENI metrics - vpcrcAvailableENICnt.Set(float64(vpcrcAvailableCount)) - vpccniAvailableENICnt.Set(float64(vpccniAvailableCount)) - leakedENICnt.Set(float64(leakedENICount)) - // Set the available ENIs to the list of ENIs seen in the current cycle - e.availableENIs = availableENIs -} diff --git a/pkg/aws/ec2/api/eni_cleanup_test.go b/pkg/aws/ec2/api/eni_cleanup_test.go deleted file mode 100644 index e00127c0..00000000 --- a/pkg/aws/ec2/api/eni_cleanup_test.go +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"). You may -// not use this file except in compliance with the License. A copy of the -// License is located at -// -// http://aws.amazon.com/apache2.0/ -// -// or in the "license" file accompanying this file. This file is distributed -// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language governing -// permissions and limitations under the License. - -package api - -import ( - "context" - "fmt" - "reflect" - "testing" - - mock_api "github.com/aws/amazon-vpc-resource-controller-k8s/mocks/amazon-vcp-resource-controller-k8s/pkg/aws/ec2/api" - "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" - - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/service/ec2" - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/assert" - "sigs.k8s.io/controller-runtime/pkg/log/zap" -) - -var ( - mockClusterName = "cluster-name" - mockClusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, mockClusterName) - - mockNetworkInterfaceId1 = "eni-000000000000000" - mockNetworkInterfaceId2 = "eni-000000000000001" - mockNetworkInterfaceId3 = "eni-000000000000002" - - mockVPCID = "vpc-0000000000000000" - - mockDescribeNetworkInterfaceIp = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("status"), - Values: []*string{aws.String(ec2.NetworkInterfaceStatusAvailable)}, - }, - { - Name: aws.String("tag:" + mockClusterNameTagKey), - Values: []*string{aws.String(config.ClusterNameTagValue)}, - }, - { - Name: aws.String("tag:" + config.NetworkInterfaceOwnerTagKey), - Values: aws.StringSlice([]string{config.NetworkInterfaceOwnerTagValue, - config.NetworkInterfaceOwnerVPCCNITagValue}), - }, - { - Name: aws.String("vpc-id"), - Values: []*string{aws.String(mockVPCID)}, - }, - }, - } - mockDescribeInterfaceOpWith1And2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId2}, - }, - } - mockDescribeInterfaceOpWith1And3 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{ - {NetworkInterfaceId: &mockNetworkInterfaceId1}, - {NetworkInterfaceId: &mockNetworkInterfaceId3}, - }, - } -) - -func getMockENICleaner(ctrl *gomock.Controller) (*ENICleaner, *mock_api.MockEC2Wrapper) { - mockEC2Wrapper := mock_api.NewMockEC2Wrapper(ctrl) - return &ENICleaner{ - EC2Wrapper: mockEC2Wrapper, - availableENIs: map[string]struct{}{}, - Log: zap.New(zap.UseDevMode(true)), - VPCID: mockVPCID, - clusterNameTagKey: mockClusterNameTagKey, - ctx: context.Background(), - }, mockEC2Wrapper -} - -func TestENICleaner_cleanUpAvailableENIs(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, mockWrapper := getMockENICleaner(ctrl) - - gomock.InOrder( - // Return network interface 1 and 2 in first cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And2, nil), - // Return network interface 1 and 3 in the second cycle - mockWrapper.EXPECT().DescribeNetworkInterfaces(mockDescribeNetworkInterfaceIp). - Return(mockDescribeInterfaceOpWith1And3, nil), - // Expect to delete the network interface 1 - mockWrapper.EXPECT().DeleteNetworkInterface( - &ec2.DeleteNetworkInterfaceInput{NetworkInterfaceId: &mockNetworkInterfaceId1}).Return(nil, nil), - ) - - // Run 1st cycle, network interface 1 and 2 should be added to the map of available ENIs - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId1: {}, mockNetworkInterfaceId2: {}}, eniCleaner.availableENIs)) - - // Run the second cycle, this time network interface 1 should be deleted and network interface 3 added to list - eniCleaner.cleanUpAvailableENIs() - assert.True(t, reflect.DeepEqual( - map[string]struct{}{mockNetworkInterfaceId3: {}}, eniCleaner.availableENIs)) -} - -// TestENICleaner_StartENICleaner_Shutdown tests that ENICleaner would not start if shutdown is set to true. -func TestENICleaner_StartENICleaner_Shutdown(t *testing.T) { - ctrl := gomock.NewController(t) - eniCleaner, _ := getMockENICleaner(ctrl) - - eniCleaner.shutdown = true - - eniCleaner.Start(context.TODO()) -} diff --git a/pkg/aws/ec2/api/helper.go b/pkg/aws/ec2/api/helper.go index 14a7864f..e06e6b06 100644 --- a/pkg/aws/ec2/api/helper.go +++ b/pkg/aws/ec2/api/helper.go @@ -93,6 +93,7 @@ type EC2APIHelper interface { GetInstanceDetails(instanceId *string) (*ec2.Instance, error) AssignIPv4ResourcesAndWaitTillReady(eniID string, resourceType config.ResourceType, count int) ([]string, error) UnassignIPv4Resources(eniID string, resourceType config.ResourceType, resources []string) error + DisassociateTrunkInterface(associationID *string) error } // CreateNetworkInterface creates a new network interface @@ -101,7 +102,7 @@ func (h *ec2APIHelper) CreateNetworkInterface(description *string, subnetId *str eniDescription := CreateENIDescriptionPrefix + *description var ec2SecurityGroups []*string - if securityGroups != nil && len(securityGroups) != 0 { + if len(securityGroups) > 0 { // Only add security groups if there are one or more security group provided, otherwise API call will fail instead // of creating the interface with default security groups ec2SecurityGroups = aws.StringSlice(securityGroups) @@ -405,10 +406,7 @@ func (h *ec2APIHelper) WaitForNetworkInterfaceStatusChange(networkInterfaceId *s err := retry.OnError(waitForENIAttachment, func(err error) bool { - if err == ErrRetryAttachmentStatusCheck { - return true - } - return false + return err == ErrRetryAttachmentStatusCheck }, func() error { interfaces, err := h.DescribeNetworkInterfaces([]*string{networkInterfaceId}) if err == nil && len(interfaces) == 1 { @@ -603,7 +601,6 @@ func (h *ec2APIHelper) GetBranchNetworkInterface(trunkID, subnetID *string) ([]* describeNetworkInterfacesInput.NextToken = describeNetworkInterfaceOutput.NextToken } - return nwInterfaces, nil } @@ -623,3 +620,10 @@ func (h *ec2APIHelper) DetachAndDeleteNetworkInterface(attachmentID *string, nwI } return nil } + +func (h *ec2APIHelper) DisassociateTrunkInterface(associationID *string) error { + input := &ec2.DisassociateTrunkInterfaceInput{ + AssociationId: associationID, + } + return h.ec2Wrapper.DisassociateTrunkInterface(input) +} diff --git a/pkg/aws/ec2/api/helper_test.go b/pkg/aws/ec2/api/helper_test.go index 6981c99a..390ad983 100644 --- a/pkg/aws/ec2/api/helper_test.go +++ b/pkg/aws/ec2/api/helper_test.go @@ -67,7 +67,7 @@ var ( eniDescription = "mock description of eni" eniDescriptionWithPrefix = "aws-k8s-" + eniDescription - mockError = fmt.Errorf("failed to do ec2 call") + errMock = fmt.Errorf("failed to do ec2 call") ) var ( @@ -177,9 +177,7 @@ var ( TagSet: branchTag2, } - tokenID = "token" - - describeTrunkInterfaceInput1 = &ec2.DescribeNetworkInterfacesInput{ + describeTrunkInterfaceInput = &ec2.DescribeNetworkInterfacesInput{ Filters: []*ec2.Filter{ { Name: aws.String("tag:" + config.TrunkENIIDTag), @@ -191,26 +189,9 @@ var ( }, }, } - describeTrunkInterfaceInput2 = &ec2.DescribeNetworkInterfacesInput{ - Filters: []*ec2.Filter{ - { - Name: aws.String("tag:" + config.TrunkENIIDTag), - Values: []*string{&trunkInterfaceId}, - }, - { - Name: aws.String("subnet-id"), - Values: aws.StringSlice([]string{subnetId}), - }, - }, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput1 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1}, - NextToken: &tokenID, - } - describeTrunkInterfaceOutput2 = &ec2.DescribeNetworkInterfacesOutput{ - NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface2}, + describeTrunkInterfaceOutput = &ec2.DescribeNetworkInterfacesOutput{ + NetworkInterfaces: []*ec2.NetworkInterface{&networkInterface1, &networkInterface2}, } describeTrunkInterfaceAssociationsInput = &ec2.DescribeTrunkInterfaceAssociationsInput{ @@ -413,13 +394,13 @@ func TestEc2APIHelper_AssociateBranchToTrunk_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) // Return empty association response - mockWrapper.EXPECT().AssociateTrunkInterface(associateTrunkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().AssociateTrunkInterface(associateTrunkInterfaceInput).Return(nil, errMock) mockWrapper.EXPECT().CreateNetworkInterfacePermission(createNetworkInterfacePermissionInputBranch). Return(nil, nil) _, err := ec2ApiHelper.AssociateBranchToTrunk(&trunkInterfaceId, &branchInterfaceId, vlanId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_CreateNetworkInterface_NoSecondaryIP tests network interface creation when no secondary IPs are @@ -502,7 +483,7 @@ func TestEc2APIHelper_CreateNetworkInterface_WithSecondaryIPAndPrefixCount_Error createNetworkInterfaceInput.SecondaryPrivateIpAddressCount = nil createNetworkInterfaceInput.Ipv4PrefixCount = nil - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_CreateNetworkInterface_TypeTrunk tests network interface creation with the interface type trunk @@ -550,11 +531,11 @@ func TestEc2APIHelper_CreateNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(nil, errMock) _, err := ec2ApiHelper.CreateNetworkInterface(&eniDescription, &subnetId, securityGroups, tags, nil, nil) - assert.Error(t, err, mockError) + assert.Error(t, err, errMock) } // TestEc2APIHelper_DeleteNetworkInterface tests delete network interface returns correct response in case of valid @@ -579,10 +560,10 @@ func TestEc2APIHelper_DeleteNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, mockError).Times(maxRetryOnError) + mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, errMock).Times(maxRetryOnError) err := ec2ApiHelper.DeleteNetworkInterface(&branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DeleteNetworkInterface_ErrorThenSuccess tests that if delete network call fails initially and @@ -594,7 +575,7 @@ func TestEc2APIHelper_DeleteNetworkInterface_ErrorThenSuccess(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) gomock.InOrder( - mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, mockError).Times(2), + mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, errMock).Times(2), mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, nil).Times(1), ) @@ -634,10 +615,10 @@ func TestEc2APIHelper_GetSubnet_Error(t *testing.T) { defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeSubnets(describeSubnetInput).Return(nil, mockError) + mockWrapper.EXPECT().DescribeSubnets(describeSubnetInput).Return(nil, errMock) _, err := ec2ApiHelper.GetSubnet(&subnetId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_GetNetworkInterfaceOfInstance tests that describe network interface returns no errors @@ -664,10 +645,10 @@ func TestEc2APIHelper_GetNetworkInterfaceOfInstance_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeInstances(describeNetworkInterfaceInputUsingInstanceId).Return(nil, mockError) + mockWrapper.EXPECT().DescribeInstances(describeNetworkInterfaceInputUsingInstanceId).Return(nil, errMock) _, err := ec2ApiHelper.GetInstanceNetworkInterface(&instanceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DescribeNetworkInterfaces tests describe network interface call works as expected under @@ -695,10 +676,10 @@ func TestEc2APIHelper_DescribeNetworkInterfaces_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().DescribeNetworkInterfaces(describeNetworkInterfaceInputUsingInterfaceId). - Return(nil, mockError) + Return(nil, errMock) _, err := ec2ApiHelper.DescribeNetworkInterfaces([]*string{&branchInterfaceId, &branchInterfaceId2}) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DescribeTrunkInterfaceAssociation tests that the describe trunk interface association returns @@ -741,10 +722,10 @@ func TestEc2APIHelper_DescribeTrunkInterfaceAssociation_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().DescribeTrunkInterfaceAssociations(describeTrunkInterfaceAssociationsInput). - Return(nil, mockError) + Return(nil, errMock) _, err := ec2ApiHelper.DescribeTrunkInterfaceAssociation(&trunkInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } func TestEc2APIHelper_CreateAndAttachNetworkInterface(t *testing.T) { @@ -782,7 +763,7 @@ func TestEc2APIHelper_CreateAndAttachNetworkInterface_DeleteOnAttachFailed(t *te ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(createNetworkInterfaceOutput, nil) - mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, mockError) + mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, errMock) // Test delete is called mockWrapper.EXPECT().DeleteNetworkInterface(deleteNetworkInterfaceInput).Return(nil, nil) @@ -804,7 +785,7 @@ func TestEc2APIHelper_CreateAndAttachNetworkInterface_DeleteOnSetTerminationFail mockWrapper.EXPECT().CreateNetworkInterface(createNetworkInterfaceInput).Return(createNetworkInterfaceOutput, nil) mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(attachNetworkInterfaceOutput, nil) - mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput).Return(nil, mockError) + mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput).Return(nil, errMock) // Test detach and delete is called mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, nil) @@ -841,10 +822,10 @@ func TestEc2APIHelper_SetDeleteOnTermination_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) mockWrapper.EXPECT().ModifyNetworkInterfaceAttribute(modifyNetworkInterfaceAttributeInput). - Return(nil, mockError) + Return(nil, errMock) err := ec2ApiHelper.SetDeleteOnTermination(&attachmentId, &branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AttachNetworkInterfaceToInstance no error is returned when valid inputs are passed @@ -884,10 +865,10 @@ func TestEC2APIHelper_AttachNetworkInterfaceToInstance_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().AttachNetworkInterface(attachNetworkInterfaceInput).Return(nil, errMock) _, err := ec2ApiHelper.AttachNetworkInterfaceToInstance(&instanceId, &branchInterfaceId, &deviceIndex) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DetachNetworkInterfaceFromInstance tests ec2 api call returns no error on valid input @@ -938,10 +919,10 @@ func TestEC2ADIHelper_WaitForNetworkInterfaceStatusChange_NonRetryableError(t *t statusAvailable := "available" mockWrapper.EXPECT().DescribeNetworkInterfaces(describeNetworkInterfaceInputUsingOneInterfaceId). - Return(nil, mockError) + Return(nil, errMock) err := ec2ApiHelper.WaitForNetworkInterfaceStatusChange(&branchInterfaceId, statusAvailable) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEc2APIHelper_DetachAndDeleteNetworkInterface tests the ec2 api calls are called in order with the desired input @@ -974,10 +955,10 @@ func TestEc2APIHelper_DetachAndDeleteNetworkInterface_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, mockError) + mockWrapper.EXPECT().DetachNetworkInterface(detachNetworkInterfaceInput).Return(nil, errMock) err := ec2ApiHelper.DetachAndDeleteNetworkInterface(&attachmentId, &branchInterfaceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } @@ -1019,10 +1000,10 @@ func TestEC2APIHelper_GetInstanceDetails_Error(t *testing.T) { ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeInstances(describeInstanceInput).Return(nil, mockError) + mockWrapper.EXPECT().DescribeInstances(describeInstanceInput).Return(nil, errMock) _, err := ec2ApiHelper.GetInstanceDetails(&instanceId) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address tests that once new IP addresses are assigned they are returned @@ -1067,11 +1048,11 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address_Error( ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInput).Return(nil, mockError) + mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInput).Return(nil, errMock) _, err := ec2ApiHelper.AssignIPv4ResourcesAndWaitTillReady(eniID, config.ResourceTypeIPv4Address, 2) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Error tests that error is returned if the assign private IP call @@ -1082,11 +1063,11 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Error(t ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInputPrefix).Return(nil, mockError) + mockWrapper.EXPECT().AssignPrivateIPAddresses(assignPrivateIPInputPrefix).Return(nil, errMock) _, err := ec2ApiHelper.AssignIPv4ResourcesAndWaitTillReady(eniID, config.ResourceTypeIPv4Prefix, 2) - assert.Error(t, mockError, err) + assert.Error(t, errMock, err) } // TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Address_AttachedAfterSecondDescribe tests if the describe call is called @@ -1190,14 +1171,13 @@ func TestEC2APIHelper_AssignIPv4ResourcesAndWaitTillReady_TypeIPv4Prefix_Describ } // TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults returns the branch interface when paginated results is returned -func TestEc2APIHelper_GetBranchNetworkInterface_PaginatedResults(t *testing.T) { +func TestEc2APIHelper_GetBranchNetworkInterface(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() ec2ApiHelper, mockWrapper := getMockWrapper(ctrl) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput1).Return(describeTrunkInterfaceOutput1, nil) - mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput2).Return(describeTrunkInterfaceOutput2, nil) + mockWrapper.EXPECT().DescribeNetworkInterfaces(describeTrunkInterfaceInput).Return(describeTrunkInterfaceOutput, nil) branchInterfaces, err := ec2ApiHelper.GetBranchNetworkInterface(&trunkInterfaceId, &subnetId) assert.NoError(t, err) diff --git a/pkg/aws/ec2/api/wrapper.go b/pkg/aws/ec2/api/wrapper.go index 29e93156..b7c15cc4 100644 --- a/pkg/aws/ec2/api/wrapper.go +++ b/pkg/aws/ec2/api/wrapper.go @@ -20,7 +20,9 @@ import ( "time" "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/utils" + "k8s.io/apimachinery/pkg/util/wait" + "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/credentials" "github.com/aws/aws-sdk-go/aws/credentials/stscreds" @@ -51,12 +53,14 @@ type EC2Wrapper interface { AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddressesInput) (*ec2.UnassignPrivateIpAddressesOutput, error) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfacesInput) (*ec2.DescribeNetworkInterfacesOutput, error) + DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) CreateTags(input *ec2.CreateTagsInput) (*ec2.CreateTagsOutput, error) DescribeSubnets(input *ec2.DescribeSubnetsInput) (*ec2.DescribeSubnetsOutput, error) AssociateTrunkInterface(input *ec2.AssociateTrunkInterfaceInput) (*ec2.AssociateTrunkInterfaceOutput, error) DescribeTrunkInterfaceAssociations(input *ec2.DescribeTrunkInterfaceAssociationsInput) (*ec2.DescribeTrunkInterfaceAssociationsOutput, error) ModifyNetworkInterfaceAttribute(input *ec2.ModifyNetworkInterfaceAttributeInput) (*ec2.ModifyNetworkInterfaceAttributeOutput, error) CreateNetworkInterfacePermission(input *ec2.CreateNetworkInterfacePermissionInput) (*ec2.CreateNetworkInterfacePermissionOutput, error) + DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error } var ( @@ -252,7 +256,7 @@ var ( ec2AssociateTrunkInterfaceAPIErrCnt = prometheus.NewCounter( prometheus.CounterOpts{ Name: "ec2_associate_trunk_interface_api_err_count", - Help: "The number of errors encountered while disassociating Trunk with Branch ENI", + Help: "The number of errors encountered while associating Trunk with Branch ENI", }, ) @@ -306,6 +310,54 @@ var ( }, ) + ec2DisassociateTrunkInterfaceCallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_req_count", + Help: "The number of calls made to EC2 to remove association between a branch and trunk network interface", + }, + ) + + ec2DisassociateTrunkInterfaceErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_disassociate_trunk_interface_api_err_count", + Help: "The number of errors encountered while removing association between a branch and trunk network interface", + }, + ) + + VpcCniAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_cni_created_available_eni_count", + Help: "The number of available ENIs created by VPC-CNI that will tried to be deleted by the controller", + }, + ) + + VpcRcAvailableClusterENICnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "vpc_rc_created_available_eni_count", + Help: "The number of available ENIs created by VPC-RC that will tried to be deleted by the controller", + }, + ) + + LeakedENIClusterCleanupCnt = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "leaked_eni_count", + Help: "The number of available ENIs that failed to be deleted by the controller", + }, + ) + + ec2DescribeNetworkInterfacesPagesAPICallCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_call_count", + Help: "The number of calls made to describe network interfaces (paginated)", + }, + ) + ec2DescribeNetworkInterfacesPagesAPIErrCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "ec2_describe_network_interfaces_pages_api_err_count", + Help: "The number of errors encountered while making call to describe network interfaces (paginated)", + }, + ) + prometheusRegistered = false ) @@ -344,9 +396,13 @@ func prometheusRegister() { ec2modifyNetworkInterfaceAttributeAPICallCnt, ec2modifyNetworkInterfaceAttributeAPIErrCnt, ec2APICallLatencies, - vpccniAvailableENICnt, - vpcrcAvailableENICnt, - leakedENICnt, + ec2DisassociateTrunkInterfaceCallCnt, + ec2DisassociateTrunkInterfaceErrCnt, + VpcRcAvailableClusterENICnt, + VpcCniAvailableClusterENICnt, + LeakedENIClusterCleanupCnt, + ec2DescribeNetworkInterfacesPagesAPICallCnt, + ec2DescribeNetworkInterfacesPagesAPIErrCnt, ) prometheusRegistered = true @@ -641,6 +697,37 @@ func (e *ec2Wrapper) DescribeNetworkInterfaces(input *ec2.DescribeNetworkInterfa return describeNetworkInterfacesOutput, err } +// DescribeNetworkInterfacesPages returns network interfaces that match the filters specified in the input with MaxResult set to 1000(max value) +// The API is not used today, adding it for future use +func (e *ec2Wrapper) DescribeNetworkInterfacesPages(input *ec2.DescribeNetworkInterfacesInput) ([]*ec2.NetworkInterface, error) { + var networkInterfaces []*ec2.NetworkInterface + input.MaxResults = aws.Int64(config.DescribeNetworkInterfacesMaxResults) + + start := time.Now() + if err := e.userServiceClient.DescribeNetworkInterfacesPages(input, func(output *ec2.DescribeNetworkInterfacesOutput, _ bool) bool { + ec2APICallCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPICallCnt.Inc() + //Currently only network interface ID and the tag set is require, only add required details to avoid consuming extra memory + for _, nwInterface := range output.NetworkInterfaces { + networkInterfaces = append(networkInterfaces, &ec2.NetworkInterface{ + NetworkInterfaceId: nwInterface.NetworkInterfaceId, + TagSet: nwInterface.TagSet, + }) + } + // Add jitter to avoid EC2 API throttling in the account + time.Sleep(wait.Jitter(500*time.Millisecond, 0.5)) + return true + + }); err != nil { + ec2APIErrCnt.Inc() + ec2DescribeNetworkInterfacesPagesAPIErrCnt.Inc() + return nil, err + } + ec2APICallLatencies.WithLabelValues("describe_network_interfaces_pages").Observe(timeSinceMs(start)) + + return networkInterfaces, nil +} + func (e *ec2Wrapper) AssignPrivateIPAddresses(input *ec2.AssignPrivateIpAddressesInput) (*ec2.AssignPrivateIpAddressesOutput, error) { start := time.Now() assignPrivateIPAddressesOutput, err := e.userServiceClient.AssignPrivateIpAddresses(input) @@ -674,9 +761,9 @@ func (e *ec2Wrapper) UnassignPrivateIPAddresses(input *ec2.UnassignPrivateIpAddr // Metric updates ec2APICallCnt.Inc() ec2UnassignPrivateIPAddressAPICallCnt.Inc() - if input.PrivateIpAddresses != nil && len(input.PrivateIpAddresses) != 0 { + if len(input.PrivateIpAddresses) > 0 { numUnassignedSecondaryIPAddress.Add(float64(len(input.PrivateIpAddresses))) - } else if input.Ipv4Prefixes != nil && len(input.Ipv4Prefixes) != 0 { + } else if len(input.Ipv4Prefixes) > 0 { numUnassignedIPv4Prefixes.Add(float64(len(input.Ipv4Prefixes))) } @@ -822,3 +909,19 @@ func (e *ec2Wrapper) getRegionalStsEndpoint(partitionID, region string) (endpoin } return res, nil } + +func (e *ec2Wrapper) DisassociateTrunkInterface(input *ec2.DisassociateTrunkInterfaceInput) error { + start := time.Now() + // Using the instance role + _, err := e.instanceServiceClient.DisassociateTrunkInterface(input) + ec2APICallLatencies.WithLabelValues("disassociate_branch_from_trunk").Observe(timeSinceMs(start)) + + ec2APICallCnt.Inc() + ec2DisassociateTrunkInterfaceCallCnt.Inc() + + if err != nil { + ec2APIErrCnt.Inc() + ec2DisassociateTrunkInterfaceErrCnt.Inc() + } + return err +} diff --git a/pkg/config/type.go b/pkg/config/type.go index 348c8a2d..88f369f6 100644 --- a/pkg/config/type.go +++ b/pkg/config/type.go @@ -67,6 +67,7 @@ const ( NetworkInterfaceOwnerTagKey = "eks:eni:owner" NetworkInterfaceOwnerTagValue = "eks-vpc-resource-controller" NetworkInterfaceOwnerVPCCNITagValue = "amazon-vpc-cni" + NetworkInterfaceNodenameKey = "node.k8s.amazonaws.com/nodename" CNINodeClusterNameKey = "cluster.k8s.amazonaws.com/name" ) @@ -89,6 +90,8 @@ const ( VpcCNIDaemonSetName = "aws-node" OldVPCControllerDeploymentName = "vpc-resource-controller" BranchENICooldownPeriodKey = "branch-eni-cooldown" + // DescribeNetworkInterfacesMaxResults defines the max number of requests to return for DescribeNetworkInterfaces API call + DescribeNetworkInterfacesMaxResults = int64(1000) ) type ResourceType string diff --git a/pkg/provider/branch/provider.go b/pkg/provider/branch/provider.go index c79780b1..9ba8eb8c 100644 --- a/pkg/provider/branch/provider.go +++ b/pkg/provider/branch/provider.go @@ -179,8 +179,8 @@ func (b *branchENIProvider) InitResource(instance ec2.EC2Instance) error { } branchProviderOperationLatency.WithLabelValues(operationInitTrunk, "1").Observe(timeSinceSeconds(start)) - // Add the Trunk ENI to cache - if err := b.addTrunkToCache(nodeName, trunkENI); err != nil { + // Add the Trunk ENI to cache if it does not already exist + if err := b.addTrunkToCache(nodeName, trunkENI); err != nil && err != ErrTrunkExistInCache { branchProviderOperationsErrCount.WithLabelValues("add_trunk_to_cache").Inc() return err } @@ -238,12 +238,14 @@ func (b *branchENIProvider) ProcessAsyncJob(job interface{}) (ctrl.Result, error // DeleteNode deletes all the cached branch ENIs associated with the trunk and removes the trunk from the cache. func (b *branchENIProvider) DeleteNode(nodeName string) (ctrl.Result, error) { - trunkENI, isPresent := b.getTrunkFromCache(nodeName) + _, isPresent := b.getTrunkFromCache(nodeName) if !isPresent { return ctrl.Result{}, fmt.Errorf("failed to find node %s", nodeName) } - trunkENI.DeleteAllBranchENIs() + // At this point, the finalizer routine should have deleted all available branch ENIs + // Any leaked ENIs will be deleted by the periodic cleanup routine if cluster is active + // remove trunk from cache and de-initializer the resource provider b.removeTrunkFromCache(nodeName) b.log.Info("de-initialized resource provider successfully", "nodeName", nodeName) diff --git a/pkg/provider/branch/trunk/trunk.go b/pkg/provider/branch/trunk/trunk.go index ddd814b4..35ea209b 100644 --- a/pkg/provider/branch/trunk/trunk.go +++ b/pkg/provider/branch/trunk/trunk.go @@ -102,8 +102,6 @@ type TrunkENI interface { Reconcile(pods []v1.Pod) bool // PushENIsToFrontOfDeleteQueue pushes the eni network interfaces to the front of the delete queue PushENIsToFrontOfDeleteQueue(*v1.Pod, []*ENIDetails) - // DeleteAllBranchENIs deletes all the branch ENI associated with the trunk and also clears the cool down queue - DeleteAllBranchENIs() // Introspect returns the state of the Trunk ENI Introspect() IntrospectResponse } @@ -126,6 +124,8 @@ type trunkENI struct { uidToBranchENIMap map[string][]*ENIDetails // deleteQueue is the queue of ENIs that are being cooled down before being deleted deleteQueue []*ENIDetails + // nodeName tag is the tag added to trunk and branch ENIs created on the node + nodeNameTag []*awsEC2.Tag } // PodENI is a json convertible structure that stores the Branch ENI details that can be @@ -147,6 +147,8 @@ type ENIDetails struct { deletionTimeStamp time.Time // deleteRetryCount is the deleteRetryCount int + // ID of association between branch and trunk ENI + AssociationID string `json:"associationID"` } type IntrospectResponse struct { @@ -176,6 +178,12 @@ func NewTrunkENI(logger logr.Logger, instance ec2.EC2Instance, helper api.EC2API ec2ApiHelper: helper, instance: instance, uidToBranchENIMap: make(map[string][]*ENIDetails), + nodeNameTag: []*awsEC2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodenameKey), + Value: aws.String(instance.Name()), + }, + }, } } @@ -231,7 +239,7 @@ func (t *trunkENI) InitTrunk(instance ec2.EC2Instance, podList []v1.Pod) error { } trunk, err := t.ec2ApiHelper.CreateAndAttachNetworkInterface(&instanceID, aws.String(t.instance.SubnetID()), - t.instance.CurrentInstanceSecurityGroups(), nil, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) + t.instance.CurrentInstanceSecurityGroups(), t.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil) if err != nil { trunkENIOperationsErrCount.WithLabelValues("create_trunk_eni").Inc() return err @@ -418,6 +426,8 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st Value: &t.trunkENIId, }, } + // append the nodeName tag to add to branch ENIs + tags = append(tags, t.nodeNameTag...) // Create Branch ENI nwInterface, err = t.ec2ApiHelper.CreateNetworkInterface(&BranchEniDescription, aws.String(t.instance.SubnetID()), securityGroups, tags, nil, nil) @@ -444,12 +454,14 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st newENIs = append(newENIs, newENI) // Associate Branch to trunk - _, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) + var associationOutput *awsEC2.AssociateTrunkInterfaceOutput + associationOutput, err = t.ec2ApiHelper.AssociateBranchToTrunk(&t.trunkENIId, nwInterface.NetworkInterfaceId, vlanID) if err != nil { err = fmt.Errorf("associating branch to trunk, %w", err) trunkENIOperationsErrCount.WithLabelValues("associate_branch").Inc() break } + newENI.AssociationID = *associationOutput.InterfaceAssociation.AssociationId } if err != nil { @@ -467,31 +479,6 @@ func (t *trunkENI) CreateAndAssociateBranchENIs(pod *v1.Pod, securityGroups []st return newENIs, nil } -// DeleteAllBranchENIs deletes all the branch ENIs associated with the trunk and all the ENIs present in the cool down -// queue, this is the last API call to the the Trunk ENI before it is removed from cache -func (t *trunkENI) DeleteAllBranchENIs() { - // Delete all the branch used by the pod on this trunk ENI - // Since after this call, the trunk will be removed from cache. No need to clean up its branch map - for _, podENIs := range t.uidToBranchENIMap { - for _, eni := range podENIs { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } - } - - // Delete all the branch ENI present in the cool down queue - for _, eni := range t.deleteQueue { - err := t.deleteENI(eni) - if err != nil { - // Just log, if the ENI still exists it can be removed by the dangling ENI cleaner routine - t.log.Error(err, "failed to delete eni", "eni id", eni.ID) - } - } -} - // DeleteBranchNetworkInterface deletes the branch network interface and returns an error in case of failure to delete func (t *trunkENI) PushBranchENIsToCoolDownQueue(UID string) { // Lock is required as Reconciler is also performing operation concurrently @@ -545,7 +532,19 @@ func (t *trunkENI) DeleteCooledDownENIs() { // deleteENIs deletes the provided ENIs and frees up the Vlan assigned to then func (t *trunkENI) deleteENI(eniDetail *ENIDetails) (err error) { - // Delete Branch network interface first + // Disassociate branch ENI from trunk if association ID exists and delete branch network interface + if eniDetail.AssociationID != "" { + err = t.ec2ApiHelper.DisassociateTrunkInterface(&eniDetail.AssociationID) + if err != nil { + trunkENIOperationsErrCount.WithLabelValues("disassociate_trunk_error").Inc() + if !strings.Contains(err.Error(), ec2Errors.NotFoundAssociationID) { + t.log.Error(err, "failed to disassociate branch ENI from trunk, will try to delete the branch ENI") + // Not returning error here, fallback to force branch ENI deletion + } else { + t.log.Info("AssociationID not found when disassociating branch from trunk ENI, it is already disassociated so delete the branch ENI") + } + } + } err = t.ec2ApiHelper.DeleteNetworkInterface(&eniDetail.ID) if err != nil { branchENIOperationsFailureCount.WithLabelValues("delete_branch_error").Inc() diff --git a/pkg/provider/branch/trunk/trunk_test.go b/pkg/provider/branch/trunk/trunk_test.go index 49dcaf0d..5ca3a405 100644 --- a/pkg/provider/branch/trunk/trunk_test.go +++ b/pkg/provider/branch/trunk/trunk_test.go @@ -62,8 +62,9 @@ var ( Name: MockPodName1, Namespace: MockPodNamespace1, Annotations: map[string]string{config.ResourceNamePodENI: "[{\"eniId\":\"eni-00000000000000000\",\"ifAddress\":\"FF:FF:FF:FF:FF:FF\",\"privateIp\":\"192.168.0.15\"," + - "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"},{\"eniId\":\"eni-00000000000000001\",\"ifAddress\":\"" + - "FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"}]"}}, + "\"ipv6Addr\":\"2600::\",\"vlanId\":1,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\",\"AssociationId\":\"trunk-assoc-0000000000000000\"},{\"eniId\":\"eni-00000000000000001\"" + + ",\"ifAddress\":\"FF:FF:FF:FF:FF:F9\",\"privateIp\":\"192.168.0.16\",\"ipv6Addr\":\"2600::1\",\"vlanId\":2,\"subnetCidr\":\"192.168.0.0/16\",\"subnetV6Cidr\":\"2600::/64\"," + + "\"AssociationId\":\"trunk-assoc-0000000000000001\"}]"}}, Spec: v1.PodSpec{NodeName: NodeName}, Status: v1.PodStatus{}, } @@ -93,20 +94,23 @@ var ( SecurityGroups = []string{SecurityGroup1, SecurityGroup2} // Branch Interface 1 - Branch1Id = "eni-00000000000000000" - MacAddr1 = "FF:FF:FF:FF:FF:FF" - BranchIp1 = "192.168.0.15" - BranchV6Ip1 = "2600::" - VlanId1 = 1 + Branch1Id = "eni-00000000000000000" + MacAddr1 = "FF:FF:FF:FF:FF:FF" + BranchIp1 = "192.168.0.15" + BranchV6Ip1 = "2600::" + VlanId1 = 1 + MockAssociationID1 = "trunk-assoc-0000000000000000" + MockAssociationID2 = "trunk-assoc-0000000000000001" EniDetails1 = &ENIDetails{ - ID: Branch1Id, - MACAdd: MacAddr1, - IPV4Addr: BranchIp1, - IPV6Addr: BranchV6Ip1, - VlanID: VlanId1, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch1Id, + MACAdd: MacAddr1, + IPV4Addr: BranchIp1, + IPV6Addr: BranchV6Ip1, + VlanID: VlanId1, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID1, } branchENIs1 = []*ENIDetails{EniDetails1} @@ -126,13 +130,14 @@ var ( VlanId2 = 2 EniDetails2 = &ENIDetails{ - ID: Branch2Id, - MACAdd: MacAddr2, - IPV4Addr: BranchIp2, - IPV6Addr: BranchV6Ip2, - VlanID: VlanId2, - SubnetCIDR: SubnetCidrBlock, - SubnetV6CIDR: SubnetV6CidrBlock, + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + AssociationID: MockAssociationID2, } BranchInterface2 = &awsEc2.NetworkInterface{ @@ -142,8 +147,6 @@ var ( Ipv6Address: &BranchV6Ip2, } - branchENIs2 = []*ENIDetails{EniDetails2} - // Trunk Interface trunkId = "eni-00000000000000002" trunkInterface = &awsEc2.NetworkInterface{ @@ -189,17 +192,27 @@ var ( }, } - trunkAssociationsBranch1And2 = []*awsEc2.TrunkInterfaceAssociation{ - { - BranchInterfaceId: &EniDetails1.ID, - VlanId: aws.Int64(int64(EniDetails1.VlanID)), + mockAssociationOutput1 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID1, }, - { - BranchInterfaceId: &EniDetails2.ID, - VlanId: aws.Int64(int64(EniDetails2.VlanID)), + } + mockAssociationOutput2 = &awsEc2.AssociateTrunkInterfaceOutput{ + InterfaceAssociation: &awsEc2.TrunkInterfaceAssociation{ + AssociationId: &MockAssociationID2, }, } + ENIDetailsMissingAssociationID = &ENIDetails{ + ID: Branch2Id, + MACAdd: MacAddr2, + IPV4Addr: BranchIp2, + IPV6Addr: BranchV6Ip2, + VlanID: VlanId2, + SubnetCIDR: SubnetCidrBlock, + SubnetV6CIDR: SubnetV6CidrBlock, + } + MockError = fmt.Errorf("mock error") ) @@ -229,11 +242,17 @@ func getMockTrunk() trunkENI { log: log, usedVlanIds: make([]bool, MaxAllocatableVlanIds), uidToBranchENIMap: map[string][]*ENIDetails{}, + nodeNameTag: []*awsEc2.Tag{ + { + Key: aws.String(config.NetworkInterfaceNodenameKey), + Value: aws.String(FakeInstance.Name()), + }, + }, } } func TestNewTrunkENI(t *testing.T) { - trunkENI := NewTrunkENI(zap.New(), nil, nil) + trunkENI := NewTrunkENI(zap.New(), FakeInstance, nil) assert.NotNil(t, trunkENI) } @@ -400,34 +419,106 @@ func TestTrunkENI_getBranchInterfaceMap_EmptyList(t *testing.T) { assert.Zero(t, len(branchENIsMap)) } -// TestTrunkENI_deleteENI tests the trunk is deleted and vlan ID freed in case of no errors +// TestTrunkENI_deleteENI tests deleting branch ENI func TestTrunkENI_deleteENI(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) - - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) - - err := trunkENI.deleteENI(EniDetails1) - assert.NoError(t, err) - assert.False(t, trunkENI.usedVlanIds[VlanId1]) -} - -// TestTrunkENI_deleteENI_Fail tests if the ENI deletion fails then the vlan ID is not freed -func TestTrunkENI_deleteENI_Fail(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() + type args struct { + eniDetail *ENIDetails + VlanID int + } + type fields struct { + mockEC2APIHelper *mock_api.MockEC2APIHelper + trunkENI *trunkENI + } + testTrunkENI_deleteENI := []struct { + name string + prepare func(f *fields) + args args + wantErr bool + asserts func(f *fields) + }{ + { + name: "Vland_Freed, verifies VLANID is freed when branch ENI is deleted", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "Vland_NotFreed, verifies VLANID is not freed when branch ENI delete fails", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: true, + asserts: func(f *fields) { + assert.True(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "DisassociateTrunkInterface_Fails, verifies branch ENI is deleted when disassociation fails for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(MockError) + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil) + }, + args: args{ + eniDetail: EniDetails1, + VlanID: VlanId1, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId1]) + }, + }, + { + name: "MissingAssociationID, verifies DisassociateTrunkInterface is skipped when association ID is missing and branch ENI is deleted for backward compatibility", + prepare: func(f *fields) { + f.mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) + }, + args: args{ + eniDetail: ENIDetailsMissingAssociationID, + VlanID: VlanId2, + }, + wantErr: false, + asserts: func(f *fields) { + assert.False(t, f.trunkENI.usedVlanIds[VlanId2]) + }, + }, + } - trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.markVlanAssigned(VlanId1) + for _, tt := range testTrunkENI_deleteENI { + t.Run(tt.name, func(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() - ec2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(MockError) + trunkENI, ec2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) + trunkENI.markVlanAssigned(tt.args.VlanID) - err := trunkENI.deleteENI(EniDetails1) - assert.Error(t, MockError, err) - assert.True(t, trunkENI.usedVlanIds[VlanId1]) + f := fields{ + mockEC2APIHelper: ec2APIHelper, + trunkENI: trunkENI, + } + if tt.prepare != nil { + tt.prepare(&f) + } + err := f.trunkENI.deleteENI(tt.args.eniDetail) + assert.Equal(t, err != nil, tt.wantErr) + if tt.asserts != nil { + tt.asserts(&f) + } + }) + } } // TestTrunkENI_DeleteCooledDownENIs_NotCooledDown tests that ENIs that have not cooled down are not deleted @@ -463,7 +554,9 @@ func TestTrunkENI_DeleteCooledDownENIs_NoDeletionTimeStamp(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -487,6 +580,7 @@ func TestTrunkENI_DeleteCooledDownENIs_CooledDownResource(t *testing.T) { trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil) ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(nil) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) @@ -512,16 +606,17 @@ func TestTrunkENI_DeleteCooledDownENIs_DeleteFailed(t *testing.T) { trunkENI.usedVlanIds[VlanId2] = true trunkENI.deleteQueue = append(trunkENI.deleteQueue, EniDetails1, EniDetails2) - gomock.InOrder( - coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second*60).AnyTimes(), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries), - ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil), - ) mockK8sAPI := mock_k8s.NewMockK8sWrapper(ctrl) mockK8sAPI.EXPECT().GetConfigMap(config.VpcCniConfigMapName, config.KubeSystemNamespace).Return(createCoolDownMockCM("60"), nil) cooldown.InitCoolDownPeriod(mockK8sAPI, zap.New(zap.UseDevMode(true)).WithName("cooldown")) + coolDown.EXPECT().GetCoolDownPeriod().Return(time.Second * 60).AnyTimes() + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID1).Return(nil).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails1.ID).Return(MockError).Times(MaxDeleteRetries) + ec2APIHelper.EXPECT().DisassociateTrunkInterface(&MockAssociationID2).Return(nil) + ec2APIHelper.EXPECT().DeleteNetworkInterface(&EniDetails2.ID).Return(nil) + trunkENI.DeleteCooledDownENIs() assert.Zero(t, len(trunkENI.deleteQueue)) } @@ -598,7 +693,7 @@ func TestTrunkENI_InitTrunk(t *testing.T) { f.mockEC2APIHelper.EXPECT().GetInstanceNetworkInterface(&InstanceId).Return([]*awsEc2.InstanceNetworkInterface{}, nil) f.mockInstance.EXPECT().GetHighestUnusedDeviceIndex().Return(freeIndex, nil) f.mockInstance.EXPECT().SubnetID().Return(SubnetId) - f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, nil, + f.mockEC2APIHelper.EXPECT().CreateAndAttachNetworkInterface(&InstanceId, &SubnetId, SecurityGroups, f.trunkENI.nodeNameTag, &freeIndex, &TrunkEniDescription, &InterfaceTypeTrunk, nil).Return(trunkInterface, nil) }, // Pass nil to set the instance to fields.mockInstance in the function later @@ -731,23 +826,6 @@ func TestTrunkENI_InitTrunk(t *testing.T) { } } -// TestTrunkENI_DeleteAllBranchENIs tests all branch ENI associated with the trunk are deleted -func TestTrunkENI_DeleteAllBranchENIs(t *testing.T) { - ctrl := gomock.NewController(t) - defer ctrl.Finish() - - trunkENI, mockEC2APIHelper, _ := getMockHelperInstanceAndTrunkObject(ctrl) - trunkENI.uidToBranchENIMap[PodUID] = branchENIs1 - trunkENI.uidToBranchENIMap[PodUID2] = branchENIs2 - trunkENI.deleteQueue = append(trunkENI.deleteQueue, branchENIs1[0]) - - // Since we added the same branch ENIs in the cool down queue and in the pod to eni map - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch1Id).Return(nil).Times(2) - mockEC2APIHelper.EXPECT().DeleteNetworkInterface(&Branch2Id).Return(nil) - - trunkENI.DeleteAllBranchENIs() -} - // TestTrunkENI_CreateAndAssociateBranchENIs test branch is created and associated with the trunk and valid eni details // are returned func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { @@ -763,11 +841,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(2) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -797,11 +875,11 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_InstanceSecurityGroup(t *testing. mockInstance.EXPECT().CurrentInstanceSecurityGroups().Return(InstanceSecurityGroup) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan1Tag, nil, nil).Return(BranchInterface1, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil) + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil) mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, InstanceSecurityGroup, - vlan2Tag, nil, nil).Return(BranchInterface2, nil) - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, nil) + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil) + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(mockAssociationOutput2, nil) eniDetails, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, []string{}, 2) expectedENIDetails := []*ENIDetails{EniDetails1, EniDetails2} @@ -831,16 +909,16 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorAssociate(t *testing.T) { gomock.InOrder( mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan1Tag, nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), + append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, - vlan2Tag, nil, nil).Return(BranchInterface2, nil), + append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface2, nil), mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch2Id, VlanId2).Return(nil, MockError), ) _, err := trunkENI.CreateAndAssociateBranchENIs(MockPod2, SecurityGroups, 2) assert.Error(t, MockError, err) - assert.Equal(t, []*ENIDetails{EniDetails1, EniDetails2}, trunkENI.deleteQueue) + assert.Equal(t, []*ENIDetails{EniDetails1, ENIDetailsMissingAssociationID}, trunkENI.deleteQueue) } // TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate tests if error is returned on associate then the created interfaces @@ -858,10 +936,10 @@ func TestTrunkENI_CreateAndAssociateBranchENIs_ErrorCreate(t *testing.T) { mockInstance.EXPECT().SubnetV6CidrBlock().Return(SubnetV6CidrBlock).Times(1) gomock.InOrder( - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan1Tag, + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan1Tag, trunkENI.nodeNameTag...), nil, nil).Return(BranchInterface1, nil), - mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(nil, nil), - mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, vlan2Tag, + mockEC2APIHelper.EXPECT().AssociateBranchToTrunk(&trunkId, &Branch1Id, VlanId1).Return(mockAssociationOutput1, nil), + mockEC2APIHelper.EXPECT().CreateNetworkInterface(&BranchEniDescription, &SubnetId, SecurityGroups, append(vlan2Tag, trunkENI.nodeNameTag...), nil, nil).Return(nil, MockError), )