diff --git a/pkg/cloudprovider/plugins/aws/aws_security.go b/pkg/cloudprovider/plugins/aws/aws_security.go index 923ef375..43cd2115 100644 --- a/pkg/cloudprovider/plugins/aws/aws_security.go +++ b/pkg/cloudprovider/plugins/aws/aws_security.go @@ -446,6 +446,9 @@ func (ec2Cfg *ec2ServiceConfig) updateSecurityGroupMembers(groupCloudSgID *strin } networkInterfacesToModify[*networkInterface.NetworkInterfaceId] = networkInterfaceCloudSgsSetToAttach + } else if !membershipOnly && len(networkInterfaceOtherCloudSgsSet) > 0 { + // remove non-nephe sgs if AT is attached. + networkInterfacesToModify[*networkInterface.NetworkInterfaceId] = networkInterfaceNepheControllerCreatedCloudSgsSet } } else { if isNicAttachedToMemberVM || isNicMemberNetworkInterface { diff --git a/pkg/cloudprovider/plugins/azure/azure_nsg_rules.go b/pkg/cloudprovider/plugins/azure/azure_nsg_rules.go index de55edb0..19c6c963 100644 --- a/pkg/cloudprovider/plugins/azure/azure_nsg_rules.go +++ b/pkg/cloudprovider/plugins/azure/azure_nsg_rules.go @@ -69,6 +69,22 @@ func isAzureRuleAttachedToAtSg(rule *armnetwork.SecurityRule, asg string) bool { return false } +// getEmptyCloudRule returns a *securitygroup.CloudRule object with valid fields based on sync content, except nil for the rule field. +// If no valid options are available, nil is returned. +func getEmptyCloudRule(syncContent *cloudresource.SynchronizationContent) *cloudresource.CloudRule { + for _, rule := range append(syncContent.IngressRules, syncContent.EgressRules...) { + if rule.NpNamespacedName != "" { + emptyRule := &cloudresource.CloudRule{ + NpNamespacedName: rule.NpNamespacedName, + AppliedToGrp: rule.AppliedToGrp, + } + emptyRule.Hash = emptyRule.GetHash() + return emptyRule + } + } + return nil +} + // getUnusedPriority finds and returns the first unused priority starting from startPriority. func getUnusedPriority(existingRulePriority map[int32]struct{}, startPriority int32) int32 { _, ok := existingRulePriority[startPriority] @@ -496,69 +512,77 @@ func convertToAzureAddressPrefix(ruleIPs []*net.IPNet) (*string, []*string) { } // convertToCloudRulesByAppliedToSGName converts Azure rules to securitygroup.CloudRule and split them by security group names. +// It also returns a boolean as the third value indicating whether there are user rules in Nephe priority range or not. func convertToCloudRulesByAppliedToSGName(azureSecurityRules []*armnetwork.SecurityRule, - vnetID string) (map[string][]cloudresource.CloudRule, map[string][]cloudresource.CloudRule) { + vnetID string) (map[string][]cloudresource.CloudRule, map[string][]cloudresource.CloudRule, bool) { nepheControllerATSgNameToIngressRules := make(map[string][]cloudresource.CloudRule) nepheControllerATSgNameToEgressRules := make(map[string][]cloudresource.CloudRule) + removeUserRules := false for _, azureSecurityRule := range azureSecurityRules { - if azureSecurityRule.Properties == nil { + if azureSecurityRule.Properties == nil || *azureSecurityRule.Properties.Priority == vnetToVnetDenyRulePriority { continue } - desc, _ := utils.ExtractCloudDescription(azureSecurityRule.Properties.Description) - // Nephe inbound rule implies destination is AT sg. Nephe outbound rule implies source is AT sg. - // we don't care about syncing rules that doesn't match above pattern, as they will not conflict with any Nephe rules. - if *azureSecurityRule.Properties.Direction == armnetwork.SecurityRuleDirectionInbound { - for _, asg := range azureSecurityRule.Properties.DestinationApplicationSecurityGroups { - _, _, asgName, err := extractFieldsFromAzureResourceID(*asg.ID) - if err != nil { - azurePluginLogger().Error(err, "failed to extract asg name from resource id", "id", *asg.ID) - } - sgName, _, isATSg := utils.IsNepheControllerCreatedSG(asgName) - if !isATSg { - continue - } - sgID := cloudresource.CloudResourceID{ - Name: sgName, - Vpc: vnetID, - } - ingressRule, err := convertFromAzureIngressSecurityRuleToCloudRule(*azureSecurityRule, sgID.String(), vnetID, desc) - if err != nil { - azurePluginLogger().Error(err, "failed to convert to ingress cloud rule", "ruleName", azureSecurityRule.Name) - continue - } - rules := nepheControllerATSgNameToIngressRules[sgName] - rules = append(rules, ingressRule...) - nepheControllerATSgNameToIngressRules[sgName] = rules + atAsgs := azureSecurityRule.Properties.DestinationApplicationSecurityGroups + ruleMap := nepheControllerATSgNameToIngressRules + convertFunc := convertFromAzureIngressSecurityRuleToCloudRule + if *azureSecurityRule.Properties.Direction == armnetwork.SecurityRuleDirectionOutbound { + atAsgs = azureSecurityRule.Properties.SourceApplicationSecurityGroups + ruleMap = nepheControllerATSgNameToEgressRules + convertFunc = convertFromAzureEgressSecurityRuleToCloudRule + } + isInNephePriorityRange := *azureSecurityRule.Properties.Priority >= ruleStartPriority + + // Nephe rule has correct description. + desc, ok := utils.ExtractCloudDescription(azureSecurityRule.Properties.Description) + if !ok { + removeUserRules = removeUserRules || isInNephePriorityRange + // Skip converting user rule that is in Nephe priority range, as they will be removed. + if isInNephePriorityRange { + continue } - } else { - for _, asg := range azureSecurityRule.Properties.SourceApplicationSecurityGroups { - _, _, asgName, err := extractFieldsFromAzureResourceID(*asg.ID) - if err != nil { - azurePluginLogger().Error(err, "failed to extract asg name from resource id", "id", *asg.ID) - } - sgName, _, isATSg := utils.IsNepheControllerCreatedSG(asgName) - if !isATSg { - continue - } - sgID := cloudresource.CloudResourceID{ - Name: sgName, - Vpc: vnetID, - } - egressRule, err := convertFromAzureEgressSecurityRuleToCloudRule(*azureSecurityRule, sgID.String(), vnetID, desc) - if err != nil { - azurePluginLogger().Error(err, "failed to convert to egress cloud rule", "ruleName", azureSecurityRule.Name) - continue - } - rules := nepheControllerATSgNameToEgressRules[sgName] - rules = append(rules, egressRule...) - nepheControllerATSgNameToEgressRules[sgName] = rules + } + + // Nephe rule has AT sg. We skip syncing rules that doesn't have a Nephe AT sg, as they will not conflict with any Nephe rules. + if len(atAsgs) == 0 { + removeUserRules = removeUserRules || isInNephePriorityRange + continue + } + for _, asg := range atAsgs { + // Nephe rule has the correct AT sg naming format. + _, _, asgName, err := extractFieldsFromAzureResourceID(*asg.ID) + if err != nil { + azurePluginLogger().Error(err, "failed to extract asg name from resource id", "id", *asg.ID) + removeUserRules = removeUserRules || isInNephePriorityRange + continue + } + sgName, _, isATSg := utils.IsNepheControllerCreatedSG(asgName) + if !isATSg { + removeUserRules = removeUserRules || isInNephePriorityRange + continue + } + + sgID := cloudresource.CloudResourceID{ + Name: sgName, + Vpc: vnetID, } + + rule, err := convertFunc(*azureSecurityRule, sgID.String(), vnetID, desc) + if err != nil { + azurePluginLogger().Error(err, "failed to convert to cloud rule", + "direction", azureSecurityRule.Properties.Direction, "ruleName", azureSecurityRule.Name) + removeUserRules = removeUserRules || isInNephePriorityRange + continue + } + + rules := ruleMap[sgName] + rules = append(rules, rule...) + ruleMap[sgName] = rules } } - return nepheControllerATSgNameToIngressRules, nepheControllerATSgNameToEgressRules + return nepheControllerATSgNameToIngressRules, nepheControllerATSgNameToEgressRules, removeUserRules } // convertFromAzureIngressSecurityRuleToCloudRule converts Azure ingress rules from armnetwork.SecurityRule to securitygroup.CloudRule. diff --git a/pkg/cloudprovider/plugins/azure/azure_security.go b/pkg/cloudprovider/plugins/azure/azure_security.go index 7f919d58..9fdc13c1 100644 --- a/pkg/cloudprovider/plugins/azure/azure_security.go +++ b/pkg/cloudprovider/plugins/azure/azure_security.go @@ -338,8 +338,12 @@ func (computeCfg *computeServiceConfig) buildEffectiveNSGSecurityRulesToApply(ap } // check if the rule is Nephe rules based on ruleStartPriority and description. - _, ok := utils.ExtractCloudDescription(rule.Properties.Description) - if *rule.Properties.Priority >= ruleStartPriority && ok { + if *rule.Properties.Priority >= ruleStartPriority { + _, ok := utils.ExtractCloudDescription(rule.Properties.Description) + // remove user rule that is in Nephe priority range. + if !ok { + continue + } // check if the rule is created by current processing appliedToGroup. if isAzureRuleAttachedToAtSg(rule, appliedToGroupNepheControllerName) { // skip the rule if found in remove list. @@ -691,7 +695,7 @@ func (computeCfg *computeServiceConfig) getATGroupView(nepheControllerATSGNameTo if networkSecurityGroup.Properties == nil { continue } - nepheControllerATSgNameToIngressRulesMap, nepheControllerATSgNameToEgressRulesMap := + nepheControllerATSgNameToIngressRulesMap, nepheControllerATSgNameToEgressRulesMap, removeUserRules := convertToCloudRulesByAppliedToSGName(networkSecurityGroup.Properties.SecurityRules, vnetIDLowercase) for atSgName := range appliedToSgNameSet { @@ -711,6 +715,14 @@ func (computeCfg *computeServiceConfig) getATGroupView(nepheControllerATSGNameTo IngressRules: nepheControllerATSgNameToIngressRulesMap[atSgName], EgressRules: nepheControllerATSgNameToEgressRulesMap[atSgName], } + // If there are user rules needs to be removed, trick the sync to trigger a rule update by adding an empty valid rule. + // In case of no AT or NP for valid rule, it implies Nephe is not actively managing the Vnet, therefore user rules are ignored. + if removeUserRules { + if rule := getEmptyCloudRule(&groupSyncObj); rule != nil { + groupSyncObj.IngressRules = append(groupSyncObj.IngressRules, *rule) + removeUserRules = false + } + } enforcedSecurityCloudView = append(enforcedSecurityCloudView, groupSyncObj) } } diff --git a/pkg/controllers/networkpolicy/controller.go b/pkg/controllers/networkpolicy/controller.go index 04080696..5f25093a 100644 --- a/pkg/controllers/networkpolicy/controller.go +++ b/pkg/controllers/networkpolicy/controller.go @@ -694,7 +694,7 @@ func (r *NetworkPolicyReconciler) SetupWithManager(mgr ctrl.Manager) error { }) // cloudRuleIndexer stores the realized rules on the cloud. r.cloudRuleIndexer = cache.NewIndexer( - // Each cloudRule is uniquely identified by its UUID. + // Each cloudRule is uniquely identified by its hash. func(obj interface{}) (string, error) { rule := obj.(*cloudresource.CloudRule) return rule.Hash, nil diff --git a/pkg/controllers/networkpolicy/networkpolicy.go b/pkg/controllers/networkpolicy/networkpolicy.go index e4c97e14..93f1f8ce 100644 --- a/pkg/controllers/networkpolicy/networkpolicy.go +++ b/pkg/controllers/networkpolicy/networkpolicy.go @@ -956,7 +956,11 @@ func (a *appliedToSecurityGroup) computeCloudRulesFromNp(r *NetworkPolicyReconci sameNP := realizedRule.NpNamespacedName == npNamespacedName currentRule, sameRule := currentRuleMap[realizedRule.Hash] if sameRule && !sameNP { - err = fmt.Errorf("duplicate rules with anp %s", realizedRule.NpNamespacedName) + if realizedRule.NpNamespacedName != "" { + err = fmt.Errorf("duplicate rules with anp %s", realizedRule.NpNamespacedName) + } else { + err = fmt.Errorf("duplicate rules with user rule %+v", realizedRule) + } r.Log.Error(err, "unable to compute rules", "rule", currentRule, "anp", npNamespacedName) return nil, nil, err } diff --git a/pkg/controllers/networkpolicy/sync.go b/pkg/controllers/networkpolicy/sync.go index 3d0bf33f..81586a92 100644 --- a/pkg/controllers/networkpolicy/sync.go +++ b/pkg/controllers/networkpolicy/sync.go @@ -34,7 +34,7 @@ func (s *securityGroupImpl) syncImpl(csg cloudSecurityGroup, syncContent *cloudr // If syncContent is nil, explicitly set internal sg state to init, so that // AddressGroup or AppliedToGroup in cloud can be recreated. s.state = securityGroupStateInit - } else if syncContent != nil { + } else { s.state = securityGroupStateCreated syncMembers := make([]*cloudresource.CloudResource, 0, len(syncContent.Members)) for i := range syncContent.Members { @@ -45,15 +45,15 @@ func (s *securityGroupImpl) syncImpl(csg cloudSecurityGroup, syncContent *cloudr if len(syncMembers) > 0 && syncMembers[0].Type == cloudresource.CloudResourceTypeNIC { cachedMembers, _ = r.getNICsOfCloudResources(s.members) } - if compareCloudResources(cachedMembers, syncMembers) { + if !membershipOnly && len(syncContent.MembersWithOtherSGAttached) > 0 { + log.V(1).Info("AppliedTo members have non-nephe sg attached", "Name", s.id.Name, "State", s.state, + "members", syncContent.MembersWithOtherSGAttached) + } else if compareCloudResources(cachedMembers, syncMembers) { return true } else { log.V(1).Info("Members are not in sync with cloud", "Name", s.id.Name, "State", s.state, "Sync members", syncMembers, "Cached SG members", cachedMembers) } - } else if len(s.members) == 0 { - log.V(1).Info("Empty memberships", "Name", s.id.Name) - return true } if s.state == securityGroupStateCreated { @@ -140,7 +140,7 @@ func (a *appliedToSecurityGroup) sync(syncContent *cloudresource.Synchronization // roughly count and compare rules in syncContent against nps. // also updates cloudRuleIndexer in the process. for _, rule := range syncContent.IngressRules { - if rule.NpNamespacedName != "" { + if rule.NpNamespacedName != "" && rule.Rule != nil { iRule := rule.Rule.(*cloudresource.IngressRule) countIngressRuleItems(iRule, items, true) } @@ -149,7 +149,7 @@ func (a *appliedToSecurityGroup) sync(syncContent *cloudresource.Synchronization } } for _, rule := range syncContent.EgressRules { - if rule.NpNamespacedName != "" { + if rule.NpNamespacedName != "" && rule.Rule != nil { eRule := rule.Rule.(*cloudresource.EgressRule) countEgressRuleItems(eRule, items, true) } @@ -199,7 +199,6 @@ func (r *NetworkPolicyReconciler) syncWithCloud() { ch := securitygroup.CloudSecurityGroup.GetSecurityGroupSyncChan() cloudAddrSGs := make(map[cloudresource.CloudResourceID]*cloudresource.SynchronizationContent) cloudAppliedToSGs := make(map[cloudresource.CloudResourceID]*cloudresource.SynchronizationContent) - rscWithUnknownSGs := make(map[cloudresource.CloudResource]struct{}) removeAddrSgs := make([]*addrSecurityGroup, 0) for content := range ch { log.V(1).Info("Sync from cloud", "SecurityGroup", content) @@ -228,9 +227,6 @@ func (r *NetworkPolicyReconciler) syncWithCloud() { cloudAddrSGs[content.Resource.CloudResourceID] = &cc } else { cloudAppliedToSGs[content.Resource.CloudResourceID] = &cc - for _, rsc := range content.MembersWithOtherSGAttached { - rscWithUnknownSGs[rsc] = struct{}{} - } } } r.syncedWithCloud = true @@ -247,20 +243,6 @@ func (r *NetworkPolicyReconciler) syncWithCloud() { log.V(0).Info("Delete address security group not found in cache", "Name", sg.id.Name) _ = sg.delete(r) } - // For cloud resource with any non nephe created SG, tricking plug-in to remove them by explicitly - // updating a single instance of associated security group. - for rsc := range rscWithUnknownSGs { - i, ok, _ := r.cloudResourceNPTrackerIndexer.GetByKey(rsc.String()) - if !ok { - log.Info("Unable to find resource in tracker", "CloudResource", rsc) - continue - } - tracker := i.(*cloudResourceNPTracker) - for _, sg := range tracker.appliedToSGs { - _ = sg.update(nil, nil, r) - break - } - } } // processBookMark process bookmark event and return true.