Skip to content

Fix logging response from NMAgent in syncHostNCVersion function #3747

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions cns/restserver/internalapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ import (
// internal APIs (definde in internalapi.go).
// This will be used internally (say by RequestController in case of AKS)

// ncVersionInfo holds expected and actual version information for an NC
type ncVersionInfo struct {
expected string
actual string
}

// GetPartitionKey - Get dnc/service partition key
func (service *HTTPRestService) GetPartitionKey() (dncPartitionKey string) {
service.RLock()
Expand Down Expand Up @@ -226,10 +232,18 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
nmaNCs[strings.ToLower(nc.NetworkContainerID)] = nc.Version
}
hasNC.Set(float64(len(nmaNCs)))

// Track NCs missing from NMAgent response and outdated NCs for better error reporting
missingNCs := make(map[string]string) // ncID -> expected version
outdatedNMaNCs := make(map[string]ncVersionInfo) // ncID -> version info with expected and actual

for ncID := range outdatedNCs {
nmaNCVersionStr, ok := nmaNCs[ncID]
if !ok {
// NMA doesn't have this NC that we need programmed yet, bail out
// NMA doesn't have this NC that we need programmed yet, track as missing
if ncInfo, exists := service.state.ContainerStatus[ncID]; exists {
missingNCs[ncID] = ncInfo.CreateNetworkContainerRequest.Version
}
continue
}
nmaNCVersion, err := strconv.Atoi(nmaNCVersionStr)
Expand Down Expand Up @@ -258,6 +272,23 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
logger.Errorf("NC version from NMA is decreasing: have %d, got %d", localNCVersion, nmaNCVersion)
continue
}

expectedVersion := ncInfo.CreateNetworkContainerRequest.Version
expectedVersionInt, err := strconv.Atoi(expectedVersion)
if err != nil {
logger.Errorf("failed to parse expected NC version string %s: %s", expectedVersion, err)
continue
}

// Check if NMAgent version is still outdated compared to expected DNC version
if nmaNCVersion < expectedVersionInt {
outdatedNMaNCs[ncID] = ncVersionInfo{
expected: expectedVersion,
actual: nmaNCVersionStr,
}
continue
}

if channelMode == cns.CRD {
service.MarkIpsAsAvailableUntransacted(ncInfo.ID, nmaNCVersion)
}
Expand All @@ -271,7 +302,18 @@ func (service *HTTPRestService) syncHostNCVersion(ctx context.Context, channelMo
// if we didn't empty out the needs update set, NMA has not programmed all the NCs we are expecting, and we
// need to return an error indicating that
if len(outdatedNCs) > 0 {
return len(programmedNCs), errors.Errorf("unabled to update some NCs: %v, missing or bad response from NMA", outdatedNCs)
var errorParts []string
if len(missingNCs) > 0 {
errorParts = append(errorParts, fmt.Sprintf("missing NCs from NMAgent response: %v", missingNCs))
}
if len(outdatedNMaNCs) > 0 {
errorParts = append(errorParts, fmt.Sprintf("outdated NCs in NMAgent response: %v", outdatedNMaNCs))
}
if len(errorParts) == 0 {
// Fallback for any unexpected cases
errorParts = append(errorParts, fmt.Sprintf("unable to update NCs: %v", outdatedNCs))
}
return len(programmedNCs), errors.Errorf("unable to update some NCs - %s", strings.Join(errorParts, "; "))
}

return len(programmedNCs), nil
Expand Down
81 changes: 81 additions & 0 deletions cns/restserver/internalapi_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"reflect"
"strconv"
"strings"
"sync"
"testing"
"time"
Expand Down Expand Up @@ -330,6 +331,86 @@ func createNCReqeustForSyncHostNCVersion(t *testing.T) cns.CreateNetworkContaine
return req
}

func TestSyncHostNCVersionErrorMessages(t *testing.T) {
tests := []struct {
name string
ncVersionInDNC string
nmaResponse nma.NCVersionList
expectedErrorSubstring string
}{
{
name: "missing NC from NMAgent response",
ncVersionInDNC: "2",
nmaResponse: nma.NCVersionList{
Containers: []nma.NCVersion{}, // Empty response - NC is missing
},
expectedErrorSubstring: "missing NCs from NMAgent response",
},
{
name: "outdated NC in NMAgent response",
ncVersionInDNC: "2",
nmaResponse: nma.NCVersionList{
Containers: []nma.NCVersion{
{
NetworkContainerID: ncID,
Version: "1", // Outdated version compared to DNC version 2
},
},
},
expectedErrorSubstring: "outdated NCs in NMAgent response",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Create NC request with specified DNC version
restartService()
setEnv(t)
setOrchestratorTypeInternal(cns.KubernetesCRD)

secondaryIPConfigs := make(map[string]cns.SecondaryIPConfig)
ipAddress := "10.0.0.16"
secIPConfig := newSecondaryIPConfig(ipAddress, 0)
ipID := uuid.New()
secondaryIPConfigs[ipID.String()] = secIPConfig
_ = createNCReqInternal(t, secondaryIPConfigs, ncID, tt.ncVersionInDNC)

// Set up mock NMAgent with the specified response
mnma := &fakes.NMAgentClientFake{
GetNCVersionListF: func(_ context.Context) (nma.NCVersionList, error) {
return tt.nmaResponse, nil
},
}
cleanup := setMockNMAgent(svc, mnma)
defer cleanup()

// Call syncHostNCVersion and verify it returns the expected error
programmedCount, err := svc.syncHostNCVersion(context.Background(), cns.KubernetesCRD)

t.Logf("Test case: %s, programmedCount: %d, error: %v", tt.name, programmedCount, err)

if err == nil {
t.Errorf("Expected error but got none")
return
}

if !strings.Contains(err.Error(), tt.expectedErrorSubstring) {
t.Errorf("Expected error message to contain '%s', but got: %s", tt.expectedErrorSubstring, err.Error())
}

// In the outdated case, NMAgent has a version of the NC so it counts as programmed, just not up-to-date
expectedProgrammedCount := 0
if tt.name == "outdated NC in NMAgent response" {
expectedProgrammedCount = 1 // NMAgent has version 1, so it's programmed to some version
}

if programmedCount != expectedProgrammedCount {
t.Errorf("Expected programmedCount to be %d, but got %d", expectedProgrammedCount, programmedCount)
}
})
}
}

func TestReconcileNCWithEmptyState(t *testing.T) {
restartService()
setEnv(t)
Expand Down