From a48b5bb2dbdf08f9eb23f37088ba9e4e9f537a6f Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Fri, 3 Jan 2025 02:39:37 +0530 Subject: [PATCH 01/16] xdsclient: update watcher API as per gRFC A88 --- xds/csds/csds_e2e_test.go | 35 ++++-------- .../balancer/cdsbalancer/cdsbalancer.go | 15 +++-- .../balancer/cdsbalancer/cluster_watcher.go | 16 +++--- .../clusterresolver/resource_resolver_eds.go | 49 ++++++++-------- xds/internal/resolver/watch_service.go | 32 +++++------ xds/internal/resolver/xds_resolver.go | 20 +++++-- xds/internal/server/listener_wrapper.go | 36 ++++++------ xds/internal/server/rds_handler.go | 29 ++++------ xds/internal/testutils/resource_watcher.go | 38 +++++++------ xds/internal/xdsclient/authority.go | 53 +++++++++-------- xds/internal/xdsclient/clientimpl_watchers.go | 4 +- .../tests/ads_stream_flow_control_test.go | 38 ++++++++----- .../xdsclient/tests/cds_watchers_test.go | 21 +++---- .../xdsclient/tests/eds_watchers_test.go | 18 +++--- .../xdsclient/tests/lds_watchers_test.go | 35 ++++++------ .../xdsclient/tests/misc_watchers_test.go | 14 ++--- .../xdsclient/tests/rds_watchers_test.go | 21 +++---- .../xdsclient/tests/resource_update_test.go | 24 ++++---- .../xdsresource/cluster_resource_type.go | 49 ++++++++-------- .../xdsresource/endpoints_resource_type.go | 49 ++++++++-------- .../xdsresource/listener_resource_type.go | 49 ++++++++-------- .../xdsclient/xdsresource/resource_type.go | 57 +++++++++++++------ .../xdsresource/route_config_resource_type.go | 49 ++++++++-------- 23 files changed, 385 insertions(+), 366 deletions(-) diff --git a/xds/csds/csds_e2e_test.go b/xds/csds/csds_e2e_test.go index 3c838afb67fc..bd11580bb640 100644 --- a/xds/csds/csds_e2e_test.go +++ b/xds/csds/csds_e2e_test.go @@ -70,49 +70,37 @@ func Test(t *testing.T) { type nopListenerWatcher struct{} -func (nopListenerWatcher) OnUpdate(_ *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (nopListenerWatcher) OnResourceChanged(_ 
*xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (nopListenerWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (nopListenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (nopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } type nopRouteConfigWatcher struct{} -func (nopRouteConfigWatcher) OnUpdate(_ *xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (nopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.RouteConfigResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (nopRouteConfigWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (nopRouteConfigWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (nopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } type nopClusterWatcher struct{} -func (nopClusterWatcher) OnUpdate(_ *xdsresource.ClusterResourceData, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (nopClusterWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopClusterWatcher) OnResourceChanged(_ *xdsresource.ClusterResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (nopClusterWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (nopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } type nopEndpointsWatcher struct{} -func (nopEndpointsWatcher) OnUpdate(_ *xdsresource.EndpointsResourceData, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (nopEndpointsWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopEndpointsWatcher) OnResourceChanged(_ *xdsresource.EndpointsResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (nopEndpointsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (nopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { 
onDone() } @@ -137,13 +125,10 @@ func newBlockingListenerWatcher(testCtxDone <-chan struct{}) *blockingListenerWa } } -func (w *blockingListenerWatcher) OnUpdate(_ *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { - writeOnDone(w.testCtxDone, w.onDoneCh, onDone) -} -func (w *blockingListenerWatcher) OnError(_ error, onDone xdsresource.OnDoneFunc) { +func (w *blockingListenerWatcher) OnResourceChanged(_ *xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { writeOnDone(w.testCtxDone, w.onDoneCh, onDone) } -func (w *blockingListenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (w *blockingListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { writeOnDone(w.testCtxDone, w.onDoneCh, onDone) } diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 9a112e276977..6254833bd100 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -342,7 +342,7 @@ func (b *cdsBalancer) ResolverError(err error) { if b.lbCfg != nil { root = b.lbCfg.ClusterName } - b.onClusterError(root, err) + b.onClusterAmbientError(root, err) }) } @@ -428,7 +428,7 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd // If the security config is invalid, for example, if the provider // instance is not found in the bootstrap config, we need to put the // channel in transient failure. 
- b.onClusterError(name, fmt.Errorf("received Cluster resource contains invalid security config: %v", err)) + b.onClusterAmbientError(name, fmt.Errorf("received Cluster resource contains invalid security config: %v", err)) return } } @@ -436,12 +436,12 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd clustersSeen := make(map[string]bool) dms, ok, err := b.generateDMsForCluster(b.lbCfg.ClusterName, 0, nil, clustersSeen) if err != nil { - b.onClusterError(b.lbCfg.ClusterName, fmt.Errorf("failed to generate discovery mechanisms: %v", err)) + b.onClusterAmbientError(b.lbCfg.ClusterName, fmt.Errorf("failed to generate discovery mechanisms: %v", err)) return } if ok { if len(dms) == 0 { - b.onClusterError(b.lbCfg.ClusterName, fmt.Errorf("aggregate cluster graph has no leaf clusters")) + b.onClusterAmbientError(b.lbCfg.ClusterName, fmt.Errorf("aggregate cluster graph has no leaf clusters")) return } // Child policy is built the first time we resolve the cluster graph. @@ -501,7 +501,7 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd // TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. -func (b *cdsBalancer) onClusterError(name string, err error) { +func (b *cdsBalancer) onClusterAmbientError(name string, err error) { b.logger.Warningf("Cluster resource %q received error update: %v", name, err) if b.childLB != nil { @@ -525,15 +525,14 @@ func (b *cdsBalancer) onClusterError(name string, err error) { // TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. 
-func (b *cdsBalancer) onClusterResourceNotFound(name string) { - err := xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "resource name %q of type Cluster not found in received response", name) +func (b *cdsBalancer) onClusterResourceChangedError(name string, err error) { if b.childLB != nil { b.childLB.ResolverError(err) } else { // If child balancer was never created, fail the RPCs with errors. b.ccw.UpdateState(balancer.State{ ConnectivityState: connectivity.TransientFailure, - Picker: base.NewErrPicker(err), + Picker: base.NewErrPicker(fmt.Errorf("%q: %v", name, err)), }) } } diff --git a/xds/internal/balancer/cdsbalancer/cluster_watcher.go b/xds/internal/balancer/cdsbalancer/cluster_watcher.go index 835461d0997b..e6d6c6d0d34a 100644 --- a/xds/internal/balancer/cdsbalancer/cluster_watcher.go +++ b/xds/internal/balancer/cdsbalancer/cluster_watcher.go @@ -32,21 +32,21 @@ type clusterWatcher struct { parent *cdsBalancer } -func (cw *clusterWatcher) OnUpdate(u *xdsresource.ClusterResourceData, onDone xdsresource.OnDoneFunc) { +func (cw *clusterWatcher) OnResourceChanged(u *xdsresource.ClusterResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + handleError := func(context.Context) { cw.parent.onClusterResourceChangedError(cw.name, err); onDone() } + cw.parent.serializer.ScheduleOr(handleError, onDone) + return + } handleUpdate := func(context.Context) { cw.parent.onClusterUpdate(cw.name, u.Resource); onDone() } cw.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (cw *clusterWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - handleError := func(context.Context) { cw.parent.onClusterError(cw.name, err); onDone() } +func (cw *clusterWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { + handleError := func(context.Context) { cw.parent.onClusterAmbientError(cw.name, err); onDone() } cw.parent.serializer.ScheduleOr(handleError, onDone) } -func (cw *clusterWatcher) OnResourceDoesNotExist(onDone 
xdsresource.OnDoneFunc) { - handleNotFound := func(context.Context) { cw.parent.onClusterResourceNotFound(cw.name); onDone() } - cw.parent.serializer.ScheduleOr(handleNotFound, onDone) -} - // watcherState groups the state associated with a clusterWatcher. type watcherState struct { watcher *clusterWatcher // The underlying watcher. diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index ddb949019ee5..7ad3628ccc22 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -76,12 +76,34 @@ func newEDSResolver(nameToWatch string, producer xdsresource.Producer, topLevelR } // OnUpdate is invoked to report an update for the resource being watched. -func (er *edsDiscoveryMechanism) OnUpdate(update *xdsresource.EndpointsResourceData, onDone xdsresource.OnDoneFunc) { +func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.EndpointsResourceData, err error, onDone xdsresource.OnDoneFunc) { if er.stopped.HasFired() { onDone() return } + if err != nil { + if er.logger.V(2) { + if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + er.logger.Infof("EDS discovery mechanism for resource %q reported resource-does-not-exist error", er.nameToWatch) + } else { + er.logger.Infof("EDS discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, err) + } + } + // Report an empty update that would result in no priority child being + // created for this discovery mechanism. This would result in the priority + // LB policy reporting TRANSIENT_FAILURE (as there would be no priorities or + // localities) if this was the only discovery mechanism, or would result in + // the priority LB policy using a lower priority discovery mechanism when + // that becomes available. 
+ er.mu.Lock() + er.update = &xdsresource.EndpointsUpdate{} + er.mu.Unlock() + + er.topLevelResolver.onUpdate(onDone) + return + } + er.mu.Lock() er.update = &update.Resource er.mu.Unlock() @@ -89,7 +111,7 @@ func (er *edsDiscoveryMechanism) OnUpdate(update *xdsresource.EndpointsResourceD er.topLevelResolver.onUpdate(onDone) } -func (er *edsDiscoveryMechanism) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (er *edsDiscoveryMechanism) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { if er.stopped.HasFired() { onDone() return @@ -119,26 +141,3 @@ func (er *edsDiscoveryMechanism) OnError(err error, onDone xdsresource.OnDoneFun er.topLevelResolver.onUpdate(onDone) } - -func (er *edsDiscoveryMechanism) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - if er.stopped.HasFired() { - onDone() - return - } - - if er.logger.V(2) { - er.logger.Infof("EDS discovery mechanism for resource %q reported resource-does-not-exist error", er.nameToWatch) - } - - // Report an empty update that would result in no priority child being - // created for this discovery mechanism. This would result in the priority - // LB policy reporting TRANSIENT_FAILURE (as there would be no priorities or - // localities) if this was the only discovery mechanism, or would result in - // the priority LB policy using a lower priority discovery mechanism when - // that becomes available. 
- er.mu.Lock() - er.update = &xdsresource.EndpointsUpdate{} - er.mu.Unlock() - - er.topLevelResolver.onUpdate(onDone) -} diff --git a/xds/internal/resolver/watch_service.go b/xds/internal/resolver/watch_service.go index 0de6604484b1..6b716ea08b4c 100644 --- a/xds/internal/resolver/watch_service.go +++ b/xds/internal/resolver/watch_service.go @@ -36,21 +36,21 @@ func newListenerWatcher(resourceName string, parent *xdsResolver) *listenerWatch return lw } -func (l *listenerWatcher) OnUpdate(update *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (l *listenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + handleError := func(context.Context) { l.parent.onListenerResourceChangedError(err); onDone() } + l.parent.serializer.ScheduleOr(handleError, onDone) + return + } handleUpdate := func(context.Context) { l.parent.onListenerResourceUpdate(update.Resource); onDone() } l.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (l *listenerWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - handleError := func(context.Context) { l.parent.onListenerResourceError(err); onDone() } +func (l *listenerWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { + handleError := func(context.Context) { l.parent.onListenerResourceAmbientError(err); onDone() } l.parent.serializer.ScheduleOr(handleError, onDone) } -func (l *listenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - handleNotFound := func(context.Context) { l.parent.onListenerResourceNotFound(); onDone() } - l.parent.serializer.ScheduleOr(handleNotFound, onDone) -} - func (l *listenerWatcher) stop() { l.cancel() l.parent.logger.Infof("Canceling watch on Listener resource %q", l.resourceName) @@ -68,7 +68,12 @@ func newRouteConfigWatcher(resourceName string, parent *xdsResolver) *routeConfi return rw } -func (r *routeConfigWatcher) OnUpdate(u 
*xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (r *routeConfigWatcher) OnResourceChanged(u *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + handleError := func(context.Context) { r.parent.onRouteConfigResourceChangedError(r.resourceName, err); onDone() } + r.parent.serializer.ScheduleOr(handleError, onDone) + return + } handleUpdate := func(context.Context) { r.parent.onRouteConfigResourceUpdate(r.resourceName, u.Resource) onDone() @@ -76,16 +81,11 @@ func (r *routeConfigWatcher) OnUpdate(u *xdsresource.RouteConfigResourceData, on r.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (r *routeConfigWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - handleError := func(context.Context) { r.parent.onRouteConfigResourceError(r.resourceName, err); onDone() } +func (r *routeConfigWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { + handleError := func(context.Context) { r.parent.onRouteConfigResourceAmbientError(r.resourceName, err); onDone() } r.parent.serializer.ScheduleOr(handleError, onDone) } -func (r *routeConfigWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - handleNotFound := func(context.Context) { r.parent.onRouteConfigResourceNotFound(r.resourceName); onDone() } - r.parent.serializer.ScheduleOr(handleNotFound, onDone) -} - func (r *routeConfigWatcher) stop() { r.cancel() r.parent.logger.Infof("Canceling watch on RouteConfiguration resource %q", r.resourceName) diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index 1ba6c001d93d..ea6a6bc62d4f 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -518,7 +518,7 @@ func (r *xdsResolver) onListenerResourceUpdate(update xdsresource.ListenerUpdate r.routeConfigWatcher = newRouteConfigWatcher(r.rdsResourceName, r) } -func (r *xdsResolver) onListenerResourceError(err error) { +func (r *xdsResolver) 
onListenerResourceAmbientError(err error) { if r.logger.V(2) { r.logger.Infof("Received error for Listener resource %q: %v", r.ldsResourceName, err) } @@ -526,9 +526,13 @@ func (r *xdsResolver) onListenerResourceError(err error) { } // Only executed in the context of a serializer callback. -func (r *xdsResolver) onListenerResourceNotFound() { +func (r *xdsResolver) onListenerResourceChangedError(err error) { if r.logger.V(2) { - r.logger.Infof("Received resource-not-found-error for Listener resource %q", r.ldsResourceName) + if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + r.logger.Infof("Received resource-not-found-error for Listener resource %q", r.ldsResourceName) + } else { + r.logger.Infof("Received on-resource-changed error for Listener resource %q: %v", r.ldsResourceName, err) + } } r.listenerUpdateRecvd = false @@ -559,7 +563,7 @@ func (r *xdsResolver) onRouteConfigResourceUpdate(name string, update xdsresourc } // Only executed in the context of a serializer callback. -func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { +func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) { if r.logger.V(2) { r.logger.Infof("Received error for RouteConfiguration resource %q: %v", name, err) } @@ -567,9 +571,13 @@ func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { } // Only executed in the context of a serializer callback. 
-func (r *xdsResolver) onRouteConfigResourceNotFound(name string) { +func (r *xdsResolver) onRouteConfigResourceChangedError(name string, err error) { if r.logger.V(2) { - r.logger.Infof("Received resource-not-found-error for RouteConfiguration resource %q", name) + if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + r.logger.Infof("Received resource-not-found-error for RouteConfiguration resource %q", name) + } else { + r.logger.Infof("Received on-resource-changed error for RouteConfiguration resource %q: %v", name, err) + } } if r.rdsResourceName != name { diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index 09d320018aee..a820a921afa1 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -397,7 +397,7 @@ func (l *listenerWrapper) switchModeLocked(newMode connectivity.ServingMode, err } } -func (l *listenerWrapper) onLDSResourceDoesNotExist(err error) { +func (l *listenerWrapper) onLDSResourceChangedError(err error) { l.mu.Lock() defer l.mu.Unlock() l.switchModeLocked(connectivity.ServingModeNotServing, err) @@ -414,19 +414,31 @@ type ldsWatcher struct { name string } -func (lw *ldsWatcher) OnUpdate(update *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (lw *ldsWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { defer onDone() if lw.parent.closed.HasFired() { - lw.logger.Warningf("Resource %q received update: %#v after listener was closed", lw.name, update) + if err != nil { + lw.logger.Warningf("Resource %q received err: %#v after listener was closed", lw.name, err) + } else { + lw.logger.Warningf("Resource %q received update: %#v after listener was closed", lw.name, update) + } return } if lw.logger.V(2) { - lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, update.Resource) + if err != nil { + lw.logger.Infof("LDS watch for resource %q 
received error: %#v", lw.name, err) + } else { + lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, update.Resource) + } + } + if err != nil { + lw.parent.onLDSResourceChangedError(err) + return } lw.parent.handleLDSUpdate(update.Resource) } -func (lw *ldsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *ldsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { defer onDone() if lw.parent.closed.HasFired() { lw.logger.Warningf("Resource %q received error: %v after listener was closed", lw.name, err) @@ -438,17 +450,3 @@ func (lw *ldsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { // For errors which are anything other than "resource-not-found", we // continue to use the old configuration. } - -func (lw *ldsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - defer onDone() - if lw.parent.closed.HasFired() { - lw.logger.Warningf("Resource %q received resource-does-not-exist error after listener was closed", lw.name) - return - } - if lw.logger.V(2) { - lw.logger.Infof("LDS watch for resource %q reported resource-does-not-exist error", lw.name) - } - - err := xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "resource name %q of type Listener not found in received response", lw.name) - lw.parent.onLDSResourceDoesNotExist(err) -} diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index bcd3938e6f1a..998145b32767 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -147,7 +147,7 @@ type rdsWatcher struct { canceled bool // eats callbacks if true } -func (rw *rdsWatcher) OnUpdate(update *xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { defer onDone() rw.mu.Lock() if rw.canceled { @@ -156,26 +156,20 @@ func (rw *rdsWatcher) OnUpdate(update 
*xdsresource.RouteConfigResourceData, onDo } rw.mu.Unlock() if rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, update.Resource) + if err != nil { + rw.logger.Infof("RDS watch for resource %q received error: %#v", rw.routeName, err) + } else { + rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, update.Resource) + } } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &update.Resource}) -} - -func (rw *rdsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - defer onDone() - rw.mu.Lock() - if rw.canceled { - rw.mu.Unlock() + if err != nil { + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) return } - rw.mu.Unlock() - if rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q reported error: %v", rw.routeName, err) - } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &update.Resource}) } -func (rw *rdsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (rw *rdsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { defer onDone() rw.mu.Lock() if rw.canceled { @@ -184,8 +178,7 @@ func (rw *rdsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { } rw.mu.Unlock() if rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q reported resource-does-not-exist error: %v", rw.routeName) + rw.logger.Infof("RDS watch for resource %q reported error: %v", rw.routeName, err) } - err := xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "resource name %q of type RouteConfiguration not found in received response", rw.routeName) rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) } diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index dae72e2a7733..522b5d9f37a9 100644 --- a/xds/internal/testutils/resource_watcher.go +++ 
b/xds/internal/testutils/resource_watcher.go @@ -35,10 +35,27 @@ type TestResourceWatcher struct { ResourceDoesNotExistCh chan struct{} } -// OnUpdate is invoked by the xDS client to report the latest update on the resource -// being watched. -func (w *TestResourceWatcher) OnUpdate(data xdsresource.ResourceData, onDone xdsresource.OnDoneFunc) { +// OnResourceChanged is invoked by the xDS client to report the latest update +// or an error on the resource being watched. +func (w *TestResourceWatcher) OnResourceChanged(data xdsresource.ResourceData, err error, onDone xdsresource.OnDoneFunc) { defer onDone() + if err != nil { + if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + select { + case <-w.ResourceDoesNotExistCh: + default: + } + w.ResourceDoesNotExistCh <- struct{}{} + return + } + select { + case <-w.ErrorCh: + default: + } + w.ErrorCh <- err + return + + } select { case <-w.UpdateCh: default: @@ -46,8 +63,8 @@ func (w *TestResourceWatcher) OnUpdate(data xdsresource.ResourceData, onDone xds w.UpdateCh <- &data } -// OnError is invoked by the xDS client to report the latest error. -func (w *TestResourceWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +// OnAmbientError is invoked by the xDS client to report the latest error. +func (w *TestResourceWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { defer onDone() select { case <-w.ErrorCh: @@ -56,17 +73,6 @@ func (w *TestResourceWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) w.ErrorCh <- err } -// OnResourceDoesNotExist is used by the xDS client to report that the resource -// being watched no longer exists. -func (w *TestResourceWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - defer onDone() - select { - case <-w.ResourceDoesNotExistCh: - default: - } - w.ResourceDoesNotExistCh <- struct{}{} -} - // NewTestResourceWatcher returns a TestResourceWatcher to watch for resources // via the xDS client. 
func NewTestResourceWatcher() *TestResourceWatcher { diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index f81685a45e69..4e324208e0f8 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -177,24 +177,13 @@ func (a *authority) handleADSStreamFailure(serverConfig *bootstrap.ServerConfig, a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err) } - // We do not consider it an error if the ADS stream was closed after having - // received a response on the stream. This is because there are legitimate - // reasons why the server may need to close the stream during normal - // operations, such as needing to rebalance load or the underlying - // connection hitting its max connection age limit. See gRFC A57 for more - // details. - if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv { - a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err) - return - } - // Propagate the connection error from the transport layer to all watchers. 
for _, rType := range a.resources { for _, state := range rType { for watcher := range state.watchers { watcher := watcher a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) + watcher.OnAmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) }) } } @@ -363,7 +352,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher err := uErr.Err watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnError(err, done) }) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnAmbientError(err, done) }) } continue } @@ -388,7 +377,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher resource := uErr.Resource watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnUpdate(resource, done) }) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnResourceChanged(resource, nil, done) }) } } @@ -436,9 +425,15 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig } if state.md.Status == xdsresource.ServiceStatusNotExist { // The metadata status is set to "ServiceStatusNotExist" if a - // previous update deleted this resource, in which case we do not - // want to repeatedly call the watch callbacks with a - // "resource-not-found" error. + // previous update deleted this resource, in which case we + // want to send an ambient error. 
+ for watcher := range state.watchers { + watcher := watcher + watcherCnt.Add(1) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { + watcher.OnAmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: previous update deleted this resource"), done) + }) + } continue } if serverConfig.ServerFeaturesIgnoreResourceDeletion() { @@ -455,17 +450,17 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig continue } - // If we get here, it means that the resource exists in cache, but not - // in the new update. Delete the resource from cache, and send a - // resource not found error to indicate that the resource has been - // removed. Metadata for the resource is still maintained, as this is - // required by CSDS. + // If we get here, it means that the resource exists in cache, but + // not in the new update. Delete the resource from cache. Metadata + // for the resource is still maintained, as this is required by CSDS. state.cache = nil state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} for watcher := range state.watchers { watcher := watcher watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnResourceDoesNotExist(done) }) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { + watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource has been removed"), done) + }) } } } @@ -507,7 +502,9 @@ func (a *authority) handleADSResourceDoesNotExist(rType xdsresource.Type, resour state.md = xdsresource.UpdateMetadata{Status: xdsresource.ServiceStatusNotExist} for watcher := range state.watchers { watcher := watcher - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnResourceDoesNotExist(func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, 
"xds: resource %s does not exist", rType.TypeName()), func() {}) + }) } } @@ -643,7 +640,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // xdsClientSerializer callback. Hence making a copy of the cached // resource here for watchCallbackSerializer. resource := state.cache - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnUpdate(resource, func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnResourceChanged(resource, nil, func() {}) }) } // If last update was NACK'd, notify the new watcher of error // immediately as well. @@ -655,12 +652,14 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // xdsClientSerializer callback. Hence making a copy of the error // here for watchCallbackSerializer. err := state.md.ErrState.Err - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnError(err, func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnAmbientError(err, func() {}) }) } // If the metadata field is updated to indicate that the management // server does not have this resource, notify the new watcher. 
if state.md.Status == xdsresource.ServiceStatusNotExist { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnResourceDoesNotExist(func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName()), func() {}) + }) } cleanup = a.unwatchResource(rType, resourceName, watcher) }, func() { diff --git a/xds/internal/xdsclient/clientimpl_watchers.go b/xds/internal/xdsclient/clientimpl_watchers.go index ed4ee360fb7d..b21f89131296 100644 --- a/xds/internal/xdsclient/clientimpl_watchers.go +++ b/xds/internal/xdsclient/clientimpl_watchers.go @@ -45,7 +45,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if err := c.resourceTypes.maybeRegister(rType); err != nil { logger.Warningf("Watch registered for name %q of type %q which is already registered", rType.TypeName(), resourceName) - c.serializer.TrySchedule(func(context.Context) { watcher.OnError(err, func() {}) }) + c.serializer.TrySchedule(func(context.Context) { watcher.OnResourceChanged(nil, err, func() {}) }) return func() {} } @@ -54,7 +54,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if a == nil { logger.Warningf("Watch registered for name %q of type %q, authority %q is not found", rType.TypeName(), resourceName, n.Authority) c.serializer.TrySchedule(func(context.Context) { - watcher.OnError(fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {}) + watcher.OnResourceChanged(nil, fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {}) }) return func() {} } diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index ff0243f3d462..ffa6cdf09d03 100644 --- 
a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -60,7 +60,28 @@ func newBLockingListenerWatcher() *blockingListenerWatcher { } } -func (lw *blockingListenerWatcher) OnUpdate(update *xdsresource.ListenerResourceData, done xdsresource.OnDoneFunc) { +func (lw *blockingListenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, done xdsresource.OnDoneFunc) { + if err != nil { + if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + // Notify receipt of resource not found. + select { + case lw.notFoundCh <- struct{}{}: + default: + } + } else { + select { + case lw.errorCh <- struct{}{}: + default: + } + } + + select { + case lw.doneNotifierCh <- done: + default: + } + + return + } // Notify receipt of the update. select { case lw.updateCh <- struct{}{}: @@ -73,7 +94,7 @@ func (lw *blockingListenerWatcher) OnUpdate(update *xdsresource.ListenerResource } } -func (lw *blockingListenerWatcher) OnError(err error, done xdsresource.OnDoneFunc) { +func (lw *blockingListenerWatcher) OnAmbientError(err error, done xdsresource.OnDoneFunc) { // Notify receipt of an error. select { case lw.errorCh <- struct{}{}: @@ -86,19 +107,6 @@ func (lw *blockingListenerWatcher) OnError(err error, done xdsresource.OnDoneFun } } -func (lw *blockingListenerWatcher) OnResourceDoesNotExist(done xdsresource.OnDoneFunc) { - // Notify receipt of resource not found. 
- select { - case lw.notFoundCh <- struct{}{}: - default: - } - - select { - case lw.doneNotifierCh <- done: - default: - } -} - type wrappedADSStream struct { v3adsgrpc.AggregatedDiscoveryService_StreamAggregatedResourcesClient recvCh chan struct{} diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index f8cd6dac7691..165ca0057b6b 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -44,13 +44,10 @@ import ( type noopClusterWatcher struct{} -func (noopClusterWatcher) OnUpdate(update *xdsresource.ClusterResourceData, onDone xdsresource.OnDoneFunc) { +func (noopClusterWatcher) OnResourceChanged(_ *xdsresource.ClusterResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (noopClusterWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (noopClusterWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (noopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } @@ -67,12 +64,17 @@ func newClusterWatcher() *clusterWatcher { return &clusterWatcher{updateCh: testutils.NewChannel()} } -func (cw *clusterWatcher) OnUpdate(update *xdsresource.ClusterResourceData, onDone xdsresource.OnDoneFunc) { +func (cw *clusterWatcher) OnResourceChanged(update *xdsresource.ClusterResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + cw.updateCh.Replace(clusterUpdateErrTuple{err: err}) + onDone() + return + } cw.updateCh.Send(clusterUpdateErrTuple{update: update.Resource}) onDone() } -func (cw *clusterWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (cw *clusterWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies 
tests which will have @@ -81,11 +83,6 @@ func (cw *clusterWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { onDone() } -func (cw *clusterWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - cw.updateCh.Replace(clusterUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "Cluster not found in received response")}) - onDone() -} - // badClusterResource returns a cluster resource for the given name which // contains a config_source_specifier for the `lrs_server` field which is not // set to `self`, and hence is expected to be NACKed by the client. diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index 21021b8992bb..c6506ddf408a 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -53,10 +53,10 @@ const ( type noopEndpointsWatcher struct{} -func (noopEndpointsWatcher) OnUpdate(update *xdsresource.EndpointsResourceData, onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) OnResourceChanged(_ *xdsresource.EndpointsResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (noopEndpointsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } func (noopEndpointsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { @@ -76,12 +76,17 @@ func newEndpointsWatcher() *endpointsWatcher { return &endpointsWatcher{updateCh: testutils.NewChannel()} } -func (ew *endpointsWatcher) OnUpdate(update *xdsresource.EndpointsResourceData, onDone xdsresource.OnDoneFunc) { +func (ew *endpointsWatcher) OnResourceChanged(update *xdsresource.EndpointsResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + ew.updateCh.Replace(endpointsUpdateErrTuple{err: err}) + onDone() + return + } ew.updateCh.Send(endpointsUpdateErrTuple{update: update.Resource}) onDone() } 
-func (ew *endpointsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (ew *endpointsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -90,11 +95,6 @@ func (ew *endpointsWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { onDone() } -func (ew *endpointsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - ew.updateCh.Replace(endpointsUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "Endpoints not found in received response")}) - onDone() -} - // badEndpointsResource returns a endpoints resource for the given // edsServiceName which contains an endpoint with a load_balancing weight of // `0`. This is expected to be NACK'ed by the xDS client. diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index b05b9caf4adc..b03e296e207e 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -48,13 +48,10 @@ import ( type noopListenerWatcher struct{} -func (noopListenerWatcher) OnUpdate(update *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (noopListenerWatcher) OnResourceChanged(_ *xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (noopListenerWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (noopListenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (noopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } @@ -71,12 +68,17 @@ func newListenerWatcher() *listenerWatcher { return &listenerWatcher{updateCh: testutils.NewChannel()} } -func (lw *listenerWatcher) OnUpdate(update 
*xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + lw.updateCh.Replace(listenerUpdateErrTuple{err: err}) + onDone() + return + } lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) onDone() } -func (lw *listenerWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -85,11 +87,6 @@ func (lw *listenerWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { onDone() } -func (lw *listenerWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - lw.updateCh.Replace(listenerUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "Listener not found in received response")}) - onDone() -} - type listenerWatcherMultiple struct { updateCh *testutils.Channel } @@ -100,21 +97,21 @@ func newListenerWatcherMultiple(size int) *listenerWatcherMultiple { return &listenerWatcherMultiple{updateCh: testutils.NewChannelWithSize(size)} } -func (lw *listenerWatcherMultiple) OnUpdate(update *xdsresource.ListenerResourceData, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcherMultiple) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + lw.updateCh.Send(listenerUpdateErrTuple{err: err}) + onDone() + return + } lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) onDone() } -func (lw *listenerWatcherMultiple) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcherMultiple) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { 
lw.updateCh.Send(listenerUpdateErrTuple{err: err}) onDone() } -func (lw *listenerWatcherMultiple) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - lw.updateCh.Send(listenerUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "Listener not found in received response")}) - onDone() -} - // badListenerResource returns a listener resource for the given name which does // not contain the `RouteSpecifier` field in the HTTPConnectionManager, and // hence is expected to be NACKed by the client. diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index 6b8152620231..76e764421730 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -69,7 +69,12 @@ func newTestRouteConfigWatcher(client xdsclient.XDSClient, name1, name2 string) } } -func (rw *testRouteConfigWatcher) OnUpdate(update *xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (rw *testRouteConfigWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) + onDone() + return + } rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) rw.cancel1 = xdsresource.WatchRouteConfig(rw.client, rw.name1, rw.rcw1) @@ -77,7 +82,7 @@ func (rw *testRouteConfigWatcher) OnUpdate(update *xdsresource.RouteConfigResour onDone() } -func (rw *testRouteConfigWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (rw *testRouteConfigWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -86,11 +91,6 @@ func (rw *testRouteConfigWatcher) OnError(err error, 
onDone xdsresource.OnDoneFu onDone() } -func (rw *testRouteConfigWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "RouteConfiguration not found in received response")}) - onDone() -} - func (rw *testRouteConfigWatcher) cancel() { rw.cancel1() rw.cancel2() diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index b8dd1c72f465..dfb161bb69a5 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -43,13 +43,10 @@ import ( type noopRouteConfigWatcher struct{} -func (noopRouteConfigWatcher) OnUpdate(update *xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (noopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.RouteConfigResourceData, _ error, onDone xdsresource.OnDoneFunc) { onDone() } -func (noopRouteConfigWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { - onDone() -} -func (noopRouteConfigWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (noopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { onDone() } @@ -66,12 +63,17 @@ func newRouteConfigWatcher() *routeConfigWatcher { return &routeConfigWatcher{updateCh: testutils.NewChannel()} } -func (rw *routeConfigWatcher) OnUpdate(update *xdsresource.RouteConfigResourceData, onDone xdsresource.OnDoneFunc) { +func (rw *routeConfigWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { + if err != nil { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) + onDone() + return + } rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) onDone() } -func (rw *routeConfigWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) { +func (rw *routeConfigWatcher) OnAmbientError(err error, onDone 
xdsresource.OnDoneFunc) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -80,11 +82,6 @@ func (rw *routeConfigWatcher) OnError(err error, onDone xdsresource.OnDoneFunc) onDone() } -func (rw *routeConfigWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "RouteConfiguration not found in received response")}) - onDone() -} - // badRouteConfigResource returns a RouteConfiguration resource for the given // routeName which contains a retry config with num_retries set to `0`. This is // expected to be NACK'ed by the xDS client. diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index 0460385d0fb7..67681a1ce641 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -161,7 +161,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "Listener not found in received response", + wantErr: "xds: resource ListenerResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -177,7 +177,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", VersionInfo: "1", }, - wantErr: "Listener not found in received response", + wantErr: "xds: resource ListenerResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -194,7 +194,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t 
*testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3routepb.RouteConfiguration{})}, }, - wantErr: "Listener not found in received response", + wantErr: "xds: resource ListenerResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -418,7 +418,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "RouteConfiguration not found in received response", + wantErr: "xds: resource RouteConfigResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -434,7 +434,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", VersionInfo: "1", }, - wantErr: "RouteConfiguration not found in received response", + wantErr: "xds: resource RouteConfigResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -451,7 +451,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3clusterpb.Cluster{})}, }, - wantErr: "RouteConfiguration not found in received response", + wantErr: "xds: resource RouteConfigResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -667,7 +667,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "Cluster not found in received response", + wantErr: "xds: resource ClusterResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: 
"type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -683,7 +683,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", VersionInfo: "1", }, - wantErr: "Cluster not found in received response", + wantErr: "xds: resource ClusterResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -700,7 +700,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3endpointpb.ClusterLoadAssignment{})}, }, - wantErr: "Cluster not found in received response", + wantErr: "xds: resource ClusterResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -974,7 +974,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "Endpoints not found in received response", + wantErr: "xds: resource EndpointsResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -990,7 +990,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", VersionInfo: "1", }, - wantErr: "Endpoints not found in received response", + wantErr: "xds: resource EndpointsResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1007,7 +1007,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{})}, }, - wantErr: "Endpoints not found in 
received response", + wantErr: "xds: resource EndpointsResource does not exist", wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index 8e9375fcbbec..0e43f0261cd4 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -110,39 +110,42 @@ func (c *ClusterResourceData) Raw() *anypb.Any { // ClusterWatcher wraps the callbacks to be invoked for different events // corresponding to the cluster resource being watched. type ClusterWatcher interface { - // OnUpdate is invoked to report an update for the resource being watched. - OnUpdate(*ClusterResourceData, OnDoneFunc) - - // OnError is invoked under different error conditions including but not + // OnResourceChanged is invoked to notify the watcher of a new version of + // the resource received from the xDS server or an error indicating the + // reason why the resource cannot be obtained. + // + // It is invoked under different error conditions including but not // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource deserialization error - // - resource validation error - // - ADS stream failure - // - connection failure - OnError(error, OnDoneFunc) - - // OnResourceDoesNotExist is invoked for a specific error condition where - // the requested resource is not found on the xDS management server. 
- OnResourceDoesNotExist(OnDoneFunc) + // - authority mentioned in the resource is not found + // - resource name parsing error + // - resource validation error (if resource is not cached) + // - ADS stream failure (if resource is not cached) + // - connection failure (if resource is not cached) + OnResourceChanged(*ClusterResourceData, error, OnDoneFunc) + + // If resource is already cached, it is invoked under different error + // conditions including but not limited to the following: + // - resource validation error + // - ADS stream failure + // - connection failure + OnAmbientError(error, OnDoneFunc) } type delegatingClusterWatcher struct { watcher ClusterWatcher } -func (d *delegatingClusterWatcher) OnUpdate(data ResourceData, onDone OnDoneFunc) { +func (d *delegatingClusterWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { + if err != nil { + d.watcher.OnResourceChanged(nil, err, onDone) + return + } c := data.(*ClusterResourceData) - d.watcher.OnUpdate(c, onDone) -} - -func (d *delegatingClusterWatcher) OnError(err error, onDone OnDoneFunc) { - d.watcher.OnError(err, onDone) + d.watcher.OnResourceChanged(c, nil, onDone) } -func (d *delegatingClusterWatcher) OnResourceDoesNotExist(onDone OnDoneFunc) { - d.watcher.OnResourceDoesNotExist(onDone) +func (d *delegatingClusterWatcher) OnAmbientError(err error, onDone OnDoneFunc) { + d.watcher.OnAmbientError(err, onDone) } // WatchCluster uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index 94c03d0c5228..2f0faf5b70aa 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -106,39 +106,42 @@ func (e *EndpointsResourceData) Raw() *anypb.Any { // EndpointsWatcher wraps the callbacks to be invoked for different // events corresponding to the endpoints 
resource being watched. type EndpointsWatcher interface { - // OnUpdate is invoked to report an update for the resource being watched. - OnUpdate(*EndpointsResourceData, OnDoneFunc) - - // OnError is invoked under different error conditions including but not + // OnResourceChanged is invoked to notify the watcher of a new version of + // the resource received from the xDS server or an error indicating the + // reason why the resource cannot be obtained. + // + // It is invoked under different error conditions including but not // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource deserialization error - // - resource validation error - // - ADS stream failure - // - connection failure - OnError(error, OnDoneFunc) - - // OnResourceDoesNotExist is invoked for a specific error condition where - // the requested resource is not found on the xDS management server. - OnResourceDoesNotExist(OnDoneFunc) + // - authority mentioned in the resource is not found + // - resource name parsing error + // - resource validation error (if resource is not cached) + // - ADS stream failure (if resource is not cached) + // - connection failure (if resource is not cached) + OnResourceChanged(*EndpointsResourceData, error, OnDoneFunc) + + // If resource is already cached, it is invoked under different error + // conditions including but not limited to the following: + // - resource validation error + // - ADS stream failure + // - connection failure + OnAmbientError(error, OnDoneFunc) } type delegatingEndpointsWatcher struct { watcher EndpointsWatcher } -func (d *delegatingEndpointsWatcher) OnUpdate(data ResourceData, onDone OnDoneFunc) { +func (d *delegatingEndpointsWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { + if err != nil { + d.watcher.OnResourceChanged(nil, err, onDone) + return + } e := data.(*EndpointsResourceData) - d.watcher.OnUpdate(e, onDone) -} - -func (d 
*delegatingEndpointsWatcher) OnError(err error, onDone OnDoneFunc) { - d.watcher.OnError(err, onDone) + d.watcher.OnResourceChanged(e, nil, onDone) } -func (d *delegatingEndpointsWatcher) OnResourceDoesNotExist(onDone OnDoneFunc) { - d.watcher.OnResourceDoesNotExist(onDone) +func (d *delegatingEndpointsWatcher) OnAmbientError(err error, onDone OnDoneFunc) { + d.watcher.OnAmbientError(err, onDone) } // WatchEndpoints uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index e3ca1134a07b..07ddd5ae1bfc 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -143,39 +143,42 @@ func (l *ListenerResourceData) Raw() *anypb.Any { // ListenerWatcher wraps the callbacks to be invoked for different // events corresponding to the listener resource being watched. type ListenerWatcher interface { - // OnUpdate is invoked to report an update for the resource being watched. - OnUpdate(*ListenerResourceData, OnDoneFunc) - - // OnError is invoked under different error conditions including but not + // OnResourceChanged is invoked to notify the watcher of a new version of + // the resource received from the xDS server or an error indicating the + // reason why the resource cannot be obtained. + // + // It is invoked under different error conditions including but not // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource deserialization error - // - resource validation error - // - ADS stream failure - // - connection failure - OnError(error, OnDoneFunc) - - // OnResourceDoesNotExist is invoked for a specific error condition where - // the requested resource is not found on the xDS management server. 
- OnResourceDoesNotExist(OnDoneFunc) + // - authority mentioned in the resource is not found + // - resource name parsing error + // - resource validation error (if resource is not cached) + // - ADS stream failure (if resource is not cached) + // - connection failure (if resource is not cached) + OnResourceChanged(*ListenerResourceData, error, OnDoneFunc) + + // If resource is already cached, it is invoked under different error + // conditions including but not limited to the following: + // - resource validation error + // - ADS stream failure + // - connection failure + OnAmbientError(error, OnDoneFunc) } type delegatingListenerWatcher struct { watcher ListenerWatcher } -func (d *delegatingListenerWatcher) OnUpdate(data ResourceData, onDone OnDoneFunc) { +func (d *delegatingListenerWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { + if err != nil { + d.watcher.OnResourceChanged(nil, err, onDone) + return + } l := data.(*ListenerResourceData) - d.watcher.OnUpdate(l, onDone) -} - -func (d *delegatingListenerWatcher) OnError(err error, onDone OnDoneFunc) { - d.watcher.OnError(err, onDone) + d.watcher.OnResourceChanged(l, nil, onDone) } -func (d *delegatingListenerWatcher) OnResourceDoesNotExist(onDone OnDoneFunc) { - d.watcher.OnResourceDoesNotExist(onDone) +func (d *delegatingListenerWatcher) OnAmbientError(err error, onDone OnDoneFunc) { + d.watcher.OnAmbientError(err, onDone) } // WatchListener uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index e14f56f781d1..55b5f4a88430 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -58,27 +58,48 @@ type Producer interface { // from the xDS server. type OnDoneFunc func() -// ResourceWatcher wraps the callbacks to be invoked for different events -// corresponding to the resource being watched. 
+// ResourceWatcher is an interface that can be implemented to wrap the +// callbacks to be invoked for different events corresponding to the resource +// being watched. type ResourceWatcher interface { - // OnUpdate is invoked to report an update for the resource being watched. + // OnResourceChanged is invoked to notify the watcher of a new version of + // the resource received from the xDS server or an error indicating the + // reason why the resource cannot be obtained. + // // The ResourceData parameter needs to be type asserted to the appropriate - // type for the resource being watched. - OnUpdate(ResourceData, OnDoneFunc) - - // OnError is invoked under different error conditions including but not + // type for the resource being watched. In case of error, the ResourceData + // is nil; otherwise it's not nil and error is nil, but both will never be nil + // together. + // + // Watcher is expected to use the most recent value passed to + // OnResourceChanged(), regardless of whether that's a resource or an error + // i.e., if the watcher is given an error via OnResourceChanged(), that + // means it should stop using any previously delivered resource. + // + // It is invoked under different error conditions including but not // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource deserialization error - // - resource validation error - // - ADS stream failure - // - connection failure - OnError(error, OnDoneFunc) - - // OnResourceDoesNotExist is invoked for a specific error condition where - // the requested resource is not found on the xDS management server.
- OnResourceDoesNotExist(OnDoneFunc) + // - authority mentioned in the resource is not found + // - resource name parsing error + // - resource validation error (if resource is not cached) + // - ADS stream failure (if resource is not cached) + // - connection failure (if resource is not cached) + OnResourceChanged(ResourceData, error, OnDoneFunc) + + // OnAmbientError is invoked to notify the watcher of an error that occurs + // after a resource has been received (i.e. we already have a cached + // resource) that should not modify the watcher’s use of that resource but + // that may be useful information about the ambient state of the XdsClient. + // In particular, the watcher should NOT stop using the previously seen + // resource, and the XdsClient will NOT remove the resource from its cache. + // However, the error message may be useful as additional context to + // include in errors that are being generated for other reasons. + // + // If resource is already cached, it is invoked under different error + // conditions including but not limited to the following: + // - resource validation error + // - ADS stream failure + // - connection failure + OnAmbientError(error, OnDoneFunc) } // TODO: Once the implementation is complete, rename this interface as diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 98ac313288a2..25576903d96d 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -107,39 +107,42 @@ func (r *RouteConfigResourceData) Raw() *anypb.Any { // RouteConfigWatcher wraps the callbacks to be invoked for different // events corresponding to the route configuration resource being watched. type RouteConfigWatcher interface { - // OnUpdate is invoked to report an update for the resource being watched. 
- OnUpdate(*RouteConfigResourceData, OnDoneFunc) - - // OnError is invoked under different error conditions including but not + // OnResourceChanged is invoked to notify the watcher of a new version of + // the resource received from the xDS server or an error indicating the + // reason why the resource cannot be obtained. + // + // It is invoked under different error conditions including but not // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource deserialization error - // - resource validation error - // - ADS stream failure - // - connection failure - OnError(error, OnDoneFunc) - - // OnResourceDoesNotExist is invoked for a specific error condition where - // the requested resource is not found on the xDS management server. - OnResourceDoesNotExist(OnDoneFunc) + // - authority mentioned in the resource is not found + // - resource name parsing error + // - resource validation error (if resource is not cached) + // - ADS stream failure (if resource is not cached) + // - connection failure (if resource is not cached) + OnResourceChanged(*RouteConfigResourceData, error, OnDoneFunc) + + // If resource is already cached, it is invoked under different error + // conditions including but not limited to the following: + // - resource validation error + // - ADS stream failure + // - connection failure + OnAmbientError(error, OnDoneFunc) } type delegatingRouteConfigWatcher struct { watcher RouteConfigWatcher } -func (d *delegatingRouteConfigWatcher) OnUpdate(data ResourceData, onDone OnDoneFunc) { +func (d *delegatingRouteConfigWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { + if err != nil { + d.watcher.OnResourceChanged(nil, err, onDone) + return + } rc := data.(*RouteConfigResourceData) - d.watcher.OnUpdate(rc, onDone) -} - -func (d *delegatingRouteConfigWatcher) OnError(err error, onDone OnDoneFunc) { - d.watcher.OnError(err, onDone) + 
d.watcher.OnResourceChanged(rc, nil, onDone) } -func (d *delegatingRouteConfigWatcher) OnResourceDoesNotExist(onDone OnDoneFunc) { - d.watcher.OnResourceDoesNotExist(onDone) +func (d *delegatingRouteConfigWatcher) OnAmbientError(err error, onDone OnDoneFunc) { + d.watcher.OnAmbientError(err, onDone) } // WatchRouteConfig uses xDS to discover the configuration associated with the From 89caa23f7f363031d8efde8a89b7598cdf0ea294 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Wed, 8 Jan 2025 22:52:54 +0530 Subject: [PATCH 02/16] revert notififying watchers on stream close if one response is received --- xds/internal/xdsclient/authority.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 4e324208e0f8..c0723686b3bf 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -177,6 +177,17 @@ func (a *authority) handleADSStreamFailure(serverConfig *bootstrap.ServerConfig, a.logger.Infof("Connection to server %s failed with error: %v", serverConfig, err) } + // We do not consider it an error if the ADS stream was closed after having + // received a response on the stream. This is because there are legitimate + // reasons why the server may need to close the stream during normal + // operations, such as needing to rebalance load or the underlying + // connection hitting its max connection age limit. See gRFC A57 for more + // details. + if xdsresource.ErrType(err) == xdsresource.ErrTypeStreamFailedAfterRecv { + a.logger.Warningf("Watchers not notified since ADS stream failed after having received at least one response: %v", err) + return + } + // Propagate the connection error from the transport layer to all watchers. 
for _, rType := range a.resources { for _, state := range rType { From 25dce25c7ea0dc68c388c2acf652dafd130e051e Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Wed, 8 Jan 2025 23:28:47 +0530 Subject: [PATCH 03/16] revert notifying watcher if previous updated deleted it --- xds/internal/xdsclient/authority.go | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index c0723686b3bf..3313b9e45df8 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -436,15 +436,9 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig } if state.md.Status == xdsresource.ServiceStatusNotExist { // The metadata status is set to "ServiceStatusNotExist" if a - // previous update deleted this resource, in which case we - // want to send an ambient error. - for watcher := range state.watchers { - watcher := watcher - watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.OnAmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: previous update deleted this resource"), done) - }) - } + // previous update deleted this resource, in which case we do not + // want to repeatedly call the watch callbacks with a + // "resource-not-found" error. 
continue } if serverConfig.ServerFeaturesIgnoreResourceDeletion() { From 4c7a2043f83da7fa2f4d4e205cd4506871047142 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Fri, 10 Jan 2025 21:55:51 +0530 Subject: [PATCH 04/16] update OnResourceChanged() param to ResourceDataOrErr --- xds/csds/csds_e2e_test.go | 10 ++++---- .../balancer/cdsbalancer/cluster_watcher.go | 9 ++++--- .../clusterresolver/resource_resolver_eds.go | 11 ++++---- xds/internal/resolver/watch_service.go | 18 +++++++------ xds/internal/server/listener_wrapper.go | 20 ++++++++------- xds/internal/server/rds_handler.go | 16 ++++++------ xds/internal/testutils/resource_watcher.go | 10 ++++---- xds/internal/xdsclient/authority.go | 14 +++++++---- xds/internal/xdsclient/clientimpl_watchers.go | 6 +++-- .../tests/ads_stream_flow_control_test.go | 6 ++--- .../xdsclient/tests/cds_watchers_test.go | 11 ++++---- .../xdsclient/tests/eds_watchers_test.go | 11 ++++---- .../xdsclient/tests/lds_watchers_test.go | 20 ++++++++------- .../xdsclient/tests/misc_watchers_test.go | 9 ++++--- .../xdsclient/tests/rds_watchers_test.go | 11 ++++---- .../xdsresource/cluster_resource_type.go | 12 ++++----- .../xdsresource/endpoints_resource_type.go | 12 ++++----- .../xdsresource/listener_resource_type.go | 12 ++++----- .../xdsclient/xdsresource/resource_type.go | 25 ++++++++++++------- .../xdsresource/route_config_resource_type.go | 12 ++++----- 20 files changed, 141 insertions(+), 114 deletions(-) diff --git a/xds/csds/csds_e2e_test.go b/xds/csds/csds_e2e_test.go index bd11580bb640..7d75a2f83339 100644 --- a/xds/csds/csds_e2e_test.go +++ b/xds/csds/csds_e2e_test.go @@ -70,7 +70,7 @@ func Test(t *testing.T) { type nopListenerWatcher struct{} -func (nopListenerWatcher) OnResourceChanged(_ *xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (nopListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (nopListenerWatcher) OnAmbientError(_ 
error, onDone xdsresource.OnDoneFunc) { @@ -79,7 +79,7 @@ func (nopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) type nopRouteConfigWatcher struct{} -func (nopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.RouteConfigResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (nopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (nopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -88,7 +88,7 @@ func (nopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFu type nopClusterWatcher struct{} -func (nopClusterWatcher) OnResourceChanged(_ *xdsresource.ClusterResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (nopClusterWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (nopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -97,7 +97,7 @@ func (nopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) type nopEndpointsWatcher struct{} -func (nopEndpointsWatcher) OnResourceChanged(_ *xdsresource.EndpointsResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (nopEndpointsWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (nopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -125,7 +125,7 @@ func newBlockingListenerWatcher(testCtxDone <-chan struct{}) *blockingListenerWa } } -func (w *blockingListenerWatcher) OnResourceChanged(_ *xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (w *blockingListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { writeOnDone(w.testCtxDone, w.onDoneCh, onDone) } func (w *blockingListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { diff --git 
a/xds/internal/balancer/cdsbalancer/cluster_watcher.go b/xds/internal/balancer/cdsbalancer/cluster_watcher.go index e6d6c6d0d34a..f6aeff1f7ef0 100644 --- a/xds/internal/balancer/cdsbalancer/cluster_watcher.go +++ b/xds/internal/balancer/cdsbalancer/cluster_watcher.go @@ -32,13 +32,14 @@ type clusterWatcher struct { parent *cdsBalancer } -func (cw *clusterWatcher) OnResourceChanged(u *xdsresource.ClusterResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - handleError := func(context.Context) { cw.parent.onClusterResourceChangedError(cw.name, err); onDone() } +func (cw *clusterWatcher) OnResourceChanged(u *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if u.Err != nil { + handleError := func(context.Context) { cw.parent.onClusterResourceChangedError(cw.name, u.Err); onDone() } cw.parent.serializer.ScheduleOr(handleError, onDone) return } - handleUpdate := func(context.Context) { cw.parent.onClusterUpdate(cw.name, u.Resource); onDone() } + update := u.Data.(*xdsresource.ClusterResourceData) + handleUpdate := func(context.Context) { cw.parent.onClusterUpdate(cw.name, update.Resource); onDone() } cw.parent.serializer.ScheduleOr(handleUpdate, onDone) } diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index 7ad3628ccc22..9bd551331a31 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -76,18 +76,18 @@ func newEDSResolver(nameToWatch string, producer xdsresource.Producer, topLevelR } // OnUpdate is invoked to report an update for the resource being watched. 
-func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.EndpointsResourceData, err error, onDone xdsresource.OnDoneFunc) { +func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { if er.stopped.HasFired() { onDone() return } - if err != nil { + if update.Err != nil { if er.logger.V(2) { - if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { er.logger.Infof("EDS discovery mechanism for resource %q reported resource-does-not-exist error", er.nameToWatch) } else { - er.logger.Infof("EDS discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, err) + er.logger.Infof("EDS discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, update.Err) } } // Report an empty update that would result in no priority child being @@ -105,7 +105,8 @@ func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.Endpoints } er.mu.Lock() - er.update = &update.Resource + u := update.Data.(*xdsresource.EndpointsResourceData) + er.update = &u.Resource er.mu.Unlock() er.topLevelResolver.onUpdate(onDone) diff --git a/xds/internal/resolver/watch_service.go b/xds/internal/resolver/watch_service.go index 6b716ea08b4c..cddd571cf4f3 100644 --- a/xds/internal/resolver/watch_service.go +++ b/xds/internal/resolver/watch_service.go @@ -36,13 +36,14 @@ func newListenerWatcher(resourceName string, parent *xdsResolver) *listenerWatch return lw } -func (l *listenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - handleError := func(context.Context) { l.parent.onListenerResourceChangedError(err); onDone() } +func (l *listenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + handleError := 
func(context.Context) { l.parent.onListenerResourceChangedError(update.Err); onDone() } l.parent.serializer.ScheduleOr(handleError, onDone) return } - handleUpdate := func(context.Context) { l.parent.onListenerResourceUpdate(update.Resource); onDone() } + u := update.Data.(*xdsresource.ListenerResourceData) + handleUpdate := func(context.Context) { l.parent.onListenerResourceUpdate(u.Resource); onDone() } l.parent.serializer.ScheduleOr(handleUpdate, onDone) } @@ -68,14 +69,15 @@ func newRouteConfigWatcher(resourceName string, parent *xdsResolver) *routeConfi return rw } -func (r *routeConfigWatcher) OnResourceChanged(u *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - handleError := func(context.Context) { r.parent.onRouteConfigResourceChangedError(r.resourceName, err); onDone() } +func (r *routeConfigWatcher) OnResourceChanged(u *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if u.Err != nil { + handleError := func(context.Context) { r.parent.onRouteConfigResourceChangedError(r.resourceName, u.Err); onDone() } r.parent.serializer.ScheduleOr(handleError, onDone) return } handleUpdate := func(context.Context) { - r.parent.onRouteConfigResourceUpdate(r.resourceName, u.Resource) + update := u.Data.(*xdsresource.RouteConfigResourceData) + r.parent.onRouteConfigResourceUpdate(r.resourceName, update.Resource) onDone() } r.parent.serializer.ScheduleOr(handleUpdate, onDone) diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index a820a921afa1..19fe2acfe957 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -414,28 +414,30 @@ type ldsWatcher struct { name string } -func (lw *ldsWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { +func (lw *ldsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { defer 
onDone() if lw.parent.closed.HasFired() { - if err != nil { - lw.logger.Warningf("Resource %q received err: %#v after listener was closed", lw.name, err) + if update.Err != nil { + lw.logger.Warningf("Resource %q received err: %#v after listener was closed", lw.name, update.Err) } else { lw.logger.Warningf("Resource %q received update: %#v after listener was closed", lw.name, update) } return } if lw.logger.V(2) { - if err != nil { - lw.logger.Infof("LDS watch for resource %q received error: %#v", lw.name, err) + if update.Err != nil { + lw.logger.Infof("LDS watch for resource %q received error: %#v", lw.name, update.Err) } else { - lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, update.Resource) + u := update.Data.(*xdsresource.ListenerResourceData) + lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, u.Resource) } } - if err != nil { - lw.parent.onLDSResourceChangedError(err) + if update.Err != nil { + lw.parent.onLDSResourceChangedError(update.Err) return } - lw.parent.handleLDSUpdate(update.Resource) + u := update.Data.(*xdsresource.ListenerResourceData) + lw.parent.handleLDSUpdate(u.Resource) } func (lw *ldsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index 998145b32767..90ba071ec226 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -147,7 +147,7 @@ type rdsWatcher struct { canceled bool // eats callbacks if true } -func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { +func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { defer onDone() rw.mu.Lock() if rw.canceled { @@ -156,17 +156,19 @@ func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceD } rw.mu.Unlock() if rw.logger.V(2) { - if err != nil { - 
rw.logger.Infof("RDS watch for resource %q received error: %#v", rw.routeName, err) + if update.Err != nil { + rw.logger.Infof("RDS watch for resource %q received error: %#v", rw.routeName, update.Err) } else { - rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, update.Resource) + u := update.Data.(*xdsresource.RouteConfigResourceData) + rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, u.Resource) } } - if err != nil { - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) + if update.Err != nil { + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: update.Err}) return } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &update.Resource}) + u := update.Data.(*xdsresource.RouteConfigResourceData) + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &u.Resource}) } func (rw *rdsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index 522b5d9f37a9..1c9fa5143d43 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -37,10 +37,10 @@ type TestResourceWatcher struct { // OnResourceChanged is invoked by the xDS client to report the latest update // or an error on the resource being watched. 
-func (w *TestResourceWatcher) OnResourceChanged(data xdsresource.ResourceData, err error, onDone xdsresource.OnDoneFunc) { +func (w *TestResourceWatcher) OnResourceChanged(update xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { defer onDone() - if err != nil { - if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { + if update.Err != nil { + if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { select { case <-w.ResourceDoesNotExistCh: default: @@ -52,7 +52,7 @@ func (w *TestResourceWatcher) OnResourceChanged(data xdsresource.ResourceData, e case <-w.ErrorCh: default: } - w.ErrorCh <- err + w.ErrorCh <- update.Err return } @@ -60,7 +60,7 @@ func (w *TestResourceWatcher) OnResourceChanged(data xdsresource.ResourceData, e case <-w.UpdateCh: default: } - w.UpdateCh <- &data + w.UpdateCh <- &update.Data } // OnAmbientError is invoked by the xDS client to report the latest error. diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 3313b9e45df8..977cdfa71e78 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -388,7 +388,9 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher resource := uErr.Resource watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnResourceChanged(resource, nil, done) }) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Data: resource}, done) + }) } } @@ -464,7 +466,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher watcherCnt.Add(1) funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource has been removed"), done) + 
watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource has been removed")}, done) }) } } @@ -508,7 +510,7 @@ func (a *authority) handleADSResourceDoesNotExist(rType xdsresource.Type, resour for watcher := range state.watchers { watcher := watcher a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName()), func() {}) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName())}, func() {}) }) } } @@ -645,7 +647,9 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // xdsClientSerializer callback. Hence making a copy of the cached // resource here for watchCallbackSerializer. resource := state.cache - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnResourceChanged(resource, nil, func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Data: resource}, func() {}) + }) } // If last update was NACK'd, notify the new watcher of error // immediately as well. @@ -663,7 +667,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // server does not have this resource, notify the new watcher. 
if state.md.Status == xdsresource.ServiceStatusNotExist { a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(nil, xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName()), func() {}) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName())}, func() {}) }) } cleanup = a.unwatchResource(rType, resourceName, watcher) diff --git a/xds/internal/xdsclient/clientimpl_watchers.go b/xds/internal/xdsclient/clientimpl_watchers.go index b21f89131296..22292dbbdc7d 100644 --- a/xds/internal/xdsclient/clientimpl_watchers.go +++ b/xds/internal/xdsclient/clientimpl_watchers.go @@ -45,7 +45,9 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if err := c.resourceTypes.maybeRegister(rType); err != nil { logger.Warningf("Watch registered for name %q of type %q which is already registered", rType.TypeName(), resourceName) - c.serializer.TrySchedule(func(context.Context) { watcher.OnResourceChanged(nil, err, func() {}) }) + c.serializer.TrySchedule(func(context.Context) { + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: err}, func() {}) + }) return func() {} } @@ -54,7 +56,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if a == nil { logger.Warningf("Watch registered for name %q of type %q, authority %q is not found", rType.TypeName(), resourceName, n.Authority) c.serializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(nil, fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {}) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName)}, func() {}) }) return func() {} } diff --git 
a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index ffa6cdf09d03..ee6b610ecb55 100644 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -60,9 +60,9 @@ func newBLockingListenerWatcher() *blockingListenerWatcher { } } -func (lw *blockingListenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, done xdsresource.OnDoneFunc) { - if err != nil { - if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { +func (lw *blockingListenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, done xdsresource.OnDoneFunc) { + if update.Err != nil { + if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { // Notify receipt of resource not found. select { case lw.notFoundCh <- struct{}{}: diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index 165ca0057b6b..229e821d9d5a 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -44,7 +44,7 @@ import ( type noopClusterWatcher struct{} -func (noopClusterWatcher) OnResourceChanged(_ *xdsresource.ClusterResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (noopClusterWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (noopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -64,13 +64,14 @@ func newClusterWatcher() *clusterWatcher { return &clusterWatcher{updateCh: testutils.NewChannel()} } -func (cw *clusterWatcher) OnResourceChanged(update *xdsresource.ClusterResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - cw.updateCh.Replace(clusterUpdateErrTuple{err: err}) +func (cw *clusterWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, 
onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + cw.updateCh.Replace(clusterUpdateErrTuple{err: update.Err}) onDone() return } - cw.updateCh.Send(clusterUpdateErrTuple{update: update.Resource}) + u := update.Data.(*xdsresource.ClusterResourceData) + cw.updateCh.Send(clusterUpdateErrTuple{update: u.Resource}) onDone() } diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index c6506ddf408a..12b9b004b76d 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -53,7 +53,7 @@ const ( type noopEndpointsWatcher struct{} -func (noopEndpointsWatcher) OnResourceChanged(_ *xdsresource.EndpointsResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (noopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -76,13 +76,14 @@ func newEndpointsWatcher() *endpointsWatcher { return &endpointsWatcher{updateCh: testutils.NewChannel()} } -func (ew *endpointsWatcher) OnResourceChanged(update *xdsresource.EndpointsResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - ew.updateCh.Replace(endpointsUpdateErrTuple{err: err}) +func (ew *endpointsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + ew.updateCh.Replace(endpointsUpdateErrTuple{err: update.Err}) onDone() return } - ew.updateCh.Send(endpointsUpdateErrTuple{update: update.Resource}) + u := update.Data.(*xdsresource.EndpointsResourceData) + ew.updateCh.Send(endpointsUpdateErrTuple{update: u.Resource}) onDone() } diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index b03e296e207e..ac913e01512f 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ 
b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -48,7 +48,7 @@ import ( type noopListenerWatcher struct{} -func (noopListenerWatcher) OnResourceChanged(_ *xdsresource.ListenerResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (noopListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (noopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -68,13 +68,14 @@ func newListenerWatcher() *listenerWatcher { return &listenerWatcher{updateCh: testutils.NewChannel()} } -func (lw *listenerWatcher) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - lw.updateCh.Replace(listenerUpdateErrTuple{err: err}) +func (lw *listenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + lw.updateCh.Replace(listenerUpdateErrTuple{err: update.Err}) onDone() return } - lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) + u := update.Data.(*xdsresource.ListenerResourceData) + lw.updateCh.Send(listenerUpdateErrTuple{update: u.Resource}) onDone() } @@ -97,13 +98,14 @@ func newListenerWatcherMultiple(size int) *listenerWatcherMultiple { return &listenerWatcherMultiple{updateCh: testutils.NewChannelWithSize(size)} } -func (lw *listenerWatcherMultiple) OnResourceChanged(update *xdsresource.ListenerResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - lw.updateCh.Send(listenerUpdateErrTuple{err: err}) +func (lw *listenerWatcherMultiple) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + lw.updateCh.Send(listenerUpdateErrTuple{err: update.Err}) onDone() return } - lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) + u := update.Data.(*xdsresource.ListenerResourceData) + lw.updateCh.Send(listenerUpdateErrTuple{update: u.Resource}) 
onDone() } diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index 76e764421730..a77b59ae490a 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -69,13 +69,14 @@ func newTestRouteConfigWatcher(client xdsclient.XDSClient, name1, name2 string) } } -func (rw *testRouteConfigWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) +func (rw *testRouteConfigWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: update.Err}) onDone() return } - rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) + rc := update.Data.(*xdsresource.RouteConfigResourceData) + rw.updateCh.Send(routeConfigUpdateErrTuple{update: rc.Resource}) rw.cancel1 = xdsresource.WatchRouteConfig(rw.client, rw.name1, rw.rcw1) rw.cancel2 = xdsresource.WatchRouteConfig(rw.client, rw.name2, rw.rcw2) diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index dfb161bb69a5..1facba7afbce 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -43,7 +43,7 @@ import ( type noopRouteConfigWatcher struct{} -func (noopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.RouteConfigResourceData, _ error, onDone xdsresource.OnDoneFunc) { +func (noopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { onDone() } func (noopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { @@ -63,13 +63,14 @@ func newRouteConfigWatcher() *routeConfigWatcher { return &routeConfigWatcher{updateCh: testutils.NewChannel()} } -func 
(rw *routeConfigWatcher) OnResourceChanged(update *xdsresource.RouteConfigResourceData, err error, onDone xdsresource.OnDoneFunc) { - if err != nil { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) +func (rw *routeConfigWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { + if update.Err != nil { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: update.Err}) onDone() return } - rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) + rc := update.Data.(*xdsresource.RouteConfigResourceData) + rw.updateCh.Send(routeConfigUpdateErrTuple{update: rc.Resource}) onDone() } diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index 0e43f0261cd4..1c45867a419e 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -121,7 +121,7 @@ type ClusterWatcher interface { // - resource validation error (if resource is not cached) // - ADS stream failure (if resource is not cached) // - connection failure (if resource is not cached) - OnResourceChanged(*ClusterResourceData, error, OnDoneFunc) + OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // If resource is already cached, it is invoked under different error // conditions including but not limited to the following: @@ -135,13 +135,13 @@ type delegatingClusterWatcher struct { watcher ClusterWatcher } -func (d *delegatingClusterWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { - if err != nil { - d.watcher.OnResourceChanged(nil, err, onDone) +func (d *delegatingClusterWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { + if update.Err != nil { + d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) return } - c := data.(*ClusterResourceData) - d.watcher.OnResourceChanged(c, nil, onDone) + c := 
update.Data.(*ClusterResourceData) + d.watcher.OnResourceChanged(&ResourceDataOrError{Data: c}, onDone) } func (d *delegatingClusterWatcher) OnAmbientError(err error, onDone OnDoneFunc) { diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index 2f0faf5b70aa..f92dc1a2734b 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -117,7 +117,7 @@ type EndpointsWatcher interface { // - resource validation error (if resource is not cached) // - ADS stream failure (if resource is not cached) // - connection failure (if resource is not cached) - OnResourceChanged(*EndpointsResourceData, error, OnDoneFunc) + OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // If resource is already cached, it is invoked under different error // conditions including but not limited to the following: @@ -131,13 +131,13 @@ type delegatingEndpointsWatcher struct { watcher EndpointsWatcher } -func (d *delegatingEndpointsWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { - if err != nil { - d.watcher.OnResourceChanged(nil, err, onDone) +func (d *delegatingEndpointsWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { + if update.Err != nil { + d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) return } - e := data.(*EndpointsResourceData) - d.watcher.OnResourceChanged(e, nil, onDone) + e := update.Data.(*EndpointsResourceData) + d.watcher.OnResourceChanged(&ResourceDataOrError{Data: e}, onDone) } func (d *delegatingEndpointsWatcher) OnAmbientError(err error, onDone OnDoneFunc) { diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 07ddd5ae1bfc..af5a8564924d 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ 
b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -154,7 +154,7 @@ type ListenerWatcher interface { // - resource validation error (if resource is not cached) // - ADS stream failure (if resource is not cached) // - connection failure (if resource is not cached) - OnResourceChanged(*ListenerResourceData, error, OnDoneFunc) + OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // If resource is already cached, it is invoked under different error // conditions including but not limited to the following: @@ -168,13 +168,13 @@ type delegatingListenerWatcher struct { watcher ListenerWatcher } -func (d *delegatingListenerWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { - if err != nil { - d.watcher.OnResourceChanged(nil, err, onDone) +func (d *delegatingListenerWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { + if update.Err != nil { + d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) return } - l := data.(*ListenerResourceData) - d.watcher.OnResourceChanged(l, nil, onDone) + l := update.Data.(*ListenerResourceData) + d.watcher.OnResourceChanged(&ResourceDataOrError{Data: l}, onDone) } func (d *delegatingListenerWatcher) OnAmbientError(err error, onDone OnDoneFunc) { diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index 55b5f4a88430..19542b8c1096 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -58,18 +58,25 @@ type Producer interface { // from the xDS server. type OnDoneFunc func() -// ResourceWatcher is an interface that can to be implemented to wrap the -// callbacks to be invoked for different events corresponding to the resource -// being watched. +// ResourceDataOrError is a struct that contains either ResourceData or error. +// It is used to represent the result of an xDS resource update. 
Exactly one of +// Data or Err will be non-nil. +type ResourceDataOrError struct { + Data ResourceData + Err error +} + +// ResourceWatcher wraps the callbacks to be invoked for different events +// corresponding to the resource being watched. type ResourceWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. + // reason why the resource could not be obtained. // - // The ResourceData parameter needs to be type asserted to the appropriate - // type for the resource being watched. In case of error, the ResourceData - // is nil otherwise its not nil and error is nil but both will never be nil - // together. + // The ResourceData of the ResourceDataOrError needs to be type asserted to + // the appropriate type for the resource being watched. In case of error, + // the ResourceData is nil otherwise its not nil and error is nil but both + // will never be nil together. // // Watcher is expected to use the most recent value passed to // OnResourceChanged(), regardless of whether that's a resource or an error @@ -83,7 +90,7 @@ type ResourceWatcher interface { // - resource validation error (if resource is not cached) // - ADS stream failure (if resource is not cached) // - connection failure (if resource is not cached) - OnResourceChanged(ResourceData, error, OnDoneFunc) + OnResourceChanged(ResourceDataOrError, OnDoneFunc) // OnAmbientError is invoked to notify the watcher of an error that occurs // after a resource has been received (i.e. 
we already have a cached diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 25576903d96d..2569e2b62a3a 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -118,7 +118,7 @@ type RouteConfigWatcher interface { // - resource validation error (if resource is not cached) // - ADS stream failure (if resource is not cached) // - connection failure (if resource is not cached) - OnResourceChanged(*RouteConfigResourceData, error, OnDoneFunc) + OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // If resource is already cached, it is invoked under different error // conditions including but not limited to the following: @@ -132,13 +132,13 @@ type delegatingRouteConfigWatcher struct { watcher RouteConfigWatcher } -func (d *delegatingRouteConfigWatcher) OnResourceChanged(data ResourceData, err error, onDone OnDoneFunc) { - if err != nil { - d.watcher.OnResourceChanged(nil, err, onDone) +func (d *delegatingRouteConfigWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { + if update.Err != nil { + d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) return } - rc := data.(*RouteConfigResourceData) - d.watcher.OnResourceChanged(rc, nil, onDone) + rc := update.Data.(*RouteConfigResourceData) + d.watcher.OnResourceChanged(&ResourceDataOrError{Data: rc}, onDone) } func (d *delegatingRouteConfigWatcher) OnAmbientError(err error, onDone OnDoneFunc) { From 1fdeb0da8576fc8697c6b94b68f1e49afff5f5c1 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Tue, 14 Jan 2025 01:31:29 +0530 Subject: [PATCH 05/16] easwars review 2 --- .../balancer/cdsbalancer/cdsbalancer.go | 2 ++ .../clusterresolver/resource_resolver_eds.go | 6 +---- xds/internal/resolver/xds_resolver.go | 12 ++-------- xds/internal/server/listener_wrapper.go | 14 +++++------ 
xds/internal/server/rds_handler.go | 14 +++++------ xds/internal/testutils/resource_watcher.go | 1 - xds/internal/xdsclient/authority.go | 6 ++--- .../xdsclient/tests/resource_update_test.go | 24 +++++++++---------- .../xdsresource/cluster_resource_type.go | 4 ++-- .../xdsresource/endpoints_resource_type.go | 4 ++-- .../xdsresource/listener_resource_type.go | 4 ++-- .../xdsclient/xdsresource/resource_type.go | 17 +++++++------ .../xdsresource/route_config_resource_type.go | 4 ++-- 13 files changed, 50 insertions(+), 62 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 6254833bd100..d72f365f8202 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -526,6 +526,8 @@ func (b *cdsBalancer) onClusterAmbientError(name string, err error) { // // Only executed in the context of a serializer callback. func (b *cdsBalancer) onClusterResourceChangedError(name string, err error) { + b.logger.Warningf("Cluster resource %q received error update: %v", name, err) + if b.childLB != nil { b.childLB.ResolverError(err) } else { diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index 9bd551331a31..3aa757437d9b 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -84,11 +84,7 @@ func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.ResourceD if update.Err != nil { if er.logger.V(2) { - if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { - er.logger.Infof("EDS discovery mechanism for resource %q reported resource-does-not-exist error", er.nameToWatch) - } else { - er.logger.Infof("EDS discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, update.Err) - } + er.logger.Infof("EDS 
discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, update.Err) } // Report an empty update that would result in no priority child being // created for this discovery mechanism. This would result in the priority diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index ea6a6bc62d4f..3795d625d526 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -528,11 +528,7 @@ func (r *xdsResolver) onListenerResourceAmbientError(err error) { // Only executed in the context of a serializer callback. func (r *xdsResolver) onListenerResourceChangedError(err error) { if r.logger.V(2) { - if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { - r.logger.Infof("Received resource-not-found-error for Listener resource %q", r.ldsResourceName) - } else { - r.logger.Infof("Received on-resource-changed error for Listener resource %q: %v", r.ldsResourceName, err) - } + r.logger.Infof("Received on-resource-changed error for Listener resource %q: %v", r.ldsResourceName, err) } r.listenerUpdateRecvd = false @@ -573,11 +569,7 @@ func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) // Only executed in the context of a serializer callback. 
func (r *xdsResolver) onRouteConfigResourceChangedError(name string, err error) { if r.logger.V(2) { - if xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { - r.logger.Infof("Received resource-not-found-error for RouteConfiguration resource %q", name) - } else { - r.logger.Infof("Received on-resource-changed error for RouteConfiguration resource %q: %v", name, err) - } + r.logger.Infof("Received on-resource-changed error for RouteConfiguration resource %q: %v", name, err) } if r.rdsResourceName != name { diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index 19fe2acfe957..b8c2fa477666 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -424,19 +424,17 @@ func (lw *ldsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, } return } - if lw.logger.V(2) { - if update.Err != nil { - lw.logger.Infof("LDS watch for resource %q received error: %#v", lw.name, update.Err) - } else { - u := update.Data.(*xdsresource.ListenerResourceData) - lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, u.Resource) - } - } if update.Err != nil { + if lw.logger.V(2) { + lw.logger.Infof("LDS watch for resource %q received error: %v", lw.name, update.Err) + } lw.parent.onLDSResourceChangedError(update.Err) return } u := update.Data.(*xdsresource.ListenerResourceData) + if lw.logger.V(2) { + lw.logger.Infof("LDS watch for resource %q received update: %v", lw.name, u.Resource) + } lw.parent.handleLDSUpdate(u.Resource) } diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index 90ba071ec226..7999bcbc7140 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -155,19 +155,17 @@ func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, return } rw.mu.Unlock() - if rw.logger.V(2) { - if update.Err != nil { - rw.logger.Infof("RDS watch for resource %q
received error: %#v", rw.routeName, update.Err) - } else { - u := update.Data.(*xdsresource.RouteConfigResourceData) - rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, u.Resource) - } - } if update.Err != nil { + if rw.logger.V(2) { + rw.logger.Infof("RDS watch for resource %q received error: %v", rw.routeName, update.Err) + } rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: update.Err}) return } u := update.Data.(*xdsresource.RouteConfigResourceData) + if rw.logger.V(2) { + rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, u.Resource) + } rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &u.Resource}) } diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index 1c9fa5143d43..50db5d63554a 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -54,7 +54,6 @@ func (w *TestResourceWatcher) OnResourceChanged(update xdsresource.ResourceDataO } w.ErrorCh <- update.Err return - } select { case <-w.UpdateCh: diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 977cdfa71e78..ed4540c4f5ed 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -466,7 +466,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher watcherCnt.Add(1) funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource has been removed")}, done) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName())}, done) }) } } @@ -510,7 +510,7 @@ func (a *authority) handleADSResourceDoesNotExist(rType xdsresource.Type, 
resour for watcher := range state.watchers { watcher := watcher a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName())}, func() {}) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName())}, func() {}) }) } } @@ -667,7 +667,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // server does not have this resource, notify the new watcher. if state.md.Status == xdsresource.ServiceStatusNotExist { a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %s does not exist", rType.TypeName())}, func() {}) + watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName())}, func() {}) }) } cleanup = a.unwatchResource(rType, resourceName, watcher) diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index 67681a1ce641..b66c66c35c69 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -161,7 +161,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "xds: resource ListenerResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: 
"type.googleapis.com/envoy.config.listener.v3.Listener", @@ -177,7 +177,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", VersionInfo: "1", }, - wantErr: "xds: resource ListenerResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -194,7 +194,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3routepb.RouteConfiguration{})}, }, - wantErr: "xds: resource ListenerResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -418,7 +418,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "xds: resource RouteConfigResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"RouteConfigResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -434,7 +434,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", VersionInfo: "1", }, - wantErr: "xds: resource RouteConfigResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"RouteConfigResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -451,7 
+451,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3clusterpb.Cluster{})}, }, - wantErr: "xds: resource RouteConfigResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"RouteConfigResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -667,7 +667,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: "xds: resource ClusterResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -683,7 +683,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", VersionInfo: "1", }, - wantErr: "xds: resource ClusterResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -700,7 +700,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3endpointpb.ClusterLoadAssignment{})}, }, - wantErr: "xds: resource ClusterResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -974,7 +974,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Value: 
[]byte{1, 2, 3, 4}, }}, }, - wantErr: "xds: resource EndpointsResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -990,7 +990,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", VersionInfo: "1", }, - wantErr: "xds: resource EndpointsResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1007,7 +1007,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{})}, }, - wantErr: "xds: resource EndpointsResource does not exist", + wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index 1c45867a419e..dbc107519b28 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -123,8 +123,8 @@ type ClusterWatcher interface { // - connection failure (if resource is not cached) OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - // If resource is already cached, it is invoked under different error - // conditions including but not limited to the following: + // OnAmbientError is invoked if 
resource is already cached under different + // error conditions including but not limited to the following: // - resource validation error // - ADS stream failure // - connection failure diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index f92dc1a2734b..1e21426e9ef3 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -119,8 +119,8 @@ type EndpointsWatcher interface { // - connection failure (if resource is not cached) OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - // If resource is already cached, it is invoked under different error - // conditions including but not limited to the following: + // OnAmbientError is invoked if resource is already cached under different + // error conditions including but not limited to the following: // - resource validation error // - ADS stream failure // - connection failure diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index af5a8564924d..5946082cfb66 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -156,8 +156,8 @@ type ListenerWatcher interface { // - connection failure (if resource is not cached) OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - // If resource is already cached, it is invoked under different error - // conditions including but not limited to the following: + // OnAmbientError is invoked if resource is already cached under different + // error conditions including but not limited to the following: // - resource validation error // - ADS stream failure // - connection failure diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index 19542b8c1096..6d79053b544f 100644 --- 
a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -73,15 +73,18 @@ type ResourceWatcher interface { // the resource received from the xDS server or an error indicating the // reason why the resource could not be obtained. // - // The ResourceData of the ResourceDataOrError needs to be type asserted to - // the appropriate type for the resource being watched. In case of error, - // the ResourceData is nil otherwise its not nil and error is nil but both - // will never be nil together. + // In the former case, this callback will be invoked with a non-nil + // ResourceData in ResourceDataOrError. The ResourceData of the + // ResourceDataOrError needs to be type asserted to the appropriate type + // for the resource being watched. + // + // In the latter case, this callback will be invoked with a non-nil error + // value in ResourceDataOrError. // // Watcher is expected to use the most recent value passed to - // OnResourceChanged(), regardless of whether that's a resource or an error - // i.e., if the watcher is given an error via OnResourceChanged(), that - // means it should stop using any previously delivered resource. + // OnResourceChanged(), regardless of whether that's a ResourceData or an + // error i.e., if the watcher is given an error via OnResourceChanged(), + // that means it should stop using any previously delivered resource. 
// // It is invoked under different error conditions including but not // limited to the following: diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 2569e2b62a3a..8077748201ba 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -120,8 +120,8 @@ type RouteConfigWatcher interface { // - connection failure (if resource is not cached) OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - // If resource is already cached, it is invoked under different error - // conditions including but not limited to the following: + // OnAmbientError is invoked if resource is already cached under different + // error conditions including but not limited to the following: // - resource validation error // - ADS stream failure // - connection failure From 516a01f07db624e15c793ec7124d7bc9964fb98b Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Fri, 31 Jan 2025 16:43:15 +0530 Subject: [PATCH 06/16] update ResourceWatcher documentation --- .../xdsresource/cluster_resource_type.go | 21 ++++---- .../xdsresource/endpoints_resource_type.go | 21 ++++---- .../xdsresource/listener_resource_type.go | 21 ++++---- .../xdsclient/xdsresource/resource_type.go | 51 +++++-------------- .../xdsresource/route_config_resource_type.go | 22 ++++---- 5 files changed, 50 insertions(+), 86 deletions(-) diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index dbc107519b28..4c16900af86c 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -108,26 +108,23 @@ func (c *ClusterResourceData) Raw() *anypb.Any { } // ClusterWatcher wraps the callbacks to be invoked for different events -// corresponding to the cluster resource being watched. 
+// corresponding to the cluster resource being watched. gRFC A88 contains an +// exhaustive list of what method is invoked under what conditions. type ClusterWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the // reason why the resource cannot be obtained. // - // It is invoked under different error conditions including but not - // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource validation error (if resource is not cached) - // - ADS stream failure (if resource is not cached) - // - connection failure (if resource is not cached) + // Upon receiving this, in case of an error, the watcher should + // stop using any previously seen resource. xDS client will remove the + // resource from its cache. OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // OnAmbientError is invoked if resource is already cached under different - // error conditions including but not limited to the following: - // - resource validation error - // - ADS stream failure - // - connection failure + // error conditions. + // + // Upon receiving this, the watcher may continue using the previously seen + // resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index 1e21426e9ef3..c74e26abc113 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -104,26 +104,23 @@ func (e *EndpointsResourceData) Raw() *anypb.Any { } // EndpointsWatcher wraps the callbacks to be invoked for different -// events corresponding to the endpoints resource being watched. +// events corresponding to the endpoints resource being watched. 
gRFC A88 +// contains an exhaustive list of what method is invoked under what conditions. type EndpointsWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the // reason why the resource cannot be obtained. // - // It is invoked under different error conditions including but not - // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource validation error (if resource is not cached) - // - ADS stream failure (if resource is not cached) - // - connection failure (if resource is not cached) + // Upon receiving this, in case of an error, the watcher should + // stop using any previously seen resource. xDS client will remove the + // resource from its cache. OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // OnAmbientError is invoked if resource is already cached under different - // error conditions including but not limited to the following: - // - resource validation error - // - ADS stream failure - // - connection failure + // error conditions. + // + // Upon receiving this, the watcher may continue using the previously seen + // resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 5946082cfb66..8cc6a307d8eb 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -141,26 +141,23 @@ func (l *ListenerResourceData) Raw() *anypb.Any { } // ListenerWatcher wraps the callbacks to be invoked for different -// events corresponding to the listener resource being watched. +// events corresponding to the listener resource being watched. 
gRFC A88 +// contains an exhaustive list of what method is invoked under what conditions. type ListenerWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the // reason why the resource cannot be obtained. // - // It is invoked under different error conditions including but not - // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource validation error (if resource is not cached) - // - ADS stream failure (if resource is not cached) - // - connection failure (if resource is not cached) + // Upon receiving this, in case of an error, the watcher should + // stop using any previously seen resource. xDS client will remove the + // resource from its cache. OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // OnAmbientError is invoked if resource is already cached under different - // error conditions including but not limited to the following: - // - resource validation error - // - ADS stream failure - // - connection failure + // error conditions. + // + // Upon receiving this, the watcher may continue using the previously seen + // resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index 6d79053b544f..fcc85e2f699a 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -58,57 +58,32 @@ type Producer interface { // from the xDS server. type OnDoneFunc func() -// ResourceDataOrError is a struct that contains either ResourceData or error. -// It is used to represent the result of an xDS resource update. Exactly one of -// Data or Err will be non-nil. +// ResourceDataOrError contains either ResourceData or error. 
It is used to +// represent the result of an xDS resource update. Exactly one of Data or Err +// will be non-nil. type ResourceDataOrError struct { Data ResourceData Err error } // ResourceWatcher wraps the callbacks to be invoked for different events -// corresponding to the resource being watched. +// corresponding to the resource being watched. gRFC A88 contains an exhaustive +// list of what method is invoked under what conditions. type ResourceWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the - // reason why the resource could not be obtained. + // reason why the resource cannot be obtained. // - // In the former case, this callback will be invoked with a non-nil - // ResourceData in ResourceDataOrError. The ResourceData of the - // ResourceDataOrError needs to be type asserted to the appropriate type - // for the resource being watched. - // - // In the latter case, this callback will be invoked with a non-nil error - // value in ResourceDataOrError. - // - // Watcher is expected to use the most recent value passed to - // OnResourceChanged(), regardless of whether that's a ResourceData or an - // error i.e., if the watcher is given an error via OnResourceChanged(), - // that means it should stop using any previously delivered resource. - // - // It is invoked under different error conditions including but not - // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource validation error (if resource is not cached) - // - ADS stream failure (if resource is not cached) - // - connection failure (if resource is not cached) + // Upon receiving this, in case of an error, the watcher should + // stop using any previously seen resource. xDS client will remove the + // resource from its cache. 
OnResourceChanged(ResourceDataOrError, OnDoneFunc) - // OnAmbientError is invoked to notify the watcher of an error that occurs - // after a resource has been received (i.e. we already have a cached - // resource) that should not modify the watcher’s use of that resource but - // that may be useful information about the ambient state of the XdsClient. - // In particular, the watcher should NOT stop using the previously seen - // resource, and the XdsClient will NOT remove the resource from its cache. - // However, the error message may be useful as additional context to - // include in errors that are being generated for other reasons. + // OnAmbientError is invoked if resource is already cached under different + // error conditions. // - // If resource is already cached, it is invoked under different error - // conditions including but not limited to the following: - // - resource validation error - // - ADS stream failure - // - connection failure + // Upon receiving this, the watcher may continue using the previously seen + // resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 8077748201ba..60cddd9ba37e 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -105,26 +105,24 @@ func (r *RouteConfigResourceData) Raw() *anypb.Any { } // RouteConfigWatcher wraps the callbacks to be invoked for different -// events corresponding to the route configuration resource being watched. +// events corresponding to the route configuration resource being watched. gRFC +// A88 contains an exhaustive list of what method is invoked under what +// conditions. 
type RouteConfigWatcher interface { // OnResourceChanged is invoked to notify the watcher of a new version of // the resource received from the xDS server or an error indicating the // reason why the resource cannot be obtained. // - // It is invoked under different error conditions including but not - // limited to the following: - // - authority mentioned in the resource is not found - // - resource name parsing error - // - resource validation error (if resource is not cached) - // - ADS stream failure (if resource is not cached) - // - connection failure (if resource is not cached) + // Upon receiving this, in case of an error, the watcher should + // stop using any previously seen resource. xDS client will remove the + // resource from its cache. OnResourceChanged(*ResourceDataOrError, OnDoneFunc) // OnAmbientError is invoked if resource is already cached under different - // error conditions including but not limited to the following: - // - resource validation error - // - ADS stream failure - // - connection failure + // error conditions. + // + // Upon receiving this, the watcher may continue using the previously seen + // resource. xDS client will not remove the resource from its cache. 
OnAmbientError(error, OnDoneFunc) } From b9d2a920326072460d62453b6cf165067d317a30 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Tue, 4 Feb 2025 16:54:21 +0530 Subject: [PATCH 07/16] update watcher documentation to clarify stop/not stop using cached resource --- xds/internal/balancer/cdsbalancer/cdsbalancer.go | 12 ++++++------ .../xdsclient/xdsresource/cluster_resource_type.go | 4 ++-- .../xdsclient/xdsresource/endpoints_resource_type.go | 4 ++-- .../xdsclient/xdsresource/listener_resource_type.go | 4 ++-- xds/internal/xdsclient/xdsresource/resource_type.go | 4 ++-- .../xdsresource/route_config_resource_type.go | 4 ++-- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index d72f365f8202..43b2d88b2a4c 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -496,9 +496,9 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd } } -// Handles an error Cluster update from the xDS client. Propagates the error -// down to the child policy if one exists, or puts the channel in -// TRANSIENT_FAILURE. +// Handles an error Cluster update from the xDS client to not stop using the +// previously seen resource. Propagates the error down to the child policy +// if one exists, or puts the channel in TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. func (b *cdsBalancer) onClusterAmbientError(name string, err error) { @@ -520,9 +520,9 @@ func (b *cdsBalancer) onClusterAmbientError(name string, err error) { } } -// Handles a resource-not-found error from the xDS client. Propagates the error -// down to the child policy if one exists, or puts the channel in -// TRANSIENT_FAILURE. +// Handles an error Cluster update from the xDS client to stop using the +// previously seen resource. 
Propagates the error down to the child policy +// if one exists, or puts the channel in TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. func (b *cdsBalancer) onClusterResourceChangedError(name string, err error) { diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index 4c16900af86c..e28c5768fe3d 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -123,8 +123,8 @@ type ClusterWatcher interface { // OnAmbientError is invoked if resource is already cached under different // error conditions. // - // Upon receiving this, the watcher may continue using the previously seen - // resource. xDS client will not remove the resource from its cache. + // Upon receiving this, the watcher should not stop using the previously + // seen resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index c74e26abc113..8854bb590060 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -119,8 +119,8 @@ type EndpointsWatcher interface { // OnAmbientError is invoked if resource is already cached under different // error conditions. // - // Upon receiving this, the watcher may continue using the previously seen - // resource. xDS client will not remove the resource from its cache. + // Upon receiving this, the watcher should not stop using the previously + // seen resource. xDS client will not remove the resource from its cache. 
OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 8cc6a307d8eb..89146bfe8ec4 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -156,8 +156,8 @@ type ListenerWatcher interface { // OnAmbientError is invoked if resource is already cached under different // error conditions. // - // Upon receiving this, the watcher may continue using the previously seen - // resource. xDS client will not remove the resource from its cache. + // Upon receiving this, the watcher should not stop using the previously + // seen resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index fcc85e2f699a..2ed3cbe58d9b 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -82,8 +82,8 @@ type ResourceWatcher interface { // OnAmbientError is invoked if resource is already cached under different // error conditions. // - // Upon receiving this, the watcher may continue using the previously seen - // resource. xDS client will not remove the resource from its cache. + // Upon receiving this, the watcher should not stop using the previously + // seen resource. xDS client will not remove the resource from its cache. 
OnAmbientError(error, OnDoneFunc) } diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 60cddd9ba37e..45cb4db9b096 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -121,8 +121,8 @@ type RouteConfigWatcher interface { // OnAmbientError is invoked if resource is already cached under different // error conditions. // - // Upon receiving this, the watcher may continue using the previously seen - // resource. xDS client will not remove the resource from its cache. + // Upon receiving this, the watcher should not stop using the previously + // seen resource. xDS client will not remove the resource from its cache. OnAmbientError(error, OnDoneFunc) } From 726ce38c3c33db3fa94af73d7270e92d001e96d3 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Wed, 26 Mar 2025 22:16:05 +0530 Subject: [PATCH 08/16] change based on final resource watcher interface with separate callbacks for ambient and resource error --- xds/csds/csds_e2e_test.go | 43 ++++++++++----- .../balancer/cdsbalancer/cdsbalancer.go | 2 +- .../balancer/cdsbalancer/cluster_watcher.go | 17 +++--- .../clusterresolver/resource_resolver.go | 8 +-- .../clusterresolver/resource_resolver_eds.go | 42 +++++++------- xds/internal/resolver/watch_service.go | 34 ++++++------ xds/internal/resolver/xds_resolver.go | 12 ++-- xds/internal/server/listener_wrapper.go | 38 ++++++------- xds/internal/server/rds_handler.go | 30 +++++----- xds/internal/testutils/resource_watcher.go | 37 ++++++------- xds/internal/xdsclient/authority.go | 30 ++++++---- xds/internal/xdsclient/clientimpl_watchers.go | 4 +- .../tests/ads_stream_flow_control_test.go | 50 +++++++---------- .../xdsclient/tests/cds_watchers_test.go | 24 ++++---- .../xdsclient/tests/eds_watchers_test.go | 23 ++++---- .../xdsclient/tests/lds_watchers_test.go | 41 +++++++------- 
.../xdsclient/tests/misc_watchers_test.go | 17 +++--- .../xdsclient/tests/rds_watchers_test.go | 24 ++++---- .../xdsresource/cluster_resource_type.go | 47 ++++++++-------- .../xdsresource/endpoints_resource_type.go | 47 ++++++++-------- .../xdsresource/listener_resource_type.go | 46 ++++++++-------- .../xdsclient/xdsresource/resource_type.go | 55 ++++++++----------- .../xdsresource/route_config_resource_type.go | 47 ++++++++-------- 23 files changed, 361 insertions(+), 357 deletions(-) diff --git a/xds/csds/csds_e2e_test.go b/xds/csds/csds_e2e_test.go index 7d75a2f83339..4a776a54411b 100644 --- a/xds/csds/csds_e2e_test.go +++ b/xds/csds/csds_e2e_test.go @@ -70,37 +70,49 @@ func Test(t *testing.T) { type nopListenerWatcher struct{} -func (nopListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (nopListenerWatcher) ResourceChanged(_ *xdsresource.ListenerResourceData, onDone func()) { onDone() } -func (nopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopListenerWatcher) ResourceError(_ error, onDone func()) { + onDone() +} +func (nopListenerWatcher) AmbientError(_ error, onDone func()) { onDone() } type nopRouteConfigWatcher struct{} -func (nopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (nopRouteConfigWatcher) ResourceChanged(_ *xdsresource.RouteConfigResourceData, onDone func()) { onDone() } -func (nopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopRouteConfigWatcher) ResourceError(_ error, onDone func()) { + onDone() +} +func (nopRouteConfigWatcher) AmbientError(_ error, onDone func()) { onDone() } type nopClusterWatcher struct{} -func (nopClusterWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (nopClusterWatcher) ResourceChanged(_ *xdsresource.ClusterResourceData, onDone func()) { + onDone() +} +func 
(nopClusterWatcher) ResourceError(_ error, onDone func()) { onDone() } -func (nopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopClusterWatcher) AmbientError(_ error, onDone func()) { onDone() } type nopEndpointsWatcher struct{} -func (nopEndpointsWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (nopEndpointsWatcher) ResourceChanged(_ *xdsresource.EndpointsResourceData, onDone func()) { + onDone() +} +func (nopEndpointsWatcher) ResourceError(_ error, onDone func()) { onDone() } -func (nopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (nopEndpointsWatcher) AmbientError(_ error, onDone func()) { onDone() } @@ -114,28 +126,31 @@ func (nopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc // for ADS stream level flow control), and was causing CSDS to not receive any // updates from the xDS client. type blockingListenerWatcher struct { - testCtxDone <-chan struct{} // Closed when the test is done. - onDoneCh chan xdsresource.OnDoneFunc // Channel to write the onDone callback to. + testCtxDone <-chan struct{} // Closed when the test is done. + onDoneCh chan func() // Channel to write the onDone callback to. 
} func newBlockingListenerWatcher(testCtxDone <-chan struct{}) *blockingListenerWatcher { return &blockingListenerWatcher{ testCtxDone: testCtxDone, - onDoneCh: make(chan xdsresource.OnDoneFunc, 1), + onDoneCh: make(chan func(), 1), } } -func (w *blockingListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (w *blockingListenerWatcher) ResourceChanged(_ *xdsresource.ListenerResourceData, onDone func()) { + writeOnDone(w.testCtxDone, w.onDoneCh, onDone) +} +func (w *blockingListenerWatcher) ResourceError(_ error, onDone func()) { writeOnDone(w.testCtxDone, w.onDoneCh, onDone) } -func (w *blockingListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (w *blockingListenerWatcher) AmbientError(_ error, onDone func()) { writeOnDone(w.testCtxDone, w.onDoneCh, onDone) } // writeOnDone attempts to write the onDone callback on the onDone channel. It // returns when it can successfully write to the channel or when the test is // done, which is signalled by testCtxDone being closed. -func writeOnDone(testCtxDone <-chan struct{}, onDoneCh chan xdsresource.OnDoneFunc, onDone xdsresource.OnDoneFunc) { +func writeOnDone(testCtxDone <-chan struct{}, onDoneCh chan func(), onDone func()) { select { case <-testCtxDone: case onDoneCh <- onDone: diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 43b2d88b2a4c..a9cb27c9d8b3 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -525,7 +525,7 @@ func (b *cdsBalancer) onClusterAmbientError(name string, err error) { // if one exists, or puts the channel in TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. 
-func (b *cdsBalancer) onClusterResourceChangedError(name string, err error) { +func (b *cdsBalancer) onClusterResourceError(name string, err error) { b.logger.Warningf("Cluster resource %q received error update: %v", name, err) if b.childLB != nil { diff --git a/xds/internal/balancer/cdsbalancer/cluster_watcher.go b/xds/internal/balancer/cdsbalancer/cluster_watcher.go index f6aeff1f7ef0..a9adea0c8040 100644 --- a/xds/internal/balancer/cdsbalancer/cluster_watcher.go +++ b/xds/internal/balancer/cdsbalancer/cluster_watcher.go @@ -32,18 +32,17 @@ type clusterWatcher struct { parent *cdsBalancer } -func (cw *clusterWatcher) OnResourceChanged(u *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if u.Err != nil { - handleError := func(context.Context) { cw.parent.onClusterResourceChangedError(cw.name, u.Err); onDone() } - cw.parent.serializer.ScheduleOr(handleError, onDone) - return - } - update := u.Data.(*xdsresource.ClusterResourceData) - handleUpdate := func(context.Context) { cw.parent.onClusterUpdate(cw.name, update.Resource); onDone() } +func (cw *clusterWatcher) ResourceChanged(u *xdsresource.ClusterResourceData, onDone func()) { + handleUpdate := func(context.Context) { cw.parent.onClusterUpdate(cw.name, u.Resource); onDone() } cw.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (cw *clusterWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (cw *clusterWatcher) ResourceError(err error, onDone func()) { + handleResourceError := func(context.Context) { cw.parent.onClusterResourceError(cw.name, err); onDone() } + cw.parent.serializer.ScheduleOr(handleResourceError, onDone) +} + +func (cw *clusterWatcher) AmbientError(err error, onDone func()) { handleError := func(context.Context) { cw.parent.onClusterAmbientError(cw.name, err); onDone() } cw.parent.serializer.ScheduleOr(handleError, onDone) } diff --git a/xds/internal/balancer/clusterresolver/resource_resolver.go 
b/xds/internal/balancer/clusterresolver/resource_resolver.go index d9315c3acef5..c1a656c59726 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver.go @@ -38,7 +38,7 @@ type resourceUpdate struct { priorities []priorityConfig // To be invoked once the update is completely processed, or is dropped in // favor of a newer update. - onDone xdsresource.OnDoneFunc + onDone func() } // topLevelResolver is used by concrete endpointsResolver implementations for @@ -50,7 +50,7 @@ type topLevelResolver interface { // endpointsResolver implementation. The onDone callback is to be invoked // once the update is completely processed, or is dropped in favor of a // newer update. - onUpdate(onDone xdsresource.OnDoneFunc) + onUpdate(onDone func()) } // endpointsResolver wraps the functionality to resolve a given resource name to @@ -282,7 +282,7 @@ func (rr *resourceResolver) stop(closing bool) { // clusterresolver LB policy. // // Caller must hold rr.mu. 
-func (rr *resourceResolver) generateLocked(onDone xdsresource.OnDoneFunc) { +func (rr *resourceResolver) generateLocked(onDone func()) { var ret []priorityConfig for _, rDM := range rr.children { u, ok := rDM.r.lastUpdate() @@ -312,7 +312,7 @@ func (rr *resourceResolver) generateLocked(onDone xdsresource.OnDoneFunc) { rr.updateChannel <- &resourceUpdate{priorities: ret, onDone: onDone} } -func (rr *resourceResolver) onUpdate(onDone xdsresource.OnDoneFunc) { +func (rr *resourceResolver) onUpdate(onDone func()) { handleUpdate := func(context.Context) { rr.mu.Lock() rr.generateLocked(onDone) diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index 3aa757437d9b..f5a514251491 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -75,47 +75,49 @@ func newEDSResolver(nameToWatch string, producer xdsresource.Producer, topLevelR return ret } -// OnUpdate is invoked to report an update for the resource being watched. -func (er *edsDiscoveryMechanism) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +// ResourceChanged is invoked to report an update for the resource being watched. +func (er *edsDiscoveryMechanism) ResourceChanged(update *xdsresource.EndpointsResourceData, onDone func()) { if er.stopped.HasFired() { onDone() return } - if update.Err != nil { - if er.logger.V(2) { - er.logger.Infof("EDS discovery mechanism for resource %q reported on resource changed error: %v", er.nameToWatch, update.Err) - } - // Report an empty update that would result in no priority child being - // created for this discovery mechanism.
This would result in the priority - // LB policy reporting TRANSIENT_FAILURE (as there would be no priorities or - // localities) if this was the only discovery mechanism, or would result in - // the priority LB policy using a lower priority discovery mechanism when - // that becomes available. - er.mu.Lock() - er.update = &xdsresource.EndpointsUpdate{} - er.mu.Unlock() + er.mu.Lock() + er.update = &update.Resource + er.mu.Unlock() + + er.topLevelResolver.onUpdate(onDone) +} - er.topLevelResolver.onUpdate(onDone) +func (er *edsDiscoveryMechanism) ResourceError(err error, onDone func()) { + if er.stopped.HasFired() { + onDone() return } + er.logger.Warningf("EDS discovery mechanism for resource %q reported resource error: %v", er.nameToWatch, err) + + // Report an empty update that would result in no priority child being + // created for this discovery mechanism. This would result in the priority + // LB policy reporting TRANSIENT_FAILURE (as there would be no priorities or + // localities) if this was the only discovery mechanism, or would result in + // the priority LB policy using a lower priority discovery mechanism when + // that becomes available. 
er.mu.Lock() - u := update.Data.(*xdsresource.EndpointsResourceData) - er.update = &u.Resource + er.update = &xdsresource.EndpointsUpdate{} er.mu.Unlock() er.topLevelResolver.onUpdate(onDone) } -func (er *edsDiscoveryMechanism) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (er *edsDiscoveryMechanism) AmbientError(err error, onDone func()) { if er.stopped.HasFired() { onDone() return } if er.logger.V(2) { - er.logger.Infof("EDS discovery mechanism for resource %q reported error: %v", er.nameToWatch, err) + er.logger.Infof("EDS discovery mechanism for resource %q reported ambient error: %v", er.nameToWatch, err) } er.mu.Lock() diff --git a/xds/internal/resolver/watch_service.go b/xds/internal/resolver/watch_service.go index cddd571cf4f3..e8d52d0e0730 100644 --- a/xds/internal/resolver/watch_service.go +++ b/xds/internal/resolver/watch_service.go @@ -36,18 +36,17 @@ func newListenerWatcher(resourceName string, parent *xdsResolver) *listenerWatch return lw } -func (l *listenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - handleError := func(context.Context) { l.parent.onListenerResourceChangedError(update.Err); onDone() } - l.parent.serializer.ScheduleOr(handleError, onDone) - return - } - u := update.Data.(*xdsresource.ListenerResourceData) - handleUpdate := func(context.Context) { l.parent.onListenerResourceUpdate(u.Resource); onDone() } +func (l *listenerWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, onDone func()) { + handleUpdate := func(context.Context) { l.parent.onListenerResourceUpdate(update.Resource); onDone() } l.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (l *listenerWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (l *listenerWatcher) ResourceError(err error, onDone func()) { + handleError := func(context.Context) { l.parent.onListenerResourceError(err); onDone() } + 
l.parent.serializer.ScheduleOr(handleError, onDone) +} + +func (l *listenerWatcher) AmbientError(err error, onDone func()) { handleError := func(context.Context) { l.parent.onListenerResourceAmbientError(err); onDone() } l.parent.serializer.ScheduleOr(handleError, onDone) } @@ -69,21 +68,20 @@ func newRouteConfigWatcher(resourceName string, parent *xdsResolver) *routeConfi return rw } -func (r *routeConfigWatcher) OnResourceChanged(u *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if u.Err != nil { - handleError := func(context.Context) { r.parent.onRouteConfigResourceChangedError(r.resourceName, u.Err); onDone() } - r.parent.serializer.ScheduleOr(handleError, onDone) - return - } +func (r *routeConfigWatcher) ResourceChanged(u *xdsresource.RouteConfigResourceData, onDone func()) { handleUpdate := func(context.Context) { - update := u.Data.(*xdsresource.RouteConfigResourceData) - r.parent.onRouteConfigResourceUpdate(r.resourceName, update.Resource) + r.parent.onRouteConfigResourceUpdate(r.resourceName, u.Resource) onDone() } r.parent.serializer.ScheduleOr(handleUpdate, onDone) } -func (r *routeConfigWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (r *routeConfigWatcher) ResourceError(err error, onDone func()) { + handleError := func(context.Context) { r.parent.onRouteConfigResourceError(r.resourceName, err); onDone() } + r.parent.serializer.ScheduleOr(handleError, onDone) +} + +func (r *routeConfigWatcher) AmbientError(err error, onDone func()) { handleError := func(context.Context) { r.parent.onRouteConfigResourceAmbientError(r.resourceName, err); onDone() } r.parent.serializer.ScheduleOr(handleError, onDone) } diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index 3795d625d526..8bf8487a7a9b 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -520,15 +520,15 @@ func (r *xdsResolver) onListenerResourceUpdate(update 
xdsresource.ListenerUpdate func (r *xdsResolver) onListenerResourceAmbientError(err error) { if r.logger.V(2) { - r.logger.Infof("Received error for Listener resource %q: %v", r.ldsResourceName, err) + r.logger.Infof("Received ambient error for Listener resource %q: %v", r.ldsResourceName, err) } r.onError(err) } // Only executed in the context of a serializer callback. -func (r *xdsResolver) onListenerResourceChangedError(err error) { +func (r *xdsResolver) onListenerResourceError(err error) { if r.logger.V(2) { - r.logger.Infof("Received on-resource-changed error for Listener resource %q: %v", r.ldsResourceName, err) + r.logger.Infof("Received resource error for Listener resource %q: %v", r.ldsResourceName, err) } r.listenerUpdateRecvd = false @@ -561,15 +561,15 @@ func (r *xdsResolver) onRouteConfigResourceUpdate(name string, update xdsresourc // Only executed in the context of a serializer callback. func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) { if r.logger.V(2) { - r.logger.Infof("Received error for RouteConfiguration resource %q: %v", name, err) + r.logger.Infof("Received ambient error for RouteConfiguration resource %q: %v", name, err) } r.onError(err) } // Only executed in the context of a serializer callback. 
-func (r *xdsResolver) onRouteConfigResourceChangedError(name string, err error) { +func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { if r.logger.V(2) { - r.logger.Infof("Received on-resource-changed error for RouteConfiguration resource %q: %v", name, err) + r.logger.Infof("Received resource error for RouteConfiguration resource %q: %v", name, err) } if r.rdsResourceName != name { diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index b8c2fa477666..eb381fddb48f 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -397,7 +397,7 @@ func (l *listenerWrapper) switchModeLocked(newMode connectivity.ServingMode, err } } -func (l *listenerWrapper) onLDSResourceChangedError(err error) { +func (l *listenerWrapper) onLDSResourceError(err error) { l.mu.Lock() defer l.mu.Unlock() l.switchModeLocked(connectivity.ServingModeNotServing, err) @@ -414,38 +414,38 @@ type ldsWatcher struct { name string } -func (lw *ldsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (lw *ldsWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, onDone func()) { defer onDone() if lw.parent.closed.HasFired() { - if update.Err != nil { - lw.logger.Warningf("Resource %q received err: %#v after listener was closed", lw.name, update.Err) - } else { - lw.logger.Warningf("Resource %q received update: %#v after listener was closed", lw.name, update) - } + lw.logger.Warningf("Resource %q received update: %#v after listener was closed", lw.name, update) return } - if update.Err != nil { - if lw.logger.V(2) { - lw.logger.Infof("LDS watch for resource %q received error: %v", lw.name, update.Err) - } - lw.parent.onLDSResourceChangedError(update.Err) + if lw.logger.V(2) { + lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, update.Resource) + } + lw.parent.handleLDSUpdate(update.Resource) +} + +func (lw 
*ldsWatcher) ResourceError(err error, onDone func()) { + defer onDone() + if lw.parent.closed.HasFired() { + lw.logger.Warningf("Resource %q received resource error: %v after listener was closed", lw.name, err) return } - u := update.Data.(*xdsresource.ListenerResourceData) - if update.Err != nil { - lw.logger.Infof("LDS watch for resource %q received update: %v", lw.name, u.Resource) + if lw.logger.V(2) { + lw.logger.Infof("LDS watch for resource %q reported resource error: %v", lw.name, err) } - lw.parent.handleLDSUpdate(u.Resource) + lw.parent.onLDSResourceError(err) } -func (lw *ldsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *ldsWatcher) AmbientError(err error, onDone func()) { defer onDone() if lw.parent.closed.HasFired() { - lw.logger.Warningf("Resource %q received error: %v after listener was closed", lw.name, err) + lw.logger.Warningf("Resource %q received ambient error: %v after listener was closed", lw.name, err) return } if lw.logger.V(2) { - lw.logger.Infof("LDS watch for resource %q reported error: %v", lw.name, err) + lw.logger.Infof("LDS watch for resource %q reported ambient error: %v", lw.name, err) } // For errors which are anything other than "resource-not-found", we // continue to use the old configuration. 
diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index 7999bcbc7140..2afde20ac2ac 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -147,7 +147,7 @@ type rdsWatcher struct { canceled bool // eats callbacks if true } -func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (rw *rdsWatcher) ResourceChanged(update *xdsresource.RouteConfigResourceData, onDone func()) { defer onDone() rw.mu.Lock() if rw.canceled { @@ -155,21 +155,25 @@ func (rw *rdsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, return } rw.mu.Unlock() - if update.Err != nil { - if rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q received error: %v", rw.routeName, update.Err) - } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: update.Err}) - return - } - u := update.Data.(*xdsresource.RouteConfigResourceData) if rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, u.Resource) + rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, update.Resource) } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &u.Resource}) + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &update.Resource}) +} + +func (rw *rdsWatcher) ResourceError(err error, onDone func()) { + defer onDone() + rw.mu.Lock() + if rw.canceled { + rw.mu.Unlock() + return + } + rw.mu.Unlock() + rw.logger.Warningf("RDS watch for resource %q reported resource error", rw.routeName) + rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) } -func (rw *rdsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (rw *rdsWatcher) AmbientError(err error, onDone func()) { defer onDone() rw.mu.Lock() if rw.canceled { @@ -178,7 +182,7 @@ func (rw *rdsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { } rw.mu.Unlock() if 
rw.logger.V(2) { - rw.logger.Infof("RDS watch for resource %q reported error: %v", rw.routeName, err) + rw.logger.Infof("RDS watch for resource %q reported ambient error: %v", rw.routeName, err) } rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) } diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index 50db5d63554a..9b20b7319ed9 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -35,35 +35,30 @@ type TestResourceWatcher struct { ResourceDoesNotExistCh chan struct{} } -// OnResourceChanged is invoked by the xDS client to report the latest update +// ResourceChanged is invoked by the xDS client to report the latest update // or an error on the resource being watched. -func (w *TestResourceWatcher) OnResourceChanged(update xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (w *TestResourceWatcher) ResourceChanged(data xdsresource.ResourceData, onDone func()) { defer onDone() - if update.Err != nil { - if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { - select { - case <-w.ResourceDoesNotExistCh: - default: - } - w.ResourceDoesNotExistCh <- struct{}{} - return - } - select { - case <-w.ErrorCh: - default: - } - w.ErrorCh <- update.Err - return - } select { case <-w.UpdateCh: default: } - w.UpdateCh <- &update.Data + w.UpdateCh <- &data + +} + +// ResourceError is invoked by the xDS client to report the latest error. +func (w *TestResourceWatcher) ResourceError(_ error, onDone func()) { + defer onDone() + select { + case <-w.ResourceDoesNotExistCh: + default: + } + w.ResourceDoesNotExistCh <- struct{}{} } -// OnAmbientError is invoked by the xDS client to report the latest error. -func (w *TestResourceWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +// AmbientError is invoked by the xDS client to report the latest error. 
+func (w *TestResourceWatcher) AmbientError(err error, onDone func()) { defer onDone() select { case <-w.ErrorCh: diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index ed4540c4f5ed..d72acb1d37ec 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -193,9 +193,15 @@ func (a *authority) handleADSStreamFailure(serverConfig *bootstrap.ServerConfig, for _, state := range rType { for watcher := range state.watchers { watcher := watcher - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnAmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) - }) + if state.cache == nil { + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) + }) + } else { + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { + watcher.AmbientError(xdsresource.NewErrorf(xdsresource.ErrorTypeConnection, "xds: error received from xDS stream: %v", err), func() {}) + }) + } } } } @@ -363,7 +369,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher err := uErr.Err watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.OnAmbientError(err, done) }) + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) } continue } @@ -389,7 +395,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig resource := uErr.Resource watcherCnt.Add(1) funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Data: resource}, done) + watcher.ResourceChanged(resource, done) }) } } @@ -466,7 +472,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig 
*bootstrap.ServerConfig watcher := watcher watcherCnt.Add(1) funcsToSchedule = append(funcsToSchedule, func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName())}, done) + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q has been removed", name, rType.TypeName()), done) }) } } @@ -510,7 +516,7 @@ func (a *authority) handleADSResourceDoesNotExist(rType xdsresource.Type, resour for watcher := range state.watchers { watcher := watcher a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName())}, func() {}) + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName()), func() {}) }) } } @@ -648,7 +654,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // resource here for watchCallbackSerializer. resource := state.cache a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Data: resource}, func() {}) + watcher.ResourceChanged(resource, func() {}) }) } // If last update was NACK'd, notify the new watcher of error @@ -661,13 +667,17 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w // xdsClientSerializer callback. Hence making a copy of the error // here for watchCallbackSerializer. 
err := state.md.ErrState.Err - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.OnAmbientError(err, func() {}) }) + if state.cache == nil { + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) + } else { + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) }) + } } // If the metadata field is updated to indicate that the management // server does not have this resource, notify the new watcher. if state.md.Status == xdsresource.ServiceStatusNotExist { a.watcherCallbackSerializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName())}, func() {}) + watcher.ResourceError(xdsresource.NewErrorf(xdsresource.ErrorTypeResourceNotFound, "xds: resource %q of type %q does not exist", resourceName, rType.TypeName()), func() {}) }) } cleanup = a.unwatchResource(rType, resourceName, watcher) diff --git a/xds/internal/xdsclient/clientimpl_watchers.go b/xds/internal/xdsclient/clientimpl_watchers.go index 22292dbbdc7d..324c5ea55761 100644 --- a/xds/internal/xdsclient/clientimpl_watchers.go +++ b/xds/internal/xdsclient/clientimpl_watchers.go @@ -46,7 +46,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if err := c.resourceTypes.maybeRegister(rType); err != nil { logger.Warningf("Watch registered for name %q of type %q which is already registered", rType.TypeName(), resourceName) c.serializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: err}, func() {}) + watcher.ResourceError(err, func() {}) }) return func() {} } @@ -56,7 +56,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, if a == nil { logger.Warningf("Watch registered for name %q of type %q, 
authority %q is not found", rType.TypeName(), resourceName, n.Authority) c.serializer.TrySchedule(func(context.Context) { - watcher.OnResourceChanged(xdsresource.ResourceDataOrError{Err: fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName)}, func() {}) + watcher.ResourceError(fmt.Errorf("authority %q not found in bootstrap config for resource %q", n.Authority, resourceName), func() {}) }) return func() {} } diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index ee6b610ecb55..d22f4bdcae79 100644 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -45,43 +45,22 @@ import ( // DoneNotifier passed to the callback available to the test, thereby enabling // the test to block this watcher for as long as required. type blockingListenerWatcher struct { - doneNotifierCh chan xdsresource.OnDoneFunc // DoneNotifier passed to the callback. - updateCh chan struct{} // Written to when an update is received. - errorCh chan struct{} // Written to when an error is received. - notFoundCh chan struct{} // Written to when the resource is not found. + doneNotifierCh chan func() // DoneNotifier passed to the callback. + updateCh chan struct{} // Written to when an update is received. + errorCh chan struct{} // Written to when an error is received. + notFoundCh chan struct{} // Written to when the resource is not found. 
} func newBLockingListenerWatcher() *blockingListenerWatcher { return &blockingListenerWatcher{ - doneNotifierCh: make(chan xdsresource.OnDoneFunc, 1), + doneNotifierCh: make(chan func(), 1), updateCh: make(chan struct{}, 1), errorCh: make(chan struct{}, 1), notFoundCh: make(chan struct{}, 1), } } -func (lw *blockingListenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, done xdsresource.OnDoneFunc) { - if update.Err != nil { - if xdsresource.ErrType(update.Err) == xdsresource.ErrorTypeResourceNotFound { - // Notify receipt of resource not found. - select { - case lw.notFoundCh <- struct{}{}: - default: - } - } else { - select { - case lw.errorCh <- struct{}{}: - default: - } - } - - select { - case lw.doneNotifierCh <- done: - default: - } - - return - } +func (lw *blockingListenerWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, done func()) { // Notify receipt of the update. select { case lw.updateCh <- struct{}{}: @@ -94,7 +73,20 @@ func (lw *blockingListenerWatcher) OnResourceChanged(update *xdsresource.Resourc } } -func (lw *blockingListenerWatcher) OnAmbientError(err error, done xdsresource.OnDoneFunc) { +func (lw *blockingListenerWatcher) ResourceError(err error, done func()) { + // Notify receipt of an error. + select { + case lw.errorCh <- struct{}{}: + default: + } + + select { + case lw.doneNotifierCh <- done: + default: + } +} + +func (lw *blockingListenerWatcher) AmbientError(err error, done func()) { // Notify receipt of an error. select { case lw.errorCh <- struct{}{}: @@ -402,7 +394,7 @@ func (s) TestADSFlowControl_ResourceUpdates_MultipleResources(t *testing.T) { // guaranteed. So, we select on both of them and unblock the first watcher // whose callback is invoked. 
var otherWatcherUpdateCh chan struct{} - var otherWatcherDoneCh chan xdsresource.OnDoneFunc + var otherWatcherDoneCh chan func() select { case <-watcher1.updateCh: onDone := <-watcher1.doneNotifierCh diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index 229e821d9d5a..b5e5fa35919d 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -44,10 +44,13 @@ import ( type noopClusterWatcher struct{} -func (noopClusterWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (noopClusterWatcher) ResourceChanged(_ *xdsresource.ClusterResourceData, onDone func()) { onDone() } -func (noopClusterWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (noopClusterWatcher) ResourceError(_ error, onDone func()) { + onDone() +} +func (noopClusterWatcher) AmbientError(_ error, onDone func()) { onDone() } @@ -64,18 +67,12 @@ func newClusterWatcher() *clusterWatcher { return &clusterWatcher{updateCh: testutils.NewChannel()} } -func (cw *clusterWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - cw.updateCh.Replace(clusterUpdateErrTuple{err: update.Err}) - onDone() - return - } - u := update.Data.(*xdsresource.ClusterResourceData) - cw.updateCh.Send(clusterUpdateErrTuple{update: u.Resource}) +func (cw *clusterWatcher) ResourceChanged(update *xdsresource.ClusterResourceData, onDone func()) { + cw.updateCh.Send(clusterUpdateErrTuple{update: update.Resource}) onDone() } -func (cw *clusterWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (cw *clusterWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies 
tests which will have @@ -84,6 +81,11 @@ func (cw *clusterWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFun onDone() } +func (cw *clusterWatcher) AmbientError(err error, onDone func()) { + cw.updateCh.Replace(clusterUpdateErrTuple{err: err}) + onDone() +} + // badClusterResource returns a cluster resource for the given name which // contains a config_source_specifier for the `lrs_server` field which is not // set to `self`, and hence is expected to be NACKed by the client. diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index 12b9b004b76d..ff67f05d257a 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -53,13 +53,13 @@ const ( type noopEndpointsWatcher struct{} -func (noopEndpointsWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) ResourceChanged(_ *xdsresource.EndpointsResourceData, onDone func()) { onDone() } -func (noopEndpointsWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) ResourceError(_ error, onDone func()) { onDone() } -func (noopEndpointsWatcher) OnResourceDoesNotExist(onDone xdsresource.OnDoneFunc) { +func (noopEndpointsWatcher) AmbientError(_ error, onDone func()) { onDone() } @@ -76,18 +76,12 @@ func newEndpointsWatcher() *endpointsWatcher { return &endpointsWatcher{updateCh: testutils.NewChannel()} } -func (ew *endpointsWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - ew.updateCh.Replace(endpointsUpdateErrTuple{err: update.Err}) - onDone() - return - } - u := update.Data.(*xdsresource.EndpointsResourceData) - ew.updateCh.Send(endpointsUpdateErrTuple{update: u.Resource}) +func (ew *endpointsWatcher) ResourceChanged(update *xdsresource.EndpointsResourceData, onDone func()) { + 
ew.updateCh.Send(endpointsUpdateErrTuple{update: update.Resource}) onDone() } -func (ew *endpointsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (ew *endpointsWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -96,6 +90,11 @@ func (ew *endpointsWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneF onDone() } +func (ew *endpointsWatcher) AmbientError(err error, onDone func()) { + ew.updateCh.Replace(endpointsUpdateErrTuple{err: err}) + onDone() +} + // badEndpointsResource returns a endpoints resource for the given // edsServiceName which contains an endpoint with a load_balancing weight of // `0`. This is expected to be NACK'ed by the xDS client. diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index ac913e01512f..920e6848962b 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -48,10 +48,13 @@ import ( type noopListenerWatcher struct{} -func (noopListenerWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (noopListenerWatcher) ResourceChanged(_ *xdsresource.ListenerResourceData, onDone func()) { onDone() } -func (noopListenerWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (noopListenerWatcher) ResourceError(_ error, onDone func()) { + onDone() +} +func (noopListenerWatcher) AmbientError(_ error, onDone func()) { onDone() } @@ -68,18 +71,12 @@ func newListenerWatcher() *listenerWatcher { return &listenerWatcher{updateCh: testutils.NewChannel()} } -func (lw *listenerWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - 
lw.updateCh.Replace(listenerUpdateErrTuple{err: update.Err}) - onDone() - return - } - u := update.Data.(*xdsresource.ListenerResourceData) - lw.updateCh.Send(listenerUpdateErrTuple{update: u.Resource}) +func (lw *listenerWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, onDone func()) { + lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) onDone() } -func (lw *listenerWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -88,6 +85,11 @@ func (lw *listenerWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFu onDone() } +func (lw *listenerWatcher) AmbientError(err error, onDone func()) { + lw.updateCh.Replace(listenerUpdateErrTuple{err: err}) + onDone() +} + type listenerWatcherMultiple struct { updateCh *testutils.Channel } @@ -98,18 +100,17 @@ func newListenerWatcherMultiple(size int) *listenerWatcherMultiple { return &listenerWatcherMultiple{updateCh: testutils.NewChannelWithSize(size)} } -func (lw *listenerWatcherMultiple) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - lw.updateCh.Send(listenerUpdateErrTuple{err: update.Err}) - onDone() - return - } - u := update.Data.(*xdsresource.ListenerResourceData) - lw.updateCh.Send(listenerUpdateErrTuple{update: u.Resource}) +func (lw *listenerWatcherMultiple) ResourceChanged(update *xdsresource.ListenerResourceData, onDone func()) { + lw.updateCh.Send(listenerUpdateErrTuple{update: update.Resource}) + onDone() +} + +func (lw *listenerWatcherMultiple) ResourceError(err error, onDone func()) { + lw.updateCh.Send(listenerUpdateErrTuple{err: err}) onDone() } -func (lw *listenerWatcherMultiple) 
OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (lw *listenerWatcherMultiple) AmbientError(err error, onDone func()) { lw.updateCh.Send(listenerUpdateErrTuple{err: err}) onDone() } diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index a77b59ae490a..6091fb14438c 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -69,21 +69,15 @@ func newTestRouteConfigWatcher(client xdsclient.XDSClient, name1, name2 string) } } -func (rw *testRouteConfigWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: update.Err}) - onDone() - return - } - rc := update.Data.(*xdsresource.RouteConfigResourceData) - rw.updateCh.Send(routeConfigUpdateErrTuple{update: rc.Resource}) +func (rw *testRouteConfigWatcher) ResourceChanged(update *xdsresource.RouteConfigResourceData, onDone func()) { + rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) rw.cancel1 = xdsresource.WatchRouteConfig(rw.client, rw.name1, rw.rcw1) rw.cancel2 = xdsresource.WatchRouteConfig(rw.client, rw.name2, rw.rcw2) onDone() } -func (rw *testRouteConfigWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (rw *testRouteConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -92,6 +86,11 @@ func (rw *testRouteConfigWatcher) OnAmbientError(err error, onDone xdsresource.O onDone() } +func (rw *testRouteConfigWatcher) AmbientError(err error, onDone func()) { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) + onDone() +} + func (rw *testRouteConfigWatcher) cancel() { rw.cancel1() 
rw.cancel2() diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index 1facba7afbce..60b871a16077 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -43,10 +43,13 @@ import ( type noopRouteConfigWatcher struct{} -func (noopRouteConfigWatcher) OnResourceChanged(_ *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { +func (noopRouteConfigWatcher) ResourceChanged(_ *xdsresource.RouteConfigResourceData, onDone func()) { onDone() } -func (noopRouteConfigWatcher) OnAmbientError(_ error, onDone xdsresource.OnDoneFunc) { +func (noopRouteConfigWatcher) ResourceError(_ error, onDone func()) { + onDone() +} +func (noopRouteConfigWatcher) AmbientError(_ error, onDone func()) { onDone() } @@ -63,18 +66,12 @@ func newRouteConfigWatcher() *routeConfigWatcher { return &routeConfigWatcher{updateCh: testutils.NewChannel()} } -func (rw *routeConfigWatcher) OnResourceChanged(update *xdsresource.ResourceDataOrError, onDone xdsresource.OnDoneFunc) { - if update.Err != nil { - rw.updateCh.Replace(routeConfigUpdateErrTuple{err: update.Err}) - onDone() - return - } - rc := update.Data.(*xdsresource.RouteConfigResourceData) - rw.updateCh.Send(routeConfigUpdateErrTuple{update: rc.Resource}) +func (rw *routeConfigWatcher) ResourceChanged(update *xdsresource.RouteConfigResourceData, onDone func()) { + rw.updateCh.Send(routeConfigUpdateErrTuple{update: update.Resource}) onDone() } -func (rw *routeConfigWatcher) OnAmbientError(err error, onDone xdsresource.OnDoneFunc) { +func (rw *routeConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` // here and in OnResourceDoesNotExist() simplifies tests which will have @@ -83,6 +80,11 @@ func (rw *routeConfigWatcher) OnAmbientError(err error, onDone 
xdsresource.OnDon onDone() } +func (rw *routeConfigWatcher) AmbientError(err error, onDone func()) { + rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) + onDone() +} + // badRouteConfigResource returns a RouteConfiguration resource for the given // routeName which contains a retry config with num_retries set to `0`. This is // expected to be NACK'ed by the xDS client. diff --git a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go index e28c5768fe3d..3d85c31ff433 100644 --- a/xds/internal/xdsclient/xdsresource/cluster_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/cluster_resource_type.go @@ -111,38 +111,37 @@ func (c *ClusterResourceData) Raw() *anypb.Any { // corresponding to the cluster resource being watched. gRFC A88 contains an // exhaustive list of what method is invoked under what conditions. type ClusterWatcher interface { - // OnResourceChanged is invoked to notify the watcher of a new version of - // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. - // - // Upon receiving this, in case of an error, the watcher should - // stop using any previously seen resource. xDS client will remove the - // resource from its cache. - OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - - // OnAmbientError is invoked if resource is already cached under different - // error conditions. - // - // Upon receiving this, the watcher should not stop using the previously - // seen resource. xDS client will not remove the resource from its cache. - OnAmbientError(error, OnDoneFunc) + // ResourceChanged indicates a new version of the resource is available. + ResourceChanged(resource *ClusterResourceData, done func()) + + // ResourceError indicates an error occurred while trying to fetch or + // decode the associated resource. The previous version of the resource + // should be considered invalid. 
+ ResourceError(err error, done func()) + + // AmbientError indicates an error occurred after a resource has been + // received that should not modify the use of that resource but may provide + // useful information about the state of the XDSClient for debugging + // purposes. The previous version of the resource should still be + // considered valid. + AmbientError(err error, done func()) } type delegatingClusterWatcher struct { watcher ClusterWatcher } -func (d *delegatingClusterWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { - if update.Err != nil { - d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) - return - } - c := update.Data.(*ClusterResourceData) - d.watcher.OnResourceChanged(&ResourceDataOrError{Data: c}, onDone) +func (d *delegatingClusterWatcher) ResourceChanged(data ResourceData, onDone func()) { + c := data.(*ClusterResourceData) + d.watcher.ResourceChanged(c, onDone) +} + +func (d *delegatingClusterWatcher) ResourceError(err error, onDone func()) { + d.watcher.ResourceError(err, onDone) } -func (d *delegatingClusterWatcher) OnAmbientError(err error, onDone OnDoneFunc) { - d.watcher.OnAmbientError(err, onDone) +func (d *delegatingClusterWatcher) AmbientError(err error, onDone func()) { + d.watcher.AmbientError(err, onDone) } // WatchCluster uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go index 8854bb590060..de574dd8d345 100644 --- a/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/endpoints_resource_type.go @@ -107,38 +107,37 @@ func (e *EndpointsResourceData) Raw() *anypb.Any { // events corresponding to the endpoints resource being watched. gRFC A88 // contains an exhaustive list of what method is invoked under what conditions. 
type EndpointsWatcher interface { - // OnResourceChanged is invoked to notify the watcher of a new version of - // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. - // - // Upon receiving this, in case of an error, the watcher should - // stop using any previously seen resource. xDS client will remove the - // resource from its cache. - OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - - // OnAmbientError is invoked if resource is already cached under different - // error conditions. - // - // Upon receiving this, the watcher should not stop using the previously - // seen resource. xDS client will not remove the resource from its cache. - OnAmbientError(error, OnDoneFunc) + // ResourceChanged indicates a new version of the resource is available. + ResourceChanged(resource *EndpointsResourceData, done func()) + + // ResourceError indicates an error occurred while trying to fetch or + // decode the associated resource. The previous version of the resource + // should be considered invalid. + ResourceError(err error, done func()) + + // AmbientError indicates an error occurred after a resource has been + // received that should not modify the use of that resource but may provide + // useful information about the state of the XDSClient for debugging + // purposes. The previous version of the resource should still be + // considered valid. 
+ AmbientError(err error, done func()) } type delegatingEndpointsWatcher struct { watcher EndpointsWatcher } -func (d *delegatingEndpointsWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { - if update.Err != nil { - d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) - return - } - e := update.Data.(*EndpointsResourceData) - d.watcher.OnResourceChanged(&ResourceDataOrError{Data: e}, onDone) +func (d *delegatingEndpointsWatcher) ResourceChanged(data ResourceData, onDone func()) { + e := data.(*EndpointsResourceData) + d.watcher.ResourceChanged(e, onDone) +} + +func (d *delegatingEndpointsWatcher) ResourceError(err error, onDone func()) { + d.watcher.ResourceError(err, onDone) } -func (d *delegatingEndpointsWatcher) OnAmbientError(err error, onDone OnDoneFunc) { - d.watcher.OnAmbientError(err, onDone) +func (d *delegatingEndpointsWatcher) AmbientError(err error, onDone func()) { + d.watcher.AmbientError(err, onDone) } // WatchEndpoints uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/listener_resource_type.go b/xds/internal/xdsclient/xdsresource/listener_resource_type.go index 89146bfe8ec4..0f49e6c56a3a 100644 --- a/xds/internal/xdsclient/xdsresource/listener_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/listener_resource_type.go @@ -144,38 +144,36 @@ func (l *ListenerResourceData) Raw() *anypb.Any { // events corresponding to the listener resource being watched. gRFC A88 // contains an exhaustive list of what method is invoked under what conditions. type ListenerWatcher interface { - // OnResourceChanged is invoked to notify the watcher of a new version of - // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. - // - // Upon receiving this, in case of an error, the watcher should - // stop using any previously seen resource. xDS client will remove the - // resource from its cache. 
- OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - - // OnAmbientError is invoked if resource is already cached under different - // error conditions. - // - // Upon receiving this, the watcher should not stop using the previously - // seen resource. xDS client will not remove the resource from its cache. - OnAmbientError(error, OnDoneFunc) + // ResourceChanged indicates a new version of the resource is available. + ResourceChanged(resource *ListenerResourceData, done func()) + + // ResourceError indicates an error occurred while trying to fetch or + // decode the associated resource. The previous version of the resource + // should be considered invalid. + ResourceError(err error, done func()) + + // AmbientError indicates an error occurred after a resource has been + // received that should not modify the use of that resource but may provide + // useful information about the state of the XDSClient for debugging + // purposes. The previous version of the resource should still be + // considered valid. 
+ AmbientError(err error, done func()) } type delegatingListenerWatcher struct { watcher ListenerWatcher } -func (d *delegatingListenerWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { - if update.Err != nil { - d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) - return - } - l := update.Data.(*ListenerResourceData) - d.watcher.OnResourceChanged(&ResourceDataOrError{Data: l}, onDone) +func (d *delegatingListenerWatcher) ResourceChanged(data ResourceData, onDone func()) { + l := data.(*ListenerResourceData) + d.watcher.ResourceChanged(l, onDone) +} +func (d *delegatingListenerWatcher) ResourceError(err error, onDone func()) { + d.watcher.ResourceError(err, onDone) } -func (d *delegatingListenerWatcher) OnAmbientError(err error, onDone OnDoneFunc) { - d.watcher.OnAmbientError(err, onDone) +func (d *delegatingListenerWatcher) AmbientError(err error, onDone func()) { + d.watcher.AmbientError(err, onDone) } // WatchListener uses xDS to discover the configuration associated with the diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index 2ed3cbe58d9b..52da29a2f6ef 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -52,39 +52,30 @@ type Producer interface { WatchResource(rType Type, resourceName string, watcher ResourceWatcher) (cancel func()) } -// OnDoneFunc is a function to be invoked by watcher implementations upon -// completing the processing of a callback from the xDS client. Failure to -// invoke this callback prevents the xDS client from reading further messages -// from the xDS server. -type OnDoneFunc func() - -// ResourceDataOrError contains either ResourceData or error. It is used to -// represent the result of an xDS resource update. Exactly one of Data or Err -// will be non-nil. 
-type ResourceDataOrError struct { - Data ResourceData - Err error -} - -// ResourceWatcher wraps the callbacks to be invoked for different events -// corresponding to the resource being watched. gRFC A88 contains an exhaustive -// list of what method is invoked under what conditions. +// ResourceWatcher is notified of the resource updates and errors that are +// received by the xDS client from the management server. +// +// All methods contain a done parameter which should be called when processing +// of the update has completed. For example, if processing a resource requires +// watching new resources, those watches should be completed before done is +// called, which can happen after the ResourceWatcher method has returned. +// Failure to call done will prevent the xDS client from providing future +// ResourceWatcher notifications. type ResourceWatcher interface { - // OnResourceChanged is invoked to notify the watcher of a new version of - // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. - // - // Upon receiving this, in case of an error, the watcher should - // stop using any previously seen resource. xDS client will remove the - // resource from its cache. - OnResourceChanged(ResourceDataOrError, OnDoneFunc) - - // OnAmbientError is invoked if resource is already cached under different - // error conditions. - // - // Upon receiving this, the watcher should not stop using the previously - // seen resource. xDS client will not remove the resource from its cache. - OnAmbientError(error, OnDoneFunc) + // ResourceChanged indicates a new version of the resource is available. + ResourceChanged(resourceData ResourceData, done func()) + + // ResourceError indicates an error occurred while trying to fetch or + // decode the associated resource. The previous version of the resource + // should be considered invalid. 
+ ResourceError(err error, done func()) + + // AmbientError indicates an error occurred after a resource has been + // received that should not modify the use of that resource but may provide + // useful information about the state of the XDSClient for debugging + // purposes. The previous version of the resource should still be + // considered valid. + AmbientError(err error, done func()) } // TODO: Once the implementation is complete, rename this interface as diff --git a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go index 45cb4db9b096..c292b1b8ef2c 100644 --- a/xds/internal/xdsclient/xdsresource/route_config_resource_type.go +++ b/xds/internal/xdsclient/xdsresource/route_config_resource_type.go @@ -109,38 +109,37 @@ func (r *RouteConfigResourceData) Raw() *anypb.Any { // A88 contains an exhaustive list of what method is invoked under what // conditions. type RouteConfigWatcher interface { - // OnResourceChanged is invoked to notify the watcher of a new version of - // the resource received from the xDS server or an error indicating the - // reason why the resource cannot be obtained. - // - // Upon receiving this, in case of an error, the watcher should - // stop using any previously seen resource. xDS client will remove the - // resource from its cache. - OnResourceChanged(*ResourceDataOrError, OnDoneFunc) - - // OnAmbientError is invoked if resource is already cached under different - // error conditions. - // - // Upon receiving this, the watcher should not stop using the previously - // seen resource. xDS client will not remove the resource from its cache. - OnAmbientError(error, OnDoneFunc) + // ResourceChanged indicates a new version of the resource is available. + ResourceChanged(resource *RouteConfigResourceData, done func()) + + // ResourceError indicates an error occurred while trying to fetch or + // decode the associated resource. 
The previous version of the resource + // should be considered invalid. + ResourceError(err error, done func()) + + // AmbientError indicates an error occurred after a resource has been + // received that should not modify the use of that resource but may provide + // useful information about the state of the XDSClient for debugging + // purposes. The previous version of the resource should still be + // considered valid. + AmbientError(err error, done func()) } type delegatingRouteConfigWatcher struct { watcher RouteConfigWatcher } -func (d *delegatingRouteConfigWatcher) OnResourceChanged(update ResourceDataOrError, onDone OnDoneFunc) { - if update.Err != nil { - d.watcher.OnResourceChanged(&ResourceDataOrError{Err: update.Err}, onDone) - return - } - rc := update.Data.(*RouteConfigResourceData) - d.watcher.OnResourceChanged(&ResourceDataOrError{Data: rc}, onDone) +func (d *delegatingRouteConfigWatcher) ResourceChanged(data ResourceData, onDone func()) { + rc := data.(*RouteConfigResourceData) + d.watcher.ResourceChanged(rc, onDone) +} + +func (d *delegatingRouteConfigWatcher) ResourceError(err error, onDone func()) { + d.watcher.ResourceError(err, onDone) } -func (d *delegatingRouteConfigWatcher) OnAmbientError(err error, onDone OnDoneFunc) { - d.watcher.OnAmbientError(err, onDone) +func (d *delegatingRouteConfigWatcher) AmbientError(err error, onDone func()) { + d.watcher.AmbientError(err, onDone) } // WatchRouteConfig uses xDS to discover the configuration associated with the From 33280a08db2a642cf3f17e6e14e4a26d169528ce Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Thu, 27 Mar 2025 00:33:26 +0530 Subject: [PATCH 09/16] fix some tests --- internal/xds/bootstrap/bootstrap.go | 2 +- .../balancer/cdsbalancer/cdsbalancer_test.go | 4 ++-- .../clusterresolver/e2e_test/balancer_test.go | 2 +- .../clusterresolver/resource_resolver_eds.go | 2 +- xds/internal/resolver/xds_resolver.go | 2 +- xds/internal/testutils/resource_watcher.go | 15 +++++++++------ 
xds/internal/xdsclient/authority.go | 2 +- .../tests/ads_stream_flow_control_test.go | 4 ++-- xds/internal/xdsclient/tests/cds_watchers_test.go | 2 +- xds/internal/xdsclient/tests/eds_watchers_test.go | 2 +- xds/internal/xdsclient/tests/lds_watchers_test.go | 2 +- .../xdsclient/tests/misc_watchers_test.go | 2 +- xds/internal/xdsclient/tests/rds_watchers_test.go | 2 +- 13 files changed, 23 insertions(+), 20 deletions(-) diff --git a/internal/xds/bootstrap/bootstrap.go b/internal/xds/bootstrap/bootstrap.go index 69b7ee80dc86..142e803930e1 100644 --- a/internal/xds/bootstrap/bootstrap.go +++ b/internal/xds/bootstrap/bootstrap.go @@ -206,7 +206,7 @@ func (sc *ServerConfig) ServerFeatures() []string { // // This feature controls the behavior of the xDS client when the server deletes // a previously sent Listener or Cluster resource. If set, the xDS client will -// not invoke the watchers' OnResourceDoesNotExist() method when a resource is +// not invoke the watchers' ResourceError() method when a resource is // deleted, nor will it remove the existing resource value from its cache. func (sc *ServerConfig) ServerFeaturesIgnoreResourceDeletion() bool { for _, sf := range sc.serverFeatures { diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index f76e99919479..eec9d378c297 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -813,7 +813,7 @@ func (s) TestResolverError(t *testing.T) { // Grab the wrapped connection from the listener wrapper. This will be used // to verify the connection is closed. 
- ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout*100000) defer cancel() val, err := lis.NewConnCh.Receive(ctx) if err != nil { @@ -1003,7 +1003,7 @@ func (s) TestClusterUpdate_ResourceNotFound(t *testing.T) { // Ensure RPC fails with Unavailable status code and the error message is // meaningful and contains the xDS node ID. - wantErr := fmt.Sprintf("cluster %q not found", clusterName) + wantErr := fmt.Sprintf("resource %q of type %q has been removed", clusterName, "ClusterResource") _, err := client.EmptyCall(ctx, &testpb.Empty{}) if err := verifyRPCError(err, codes.Unavailable, wantErr, nodeID); err != nil { t.Fatal(err) diff --git a/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go b/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go index b9eed392e492..517915dd0a0c 100644 --- a/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go +++ b/xds/internal/balancer/clusterresolver/e2e_test/balancer_test.go @@ -279,7 +279,7 @@ func (s) TestErrorFromParentLB_ResourceNotFound(t *testing.T) { } // Ensure that RPCs start to fail with expected error. 
- wantErr := fmt.Sprintf("cluster %q not found", clusterName) + wantErr := fmt.Sprintf("resource %q of type %q has been removed", clusterName, "ClusterResource") for ; ctx.Err() == nil; <-time.After(defaultTestShortTimeout) { sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) defer sCancel() diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index f5a514251491..eae98b2c5b53 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -75,7 +75,7 @@ func newEDSResolver(nameToWatch string, producer xdsresource.Producer, topLevelR return ret } -// OnResourceChanged is invoked to report an update for the resource being watched. +// ResourceChanged is invoked to report an update for the resource being watched. func (er *edsDiscoveryMechanism) ResourceChanged(update *xdsresource.EndpointsResourceData, onDone func()) { if er.stopped.HasFired() { onDone() diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index dc279be6294f..d30224a7aa53 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -593,7 +593,7 @@ func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) // Only executed in the context of a serializer callback. 
func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { - r.logger.Warningf("Received resource error for RouteConfiguration resource %q", name) + r.logger.Warningf("Received resource error for RouteConfiguration resource %q: %v", name, err) if r.rdsResourceName != name { return diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index 9b20b7319ed9..b601386917d6 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -31,12 +31,11 @@ type TestResourceWatcher struct { UpdateCh chan *xdsresource.ResourceData // ErrorCh is the channel on which errors from the xDS client are delivered. ErrorCh chan error - // ResourceDoesNotExistCh is the channel used to indicate calls to OnResourceDoesNotExist + // ResourceDoesNotExistCh is the channel used to indicate calls to ResourceError ResourceDoesNotExistCh chan struct{} } -// ResourceChanged is invoked by the xDS client to report the latest update -// or an error on the resource being watched. +// ResourceChanged is invoked by the xDS client to report the latest update. func (w *TestResourceWatcher) ResourceChanged(data xdsresource.ResourceData, onDone func()) { defer onDone() select { @@ -47,17 +46,21 @@ func (w *TestResourceWatcher) ResourceChanged(data xdsresource.ResourceData, onD } -// ResourceError is invoked by the xDS client to report the latest error. -func (w *TestResourceWatcher) ResourceError(_ error, onDone func()) { +// ResourceError is invoked by the xDS client to report the latest error to +// stop watching the resource. +func (w *TestResourceWatcher) ResourceError(err error, onDone func()) { defer onDone() select { case <-w.ResourceDoesNotExistCh: + case <-w.ErrorCh: default: } + w.ErrorCh <- err w.ResourceDoesNotExistCh <- struct{}{} } -// AmbientError is invoked by the xDS client to report the latest error. +// AmbientError is invoked by the xDS client to report the latest ambient +// error. 
func (w *TestResourceWatcher) AmbientError(err error, onDone func()) { defer onDone() select { diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 299987a268da..868c0dc6311b 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -460,7 +460,7 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig // `ignore_resource_deletion` server feature is enabled through the // bootstrap configuration. If the resource deletion is to be // ignored, the resource is not removed from the cache and the - // corresponding OnResourceDoesNotExist() callback is not invoked on + // corresponding ResourceError() callback is not invoked on // the watchers. if !state.deletionIgnored { state.deletionIgnored = true diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index 192209a74327..33b4011c21f0 100644 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -76,7 +76,7 @@ func (lw *blockingListenerWatcher) ResourceChanged(update *xdsresource.ListenerR func (lw *blockingListenerWatcher) ResourceError(err error, done func()) { // Notify receipt of an error. select { - case lw.errorCh <- struct{}{}: + case lw.notFoundCh <- struct{}{}: default: } @@ -518,7 +518,7 @@ func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { } // Test ADS stream flow control with a single resource that is deleted from the -// management server and therefore the watcher's OnResourceDoesNotExist() +// management server and therefore the watcher's ResourceError() // callback is expected to be invoked. Verifies that no further reads are // attempted until the callback is completely handled by the watcher. 
func (s) TestADSFlowControl_ResourceDoesNotExist(t *testing.T) { diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index baba282c5325..02d50679e72a 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -75,7 +75,7 @@ func (cw *clusterWatcher) ResourceChanged(update *xdsresource.ClusterResourceDat func (cw *clusterWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in OnResourceDoesNotExist() simplifies tests which will have + // here and in ResourceError() simplifies tests which will have // access to the most recently received error. cw.updateCh.Replace(clusterUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index 9d7d2864d97e..063460f726ce 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -84,7 +84,7 @@ func (ew *endpointsWatcher) ResourceChanged(update *xdsresource.EndpointsResourc func (ew *endpointsWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in OnResourceDoesNotExist() simplifies tests which will have + // here and in ResourceError() simplifies tests which will have // access to the most recently received error. 
ew.updateCh.Replace(endpointsUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index 9bac4a1d2987..11fc0e3384df 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -79,7 +79,7 @@ func (lw *listenerWatcher) ResourceChanged(update *xdsresource.ListenerResourceD func (lw *listenerWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in OnResourceDoesNotExist() simplifies tests which will have + // here and in ResourceError() simplifies tests which will have // access to the most recently received error. lw.updateCh.Replace(listenerUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index 524d4425f2c3..6709fe484f38 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -83,7 +83,7 @@ func (rw *testRouteConfigWatcher) ResourceChanged(update *xdsresource.RouteConfi func (rw *testRouteConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in OnResourceDoesNotExist() simplifies tests which will have + // here and in ResourceError() simplifies tests which will have // access to the most recently received error. 
rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index 9d8f6d7882cb..30ff4da85720 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -74,7 +74,7 @@ func (rw *routeConfigWatcher) ResourceChanged(update *xdsresource.RouteConfigRes func (rw *routeConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in OnResourceDoesNotExist() simplifies tests which will have + // here and in ResourceError() simplifies tests which will have // access to the most recently received error. rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) onDone() From 466d5fe75e7b5d0f5ec9d891ed712801bf1310b7 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Thu, 27 Mar 2025 02:02:42 +0530 Subject: [PATCH 10/16] ambient error when channel not found --- xds/internal/xdsclient/authority.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 868c0dc6311b..97efd7892b6f 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -616,7 +616,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w xdsChannel, err := a.xdsChannelToUse() if err != nil { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) }) return } From 479b7d350604c719bccb103fd5ba7ebc14428d2b Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Thu, 27 Mar 2025 09:39:46 +0530 Subject: [PATCH 11/16] handle lds error only for resource-not-found in listener wrapper --- 
xds/internal/server/listener_wrapper.go | 5 +++++ xds/internal/xdsclient/authority.go | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index bf1eaacf3fd2..b1fb7bfc2837 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -450,6 +450,11 @@ func (lw *ldsWatcher) ResourceError(err error, onDone func()) { if lw.logger.V(2) { lw.logger.Infof("LDS watch for resource %q reported resource error: %v", lw.name, err) } + if xdsresource.ErrType(err) != xdsresource.ErrorTypeResourceNotFound { + // For errors which are anything other than "resource-not-found", we + // continue to use the old configuration. + return + } lw.parent.onLDSResourceError(err) } diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 97efd7892b6f..868c0dc6311b 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -616,7 +616,7 @@ func (a *authority) watchResource(rType xdsresource.Type, resourceName string, w xdsChannel, err := a.xdsChannelToUse() if err != nil { - a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.AmbientError(err, func() {}) }) + a.watcherCallbackSerializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) return } From fcd69faa57ac6d48a25b11ba10fa1be261f143b7 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Fri, 4 Apr 2025 11:40:31 +0530 Subject: [PATCH 12/16] easwars review 3 --- xds/internal/balancer/cdsbalancer/cdsbalancer_test.go | 2 +- xds/internal/xdsclient/metrics_test.go | 6 +++--- xds/internal/xdsclient/tests/cds_watchers_test.go | 2 +- xds/internal/xdsclient/tests/eds_watchers_test.go | 2 +- xds/internal/xdsclient/tests/lds_watchers_test.go | 2 +- xds/internal/xdsclient/tests/misc_watchers_test.go | 2 +- xds/internal/xdsclient/tests/rds_watchers_test.go | 2 +- 
xds/internal/xdsclient/tests/resource_update_test.go | 6 +++--- xds/internal/xdsclient/xdsresource/resource_type.go | 8 ++++---- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index eec9d378c297..78d8e11852b3 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -813,7 +813,7 @@ func (s) TestResolverError(t *testing.T) { // Grab the wrapped connection from the listener wrapper. This will be used // to verify the connection is closed. - ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout*100000) + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() val, err := lis.NewConnCh.Receive(ctx) if err != nil { diff --git a/xds/internal/xdsclient/metrics_test.go b/xds/internal/xdsclient/metrics_test.go index 027029b20b39..369f7216411e 100644 --- a/xds/internal/xdsclient/metrics_test.go +++ b/xds/internal/xdsclient/metrics_test.go @@ -75,9 +75,9 @@ func (s) TestResourceUpdateMetrics(t *testing.T) { bootstrapContents, err := bootstrap.NewContentsForTesting(bootstrap.ConfigOptionsForTesting{ Servers: []byte(fmt.Sprintf(`[{ - "server_uri": %q, - "channel_creds": [{"type": "insecure"}] - }]`, mgmtServer.Address)), + "server_uri": %q, + "channel_creds": [{"type": "insecure"}] + }]`, mgmtServer.Address)), Node: []byte(fmt.Sprintf(`{"id": "%s"}`, nodeID)), Authorities: map[string]json.RawMessage{ "authority": []byte("{}"), diff --git a/xds/internal/xdsclient/tests/cds_watchers_test.go b/xds/internal/xdsclient/tests/cds_watchers_test.go index 02d50679e72a..82d7a06b91cf 100644 --- a/xds/internal/xdsclient/tests/cds_watchers_test.go +++ b/xds/internal/xdsclient/tests/cds_watchers_test.go @@ -75,7 +75,7 @@ func (cw *clusterWatcher) ResourceChanged(update *xdsresource.ClusterResourceDat func (cw *clusterWatcher) 
ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in ResourceError() simplifies tests which will have + // here and in AmbientError() simplifies tests which will have // access to the most recently received error. cw.updateCh.Replace(clusterUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/eds_watchers_test.go b/xds/internal/xdsclient/tests/eds_watchers_test.go index 063460f726ce..fcb7de4c4a65 100644 --- a/xds/internal/xdsclient/tests/eds_watchers_test.go +++ b/xds/internal/xdsclient/tests/eds_watchers_test.go @@ -84,7 +84,7 @@ func (ew *endpointsWatcher) ResourceChanged(update *xdsresource.EndpointsResourc func (ew *endpointsWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in ResourceError() simplifies tests which will have + // here and in AmbientError() simplifies tests which will have // access to the most recently received error. 
ew.updateCh.Replace(endpointsUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/lds_watchers_test.go b/xds/internal/xdsclient/tests/lds_watchers_test.go index 11fc0e3384df..f75b572a4c22 100644 --- a/xds/internal/xdsclient/tests/lds_watchers_test.go +++ b/xds/internal/xdsclient/tests/lds_watchers_test.go @@ -79,7 +79,7 @@ func (lw *listenerWatcher) ResourceChanged(update *xdsresource.ListenerResourceD func (lw *listenerWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in ResourceError() simplifies tests which will have + // here and in AmbientError() simplifies tests which will have // access to the most recently received error. lw.updateCh.Replace(listenerUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index 6709fe484f38..18fb091e0ab2 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -83,7 +83,7 @@ func (rw *testRouteConfigWatcher) ResourceChanged(update *xdsresource.RouteConfi func (rw *testRouteConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in ResourceError() simplifies tests which will have + // here and in AmbientError() simplifies tests which will have // access to the most recently received error. 
rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/rds_watchers_test.go b/xds/internal/xdsclient/tests/rds_watchers_test.go index 30ff4da85720..2086c018855d 100644 --- a/xds/internal/xdsclient/tests/rds_watchers_test.go +++ b/xds/internal/xdsclient/tests/rds_watchers_test.go @@ -74,7 +74,7 @@ func (rw *routeConfigWatcher) ResourceChanged(update *xdsresource.RouteConfigRes func (rw *routeConfigWatcher) ResourceError(err error, onDone func()) { // When used with a go-control-plane management server that continuously // resends resources which are NACKed by the xDS client, using a `Replace()` - // here and in ResourceError() simplifies tests which will have + // here and in AmbientError() simplifies tests which will have // access to the most recently received error. rw.updateCh.Replace(routeConfigUpdateErrTuple{err: err}) onDone() diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index 5ff9819b1e1c..284de9918c5a 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -161,7 +161,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -177,7 +177,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not 
exist", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", @@ -194,7 +194,7 @@ func (s) TestHandleListenerResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3routepb.RouteConfiguration{})}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ListenerResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ListenerResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.listener.v3.Listener", diff --git a/xds/internal/xdsclient/xdsresource/resource_type.go b/xds/internal/xdsclient/xdsresource/resource_type.go index 52da29a2f6ef..c22c5a6a3a84 100644 --- a/xds/internal/xdsclient/xdsresource/resource_type.go +++ b/xds/internal/xdsclient/xdsresource/resource_type.go @@ -57,10 +57,10 @@ type Producer interface { // // All methods contain a done parameter which should be called when processing // of the update has completed. For example, if processing a resource requires -// watching new resources, those watches should be completed before done is -// called, which can happen after the ResourceWatcher method has returned. -// Failure to call done will prevent the xDS client from providing future -// ResourceWatcher notifications. +// watching new resources, registration of those new watchers should be +// completed before done is called, which can happen after the ResourceWatcher +// method has returned. Failure to call done will prevent the xDS client from +// providing future ResourceWatcher notifications. type ResourceWatcher interface { // ResourceChanged indicates a new version of the resource is available. 
ResourceChanged(resourceData ResourceData, done func()) From e29cc822de2e1448596f46b40b3176026abab691 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Fri, 4 Apr 2025 22:06:03 +0530 Subject: [PATCH 13/16] for nack and channel failure, send resource error if not cached --- xds/internal/resolver/xds_resolver_test.go | 59 +++++++--- xds/internal/server/listener_wrapper.go | 5 - xds/internal/xdsclient/authority.go | 6 +- .../tests/ads_stream_flow_control_test.go | 11 +- xds/server_ext_test.go | 3 +- xds/server_security_ext_test.go | 107 +++++++++++------- xds/server_test.go | 27 +++-- 7 files changed, 146 insertions(+), 72 deletions(-) diff --git a/xds/internal/resolver/xds_resolver_test.go b/xds/internal/resolver/xds_resolver_test.go index 49ddf8140008..1389dd4fa30b 100644 --- a/xds/internal/resolver/xds_resolver_test.go +++ b/xds/internal/resolver/xds_resolver_test.go @@ -288,13 +288,10 @@ func (s) TestResolverCloseClosesXDSClient(t *testing.T) { } } -// Tests the case where a resource returned by the management server is NACKed -// by the xDS client, which then returns an update containing an error to the -// resolver. Verifies that the update is propagated to the ClientConn by the -// resolver. It also tests the cases where the resolver gets a good update -// subsequently, and another error after the good update. The test also verifies -// that these are propagated to the ClientConn. -func (s) TestResolverBadServiceUpdate(t *testing.T) { +// Tests the case where a resource, not present in cache, returned by the +// management server is NACKed by the xDS client, which then returns an update +// containing a resource error to the resolver. +func (s) TestResolverBadServiceUpdate_NACKedWithoutCache(t *testing.T) { // Spin up an xDS management server for the test. 
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -319,12 +316,29 @@ func (s) TestResolverBadServiceUpdate(t *testing.T) { } configureResourcesOnManagementServer(ctx, t, mgmtServer, nodeID, []*v3listenerpb.Listener{lis}, nil) - // Build the resolver and expect an error update from it. - stateCh, errCh, _ := buildResolverForTarget(t, resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}, bc) - wantErr := "no RouteSpecifier" - if err := waitForErrorFromResolver(ctx, errCh, wantErr, nodeID); err != nil { + // Build the resolver and expect an error update from it. Since the + // resource is not cached, it should be received as resource error. + _, errCh, _ := buildResolverForTarget(t, resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}, bc) + if err := waitForErrorFromResolver(ctx, errCh, "no valid clusters", nodeID); err != nil { t.Fatal(err) } +} + +// Tests the case where a resource, present in cache, returned by the +// management server is NACKed by the xDS client, which then returns +// an update containing an ambient error to the resolver. Verifies that the +// update is propagated to the ClientConn by the resolver. It tests the +// case where the resolver gets a good update first, and an error +// after the good update. The test also verifies that these are propagated to +// the ClientConn. +func (s) TestResolverBadServiceUpdate_NACKedWithCache(t *testing.T) { + // Spin up an xDS management server for the test. + ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) + defer cancel() + nodeID := uuid.New().String() + mgmtServer, _, _, bc := setupManagementServerForTest(t, nodeID) + + stateCh, errCh, _ := buildResolverForTarget(t, resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}, bc) // Configure good listener and route configuration resources on the // management server. 
@@ -335,10 +349,27 @@ func (s) TestResolverBadServiceUpdate(t *testing.T) { // Expect a good update from the resolver. verifyUpdateFromResolver(ctx, t, stateCh, wantDefaultServiceConfig) - // Configure another bad resource on the management server and expect an - // error update from the resolver. + // Configure a listener resource that is expected to be NACKed because it + // does not contain the `RouteSpecifier` field in the HTTPConnectionManager. + hcm := testutils.MarshalAny(t, &v3httppb.HttpConnectionManager{ + HttpFilters: []*v3httppb.HttpFilter{e2e.HTTPFilter("router", &v3routerpb.Router{})}, + }) + lis := &v3listenerpb.Listener{ + Name: defaultTestServiceName, + ApiListener: &v3listenerpb.ApiListener{ApiListener: hcm}, + FilterChains: []*v3listenerpb.FilterChain{{ + Name: "filter-chain-name", + Filters: []*v3listenerpb.Filter{{ + Name: wellknown.HTTPConnectionManager, + ConfigType: &v3listenerpb.Filter_TypedConfig{TypedConfig: hcm}, + }}, + }}, + } + + // Expect an error update from the resolver. Since the resource is cached, + // it should be received as an ambient error. configureResourcesOnManagementServer(ctx, t, mgmtServer, nodeID, []*v3listenerpb.Listener{lis}, nil) - if err := waitForErrorFromResolver(ctx, errCh, wantErr, nodeID); err != nil { + if err := waitForErrorFromResolver(ctx, errCh, "no RouteSpecifier", nodeID); err != nil { t.Fatal(err) } } diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index b1fb7bfc2837..bf1eaacf3fd2 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -450,11 +450,6 @@ func (lw *ldsWatcher) ResourceError(err error, onDone func()) { if lw.logger.V(2) { lw.logger.Infof("LDS watch for resource %q reported resource error: %v", lw.name, err) } - if xdsresource.ErrType(err) != xdsresource.ErrorTypeResourceNotFound { - // For errors which are anything other than "resource-not-found", we - // continue to use the old configuration. 
- return - } lw.parent.onLDSResourceError(err) } diff --git a/xds/internal/xdsclient/authority.go b/xds/internal/xdsclient/authority.go index 868c0dc6311b..ec3a7352f9b9 100644 --- a/xds/internal/xdsclient/authority.go +++ b/xds/internal/xdsclient/authority.go @@ -375,7 +375,11 @@ func (a *authority) handleADSResourceUpdate(serverConfig *bootstrap.ServerConfig watcher := watcher err := uErr.Err watcherCnt.Add(1) - funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) + if state.cache == nil { + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.ResourceError(err, done) }) + } else { + funcsToSchedule = append(funcsToSchedule, func(context.Context) { watcher.AmbientError(err, done) }) + } } continue } diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index 33b4011c21f0..7b5817b28107 100644 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -438,9 +438,10 @@ func (s) TestADSFlowControl_ResourceUpdates_MultipleResources(t *testing.T) { } // Test ADS stream flow control with a single resource that is expected to be -// NACKed by the xDS client and the watcher's OnError() callback is expected to -// be invoked. Verifies that no further reads are attempted until the error is -// completely processed by the watcher. +// NACKed by the xDS client and the watcher's ResourceError() callback is +// expected to be invoked because resource is not cached. Verifies that no +// further reads are attempted until the error is completely processed by the +// watcher. 
func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -490,9 +491,9 @@ func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { t.Fatalf("Timed out waiting for ADS stream to be read from") } - // Wait for the error to reach the watcher. + // Wait for the resource error to reach the watcher. select { - case <-watcher.errorCh: + case <-watcher.notFoundCh: case <-ctx.Done(): t.Fatalf("Timed out waiting for error to reach watcher") } diff --git a/xds/server_ext_test.go b/xds/server_ext_test.go index 80f33a0bc9a7..756466b84203 100644 --- a/xds/server_ext_test.go +++ b/xds/server_ext_test.go @@ -169,7 +169,8 @@ func waitForFailedRPCWithStatus(ctx context.Context, t *testing.T, cc *grpc.Clie t.Fatalf("RPCs failed with most recent error: %v. Want status code %v, error: %s, node id: %s", err, wantCode, wantErr, wantNodeID) case <-time.After(defaultTestShortTimeout): _, err = client.EmptyCall(ctx, &testpb.Empty{}) - if gotCode := status.Code(err); gotCode != wantCode { + gotCode := status.Code(err) + if gotCode != wantCode { continue } if gotErr := err.Error(); !strings.Contains(gotErr, wantErr) { diff --git a/xds/server_security_ext_test.go b/xds/server_security_ext_test.go index 28685b0227b9..47bb4155f9be 100644 --- a/xds/server_security_ext_test.go +++ b/xds/server_security_ext_test.go @@ -23,12 +23,12 @@ import ( "fmt" "net" "strconv" + "strings" "testing" - "time" "github.com/google/uuid" "google.golang.org/grpc" - "google.golang.org/grpc/codes" + "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" xdscreds "google.golang.org/grpc/credentials/xds" "google.golang.org/grpc/internal/testutils" @@ -128,7 +128,8 @@ func (s) TestServer_Security_NoCertProvidersInBootstrap_Success(t *testing.T) { // client is expected to NACK this resource because the certificate provider // instance name specified in the Listener resource will not be present in 
the // bootstrap file. The test verifies that server creation does not fail and that -// the xDS-enabled gRPC server does not enter "serving" mode. +// the xDS-enabled gRPC server does not enter "serving" mode but serving mode +// changes to "not serving" because the listener resource is not cached. func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -214,25 +215,38 @@ func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testin t.Fatal("Timeout when waiting for an NACK from the xDS client for the LDS response") } - // Wait a short duration and ensure that the server does not enter "serving" - // mode. - sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) - defer sCancel() - select { - case <-sCtx.Done(): - case <-modeChangeHandler.modeCh: - t.Fatal("Server started serving RPCs before the route config was received") - } - - // Create a client that uses insecure creds and verify that RPCs don't - // succeed. - cc, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - t.Fatalf("Failed to dial local test server: %v", err) + // Since the listener resource from the management server is invalid and is + // not cached, it should cause the server to go to NOT_SERVING and the + // error message should contain the xDS node ID. + // + // Even though the server is currently NOT_SERVING, in the case (where we + // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not + // suppressed as: + // 1. the error may have change + // 2. it provides a timestamp of the last backoff attempt + // The loop ensures that we drain all the mode change notifications but + // at the same time verify that the server is NOT_SERVING and the error + // message contains the xDS node ID. 
+ exit := false + for { + select { + case <-ctx.Done(): + t.Fatalf("Timed out waiting for server to go NOT_SERVING") + case gotMode := <-modeChangeHandler.modeCh: + if gotMode != connectivity.ServingModeNotServing { + t.Fatalf("Mode changed to %v, want %v", gotMode, connectivity.ServingModeNotServing) + } + gotErr := <-modeChangeHandler.errCh + if gotErr == nil || !strings.Contains(gotErr.Error(), nodeID) { + t.Fatalf("Unexpected error: %v, want xDS Node id: %s", gotErr, nodeID) + } + default: + exit = true + } + if exit { + break + } } - defer cc.Close() - - waitForFailedRPCWithStatus(ctx, t, cc, codes.Unavailable, "", "") } // Tests the case where the bootstrap configuration contains one certificate @@ -245,7 +259,8 @@ func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testin // certificate provider instance // // The test verifies that an RPC to the first listener succeeds, while the -// second listener never moves to "serving" mode and RPCs to it fail. +// second listener never moves to "serving" mode and RPCs but serving mode +// changes to "not serving" because the listener resource is not cached. func (s) TestServer_Security_WithValidAndInvalidSecurityConfiguration(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -448,21 +463,37 @@ func (s) TestServer_Security_WithValidAndInvalidSecurityConfiguration(t *testing t.Fatal("Timeout when waiting for an NACK from the xDS client for the LDS response") } - // Wait a short duration and ensure that the server does not enter "serving" - // mode. - select { - case <-time.After(2 * defaultTestShortTimeout): - case <-modeChangeHandler2.modeCh: - t.Fatal("Server changed to serving mode when not expected to") - } - - // Create a client that uses insecure creds and verify that RPCs don't - // succeed to listener2. 
- cc2, err := grpc.NewClient(lis2.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - t.Fatalf("Failed to dial local test server: %v", err) + // Since the listener resource from the management server is invalid and is + // not cached, it should cause the server to go to NOT_SERVING and the + // error message should contain the xDS node ID. + // + // Even though the server is currently NOT_SERVING, in the case (where we + // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not + // suppressed as: + // 1. the error may have change + // 2. it provides a timestamp of the last backoff attempt + // + // The loop ensures that we drain all the mode change notifications but + // at the same time verify that the server is NOT_SERVING and the error + // message contains the xDS node ID. + exit := false + for { + select { + case <-ctx.Done(): + t.Fatalf("Timed out waiting for server to go NOT_SERVING") + case gotMode := <-modeChangeHandler2.modeCh: + if gotMode != connectivity.ServingModeNotServing { + t.Fatalf("Mode changed to %v, want %v", gotMode, connectivity.ServingModeNotServing) + } + gotErr := <-modeChangeHandler2.errCh + if gotErr == nil || !strings.Contains(gotErr.Error(), nodeID) { + t.Fatalf("Unexpected error: %v, want xDS Node id: %s", gotErr, nodeID) + } + default: + exit = true + } + if exit { + break + } } - defer cc2.Close() - - waitForFailedRPCWithStatus(ctx, t, cc2, codes.Unavailable, "", "") } diff --git a/xds/server_test.go b/xds/server_test.go index 51579333a21a..191231d91920 100644 --- a/xds/server_test.go +++ b/xds/server_test.go @@ -655,15 +655,13 @@ func (s) TestHandleListenerUpdate_ErrorUpdate(t *testing.T) { t.Fatal(err) } - // Also make sure that no serving mode updates are received. The serving - // mode does not change until the server comes to the conclusion that the - // requested resource is not present in the management server. 
This happens - // when the watch timer expires or when the resource is explicitly deleted - // by the management server. + // Also make sure that serving mode updates are received. The serving + // mode changes to NOT_SERVING. This happens because watcher received an + // invalid resource from the server which is not present in cache. sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) defer sCancel() - if _, err := modeChangeCh.Receive(sCtx); err != context.DeadlineExceeded { - t.Fatal("Serving mode changed received when none expected") + if _, err := modeChangeCh.Receive(sCtx); err == context.DeadlineExceeded { + t.Fatal("Serving mode did not change when expected to change") } } @@ -697,10 +695,23 @@ func (s) TestServeAndCloseDoNotRace(t *testing.T) { // Generate bootstrap contents up front for all servers. bootstrapContents := generateBootstrapContents(t, uuid.NewString(), nonExistentManagementServer) + // Override the default ServingModeCallback with a noop function because the + // invalid listener resource will be immediately NACKed by the xDS client + // and since the listener resource is not cached, it will trigger multiple + // resource error notifications for the same listener resource in quick + // successions, leading to service mode change to "not serving" each time. + // + // Even if the the server is currently NOT_SERVING, in the case (where we + // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not + // suppressed as: + // 1. the error may have change + // 2. 
it provides a timestamp of the last backoff attempt + noopModeChangeCallback := func(_ net.Addr, _ ServingModeChangeArgs) {} + wg := sync.WaitGroup{} wg.Add(200) for i := 0; i < 100; i++ { - server, err := NewGRPCServer(BootstrapContentsForTesting(bootstrapContents)) + server, err := NewGRPCServer(BootstrapContentsForTesting(bootstrapContents), ServingModeCallback(noopModeChangeCallback)) if err != nil { t.Fatalf("Failed to create an xDS enabled gRPC server: %v", err) } From 9b0f57ecef85e2171462c72e4f38c981c8259b41 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Tue, 8 Apr 2025 23:38:11 +0530 Subject: [PATCH 14/16] easwars review 3 --- .../balancer/cdsbalancer/cdsbalancer.go | 46 ++++---- .../clusterresolver/resource_resolver_eds.go | 24 +--- xds/internal/resolver/serviceconfig.go | 4 +- xds/internal/resolver/xds_resolver.go | 16 ++- xds/internal/resolver/xds_resolver_test.go | 30 +++-- xds/internal/server/listener_wrapper.go | 83 ++++++-------- xds/internal/server/rds_handler.go | 36 +++--- xds/internal/testutils/resource_watcher.go | 26 ++--- xds/internal/xdsclient/clientimpl_watchers.go | 2 +- .../tests/ads_stream_flow_control_test.go | 16 +-- .../xdsclient/tests/authority_test.go | 20 ++-- .../xdsclient/tests/misc_watchers_test.go | 12 +- .../xdsclient/tests/resource_update_test.go | 18 +-- xds/server_ext_test.go | 22 +++- xds/server_security_ext_test.go | 108 +++++++----------- xds/server_test.go | 12 +- 16 files changed, 228 insertions(+), 247 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 0aeb09da2daf..076f823db0e4 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -357,7 +357,11 @@ func (b *cdsBalancer) ResolverError(err error) { if b.lbCfg != nil { root = b.lbCfg.ClusterName } - b.onClusterAmbientError(root, err) + if b.childLB != nil { + b.onClusterAmbientError(root, err) + return + } + 
b.onClusterResourceError(root, err) }) } @@ -474,7 +478,7 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd // If the security config is invalid, for example, if the provider // instance is not found in the bootstrap config, we need to put the // channel in transient failure. - b.onClusterAmbientError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) + b.onClusterResourceError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) return } } @@ -482,12 +486,20 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd clustersSeen := make(map[string]bool) dms, ok, err := b.generateDMsForCluster(b.lbCfg.ClusterName, 0, nil, clustersSeen) if err != nil { - b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) + if b.childLB != nil { + b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) + } else { + b.onClusterResourceError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) + } return } if ok { if len(dms) == 0 { - b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) + if b.childLB != nil { + b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) + } else { + b.onClusterResourceError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) + } return } // Child policy is built the first time we resolve the cluster graph. 
@@ -542,33 +554,23 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd } } -// Handles an error Cluster update from the xDS client to not stop using the -// previously seen resource. Propagates the error down to the child policy -// if one exists, or puts the channel in TRANSIENT_FAILURE. +// Handles an ambient error Cluster update from the xDS client to not stop +// using the previously seen resource. // // Only executed in the context of a serializer callback. func (b *cdsBalancer) onClusterAmbientError(name string, err error) { - b.logger.Warningf("Cluster resource %q received error update: %v", name, err) + b.logger.Warningf("Cluster resource %q received ambient error update: %v", name, err) - if b.childLB != nil { - if xdsresource.ErrType(err) != xdsresource.ErrorTypeConnection { - // Connection errors will be sent to the child balancers directly. - // There's no need to forward them. - b.childLB.ResolverError(err) - } - } else { - // If child balancer was never created, fail the RPCs with - // errors. - b.ccw.UpdateState(balancer.State{ - ConnectivityState: connectivity.TransientFailure, - Picker: base.NewErrPicker(fmt.Errorf("%q: %v", name, err)), - }) + if xdsresource.ErrType(err) != xdsresource.ErrorTypeConnection && b.childLB != nil { + // Connection errors will be sent to the child balancers directly. + // There's no need to forward them. + b.childLB.ResolverError(err) } } // Handles an error Cluster update from the xDS client to stop using the // previously seen resource. Propagates the error down to the child policy -// if one exists, or puts the channel in TRANSIENT_FAILURE. +// if one exists, and puts the channel in TRANSIENT_FAILURE. // // Only executed in the context of a serializer callback. 
func (b *cdsBalancer) onClusterResourceError(name string, err error) { diff --git a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go index eae98b2c5b53..043def95079d 100644 --- a/xds/internal/balancer/clusterresolver/resource_resolver_eds.go +++ b/xds/internal/balancer/clusterresolver/resource_resolver_eds.go @@ -95,7 +95,9 @@ func (er *edsDiscoveryMechanism) ResourceError(err error, onDone func()) { return } - er.logger.Warningf("EDS discovery mechanism for resource %q reported resource error: %v", er.nameToWatch, err) + if er.logger.V(2) { + er.logger.Infof("EDS discovery mechanism for resource %q reported resource error: %v", er.nameToWatch, err) + } // Report an empty update that would result in no priority child being // created for this discovery mechanism. This would result in the priority @@ -119,24 +121,4 @@ func (er *edsDiscoveryMechanism) AmbientError(err error, onDone func()) { if er.logger.V(2) { er.logger.Infof("EDS discovery mechanism for resource %q reported ambient error: %v", er.nameToWatch, err) } - - er.mu.Lock() - if er.update != nil { - // Continue using a previously received good configuration if one - // exists. - er.mu.Unlock() - onDone() - return - } - - // Else report an empty update that would result in no priority child being - // created for this discovery mechanism. This would result in the priority - // LB policy reporting TRANSIENT_FAILURE (as there would be no priorities or - // localities) if this was the only discovery mechanism, or would result in - // the priority LB policy using a lower priority discovery mechanism when - // that becomes available. 
- er.update = &xdsresource.EndpointsUpdate{} - er.mu.Unlock() - - er.topLevelResolver.onUpdate(onDone) } diff --git a/xds/internal/resolver/serviceconfig.go b/xds/internal/resolver/serviceconfig.go index 02e6a73eccc7..7913f8205159 100644 --- a/xds/internal/resolver/serviceconfig.go +++ b/xds/internal/resolver/serviceconfig.go @@ -141,8 +141,8 @@ type erroringConfigSelector struct { err error } -func newErroringConfigSelector(xdsNodeID string) *erroringConfigSelector { - return &erroringConfigSelector{err: annotateErrorWithNodeID(status.Errorf(codes.Unavailable, "no valid clusters"), xdsNodeID)} +func newErroringConfigSelector(err error, xdsNodeID string) *erroringConfigSelector { + return &erroringConfigSelector{err: annotateErrorWithNodeID(status.Errorf(codes.Unavailable, err.Error()), xdsNodeID)} } func (cs *erroringConfigSelector) SelectConfig(iresolver.RPCInfo) (*iresolver.RPCConfig, error) { diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index d30224a7aa53..f51c7cf19a2c 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -482,7 +482,7 @@ func (r *xdsResolver) onError(err error) { // are removed. // // Only executed in the context of a serializer callback. -func (r *xdsResolver) onResourceNotFound() { +func (r *xdsResolver) onResourceError(err error) { // We cannot remove clusters from the service config that have ongoing RPCs. // Instead, what we can do is to send an erroring config selector // along with normal service config. This will ensure that new RPCs will @@ -491,7 +491,7 @@ func (r *xdsResolver) onResourceNotFound() { // service config with no addresses. This results in the pick-first // LB policy being configured on the channel, and since there are no // address, pick-first will put the channel in TRANSIENT_FAILURE. 
- cs := newErroringConfigSelector(r.xdsClient.BootstrapConfig().Node().GetId()) + cs := newErroringConfigSelector(err, r.xdsClient.BootstrapConfig().Node().GetId()) r.sendNewServiceConfig(cs) // Stop and dereference the active config selector, if one exists. @@ -555,7 +555,9 @@ func (r *xdsResolver) onListenerResourceAmbientError(err error) { // Only executed in the context of a serializer callback. func (r *xdsResolver) onListenerResourceError(err error) { - r.logger.Warningf("Received resource error for Listener resource %q: %v", r.ldsResourceName, err) + if r.logger.V(2) { + r.logger.Infof("Received resource error for Listener resource %q: %v", r.ldsResourceName, err) + } r.listenerUpdateRecvd = false if r.routeConfigWatcher != nil { @@ -566,7 +568,7 @@ func (r *xdsResolver) onListenerResourceError(err error) { r.routeConfigUpdateRecvd = false r.routeConfigWatcher = nil - r.onResourceNotFound() + r.onResourceError(err) } // Only executed in the context of a serializer callback. @@ -593,12 +595,14 @@ func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) // Only executed in the context of a serializer callback. func (r *xdsResolver) onRouteConfigResourceError(name string, err error) { - r.logger.Warningf("Received resource error for RouteConfiguration resource %q: %v", name, err) + if r.logger.V(2) { + r.logger.Infof("Received resource error for RouteConfiguration resource %q: %v", name, err) + } if r.rdsResourceName != name { return } - r.onResourceNotFound() + r.onResourceError(err) } // Only executed in the context of a serializer callback. 
diff --git a/xds/internal/resolver/xds_resolver_test.go b/xds/internal/resolver/xds_resolver_test.go index 1389dd4fa30b..79bd4e01ba41 100644 --- a/xds/internal/resolver/xds_resolver_test.go +++ b/xds/internal/resolver/xds_resolver_test.go @@ -290,7 +290,9 @@ func (s) TestResolverCloseClosesXDSClient(t *testing.T) { // Tests the case where a resource, not present in cache, returned by the // management server is NACKed by the xDS client, which then returns an update -// containing a resource error to the resolver. +// containing a resource error to the resolver. It tests the case where the +// resolver gets an error update without any previous good update. The test +// also verifies that these are propagated to the ClientConn. func (s) TestResolverBadServiceUpdate_NACKedWithoutCache(t *testing.T) { // Spin up an xDS management server for the test. ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) @@ -319,7 +321,7 @@ func (s) TestResolverBadServiceUpdate_NACKedWithoutCache(t *testing.T) { // Build the resolver and expect an error update from it. Since the // resource is not cached, it should be received as resource error. _, errCh, _ := buildResolverForTarget(t, resolver.Target{URL: *testutils.MustParseURL("xds:///" + defaultTestServiceName)}, bc) - if err := waitForErrorFromResolver(ctx, errCh, "no valid clusters", nodeID); err != nil { + if err := waitForErrorFromResolver(ctx, errCh, "no RouteSpecifier", nodeID); err != nil { t.Fatal(err) } } @@ -330,7 +332,8 @@ func (s) TestResolverBadServiceUpdate_NACKedWithoutCache(t *testing.T) { // update is propagated to the ClientConn by the resolver. It tests the // case where the resolver gets a good update first, and an error // after the good update. The test also verifies that these are propagated to -// the ClientConn. +// the ClientConn and that RPC succeeds as expected after receiving good update +// as well as ambient error. 
func (s) TestResolverBadServiceUpdate_NACKedWithCache(t *testing.T) { // Spin up an xDS management server for the test. ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) @@ -347,7 +350,13 @@ func (s) TestResolverBadServiceUpdate_NACKedWithCache(t *testing.T) { configureResourcesOnManagementServer(ctx, t, mgmtServer, nodeID, listeners, routes) // Expect a good update from the resolver. - verifyUpdateFromResolver(ctx, t, stateCh, wantDefaultServiceConfig) + cs := verifyUpdateFromResolver(ctx, t, stateCh, wantDefaultServiceConfig) + + // "Make an RPC" by invoking the config selector. + _, err := cs.SelectConfig(iresolver.RPCInfo{Context: ctx, Method: "/service/method"}) + if err != nil { + t.Fatalf("cs.SelectConfig(): %v", err) + } // Configure a listener resource that is expected to be NACKed because it // does not contain the `RouteSpecifier` field in the HTTPConnectionManager. @@ -372,6 +381,13 @@ func (s) TestResolverBadServiceUpdate_NACKedWithCache(t *testing.T) { if err := waitForErrorFromResolver(ctx, errCh, "no RouteSpecifier", nodeID); err != nil { t.Fatal(err) } + + // "Make an RPC" by invoking the config selector which should succeed by + // continuing to use the previously cached resource. + _, err = cs.SelectConfig(iresolver.RPCInfo{Context: ctx, Method: "/service/method"}) + if err != nil { + t.Fatalf("cs.SelectConfig(): %v", err) + } } // TestResolverGoodServiceUpdate tests the case where the resource returned by @@ -571,7 +587,7 @@ func (s) TestResolverRemovedWithRPCs(t *testing.T) { // return an erroring config selector which will fail new RPCs. 
cs = verifyUpdateFromResolver(ctx, t, stateCh, wantDefaultServiceConfig) _, err = cs.SelectConfig(iresolver.RPCInfo{Context: ctx, Method: "/service/method"}) - if err := verifyResolverError(err, codes.Unavailable, "no valid clusters", nodeID); err != nil { + if err := verifyResolverError(err, codes.Unavailable, "has been removed", nodeID); err != nil { t.Fatal(err) } @@ -678,7 +694,7 @@ func (s) TestResolverRemovedResource(t *testing.T) { // "Make another RPC" by invoking the config selector. _, err = cs.SelectConfig(iresolver.RPCInfo{Context: ctx, Method: "/service/method"}) - if err := verifyResolverError(err, codes.Unavailable, "no valid clusters", nodeID); err != nil { + if err := verifyResolverError(err, codes.Unavailable, "has been removed", nodeID); err != nil { t.Fatal(err) } @@ -702,7 +718,7 @@ func (s) TestResolverRemovedResource(t *testing.T) { case <-ctx.Done(): t.Fatalf("Timeout waiting for an error from the resolver: %v", ctx.Err()) case err := <-errCh: - if err := verifyResolverError(err, codes.Unavailable, "no valid clusters", nodeID); err != nil { + if err := verifyResolverError(err, codes.Unavailable, "has been removed", nodeID); err != nil { t.Fatal(err) } } diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index bf1eaacf3fd2..58682517b13c 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -159,36 +159,6 @@ type listenerWrapper struct { rdsHandler *rdsHandler } -func (l *listenerWrapper) handleLDSUpdate(update xdsresource.ListenerUpdate) { - ilc := update.InboundListenerCfg - // Make sure that the socket address on the received Listener resource - // matches the address of the net.Listener passed to us by the user. This - // check is done here instead of at the XDSClient layer because of the - // following couple of reasons: - // - XDSClient cannot know the listening address of every listener in the - // system, and hence cannot perform this check. 
- // - this is a very context-dependent check and only the server has the - // appropriate context to perform this check. - // - // What this means is that the XDSClient has ACKed a resource which can push - // the server into a "not serving" mode. This is not ideal, but this is - // what we have decided to do. - if ilc.Address != l.addr || ilc.Port != l.port { - l.mu.Lock() - err := l.annotateErrorWithNodeID(fmt.Errorf("address (%s:%s) in Listener update does not match listening address: (%s:%s)", ilc.Address, ilc.Port, l.addr, l.port)) - l.switchModeLocked(connectivity.ServingModeNotServing, err) - l.mu.Unlock() - return - } - - l.pendingFilterChainManager = ilc.FilterChains - l.rdsHandler.updateRouteNamesToWatch(ilc.FilterChains.RouteConfigNames) - - if l.rdsHandler.determineRouteConfigurationReady() { - l.maybeUpdateFilterChains() - } -} - // maybeUpdateFilterChains swaps in the pending filter chain manager to the // active one if the pending filter chain manager is present. If a swap occurs, // it also drains (gracefully stops) any connections that were accepted on the @@ -408,19 +378,6 @@ func (l *listenerWrapper) switchModeLocked(newMode connectivity.ServingMode, err } } -func (l *listenerWrapper) annotateErrorWithNodeID(err error) error { - return fmt.Errorf("[xDS node id: %v]: %w", l.xdsNodeID, err) -} - -func (l *listenerWrapper) onLDSResourceError(err error) { - l.mu.Lock() - defer l.mu.Unlock() - l.switchModeLocked(connectivity.ServingModeNotServing, err) - l.activeFilterChainManager = nil - l.pendingFilterChainManager = nil - l.rdsHandler.updateRouteNamesToWatch(make(map[string]bool)) -} - // ldsWatcher implements the xdsresource.ListenerWatcher interface and is // passed to the WatchListener API. 
type ldsWatcher struct { @@ -438,7 +395,34 @@ func (lw *ldsWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, if lw.logger.V(2) { lw.logger.Infof("LDS watch for resource %q received update: %#v", lw.name, update.Resource) } - lw.parent.handleLDSUpdate(update.Resource) + l := lw.parent + ilc := update.Resource.InboundListenerCfg + // Make sure that the socket address on the received Listener resource + // matches the address of the net.Listener passed to us by the user. This + // check is done here instead of at the XDSClient layer because of the + // following couple of reasons: + // - XDSClient cannot know the listening address of every listener in the + // system, and hence cannot perform this check. + // - this is a very context-dependent check and only the server has the + // appropriate context to perform this check. + // + // What this means is that the XDSClient has ACKed a resource which can push + // the server into a "not serving" mode. This is not ideal, but this is + // what we have decided to do. 
+ if ilc.Address != l.addr || ilc.Port != l.port { + l.mu.Lock() + err := fmt.Errorf("[xDS node id: %v]: %w", l.xdsNodeID, fmt.Errorf("address (%s:%s) in Listener update does not match listening address: (%s:%s)", ilc.Address, ilc.Port, l.addr, l.port)) + l.switchModeLocked(connectivity.ServingModeNotServing, err) + l.mu.Unlock() + return + } + + l.pendingFilterChainManager = ilc.FilterChains + l.rdsHandler.updateRouteNamesToWatch(ilc.FilterChains.RouteConfigNames) + + if l.rdsHandler.determineRouteConfigurationReady() { + l.maybeUpdateFilterChains() + } } func (lw *ldsWatcher) ResourceError(err error, onDone func()) { @@ -450,7 +434,14 @@ func (lw *ldsWatcher) ResourceError(err error, onDone func()) { if lw.logger.V(2) { lw.logger.Infof("LDS watch for resource %q reported resource error: %v", lw.name, err) } - lw.parent.onLDSResourceError(err) + + l := lw.parent + l.mu.Lock() + defer l.mu.Unlock() + l.switchModeLocked(connectivity.ServingModeNotServing, err) + l.activeFilterChainManager = nil + l.pendingFilterChainManager = nil + l.rdsHandler.updateRouteNamesToWatch(make(map[string]bool)) } func (lw *ldsWatcher) AmbientError(err error, onDone func()) { @@ -462,6 +453,4 @@ func (lw *ldsWatcher) AmbientError(err error, onDone func()) { if lw.logger.V(2) { lw.logger.Infof("LDS watch for resource %q reported ambient error: %v", lw.name, err) } - // For errors which are anything other than "resource-not-found", we - // continue to use the old configuration. } diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index 579ec2cdca72..d66db507c2b7 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -108,21 +108,6 @@ func (rh *rdsHandler) determineRouteConfigurationReady() bool { return len(rh.updates) == len(rh.cancels) } -// Must be called from an xDS Client Callback. 
-func (rh *rdsHandler) handleRouteUpdate(routeName string, update rdsWatcherUpdate) { - rwu := rh.updates[routeName] - - // Accept the new update if any of the following are true: - // 1. we had no valid update data. - // 2. the update is valid. - // 3. the update error is ResourceNotFound. - if rwu.data == nil || update.err == nil || xdsresource.ErrType(update.err) == xdsresource.ErrorTypeResourceNotFound { - rwu = update - } - rh.updates[routeName] = rwu - rh.callback(routeName, rwu) -} - // close() is meant to be called by wrapped listener when the wrapped listener // is closed, and it cleans up resources by canceling all the active RDS // watches. @@ -161,7 +146,11 @@ func (rw *rdsWatcher) ResourceChanged(update *xdsresource.RouteConfigResourceDat if rw.logger.V(2) { rw.logger.Infof("RDS watch for resource %q received update: %#v", rw.routeName, update.Resource) } - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{data: &update.Resource}) + + routeName := rw.routeName + rwu := rdsWatcherUpdate{data: &update.Resource} + rw.parent.updates[routeName] = rwu + rw.parent.callback(routeName, rwu) } func (rw *rdsWatcher) ResourceError(err error, onDone func()) { @@ -172,8 +161,14 @@ func (rw *rdsWatcher) ResourceError(err error, onDone func()) { return } rw.mu.Unlock() - rw.logger.Warningf("RDS watch for resource %q reported resource error", rw.routeName) - rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) + if rw.logger.V(2) { + rw.logger.Infof("RDS watch for resource %q reported resource error", rw.routeName) + } + + routeName := rw.routeName + rwu := rdsWatcherUpdate{err: err} + rw.parent.updates[routeName] = rwu + rw.parent.callback(routeName, rwu) } func (rw *rdsWatcher) AmbientError(err error, onDone func()) { @@ -187,5 +182,8 @@ func (rw *rdsWatcher) AmbientError(err error, onDone func()) { if rw.logger.V(2) { rw.logger.Infof("RDS watch for resource %q reported ambient error: %v", rw.routeName, err) } - 
rw.parent.handleRouteUpdate(rw.routeName, rdsWatcherUpdate{err: err}) + routeName := rw.routeName + rwu := rw.parent.updates[routeName] + rw.parent.updates[routeName] = rwu + rw.parent.callback(routeName, rwu) } diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index b601386917d6..98f4a0e6e137 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -29,10 +29,10 @@ import "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" type TestResourceWatcher struct { // UpdateCh is the channel on which xDS client updates are delivered. UpdateCh chan *xdsresource.ResourceData - // ErrorCh is the channel on which errors from the xDS client are delivered. - ErrorCh chan error - // ResourceDoesNotExistCh is the channel used to indicate calls to ResourceError - ResourceDoesNotExistCh chan struct{} + // AmbientErrorCh is the channel on which errors from the xDS client are delivered. + AmbientErrorCh chan error + // ResourceErrorCh is the channel used to indicate calls to ResourceError. + ResourceErrorCh chan struct{} } // ResourceChanged is invoked by the xDS client to report the latest update. 
@@ -51,12 +51,12 @@ func (w *TestResourceWatcher) ResourceChanged(data xdsresource.ResourceData, onD func (w *TestResourceWatcher) ResourceError(err error, onDone func()) { defer onDone() select { - case <-w.ResourceDoesNotExistCh: - case <-w.ErrorCh: + case <-w.ResourceErrorCh: + case <-w.AmbientErrorCh: default: } - w.ErrorCh <- err - w.ResourceDoesNotExistCh <- struct{}{} + w.AmbientErrorCh <- err + w.ResourceErrorCh <- struct{}{} } // AmbientError is invoked by the xDS client to report the latest ambient @@ -64,18 +64,18 @@ func (w *TestResourceWatcher) ResourceError(err error, onDone func()) { func (w *TestResourceWatcher) AmbientError(err error, onDone func()) { defer onDone() select { - case <-w.ErrorCh: + case <-w.AmbientErrorCh: default: } - w.ErrorCh <- err + w.AmbientErrorCh <- err } // NewTestResourceWatcher returns a TestResourceWatcher to watch for resources // via the xDS client. func NewTestResourceWatcher() *TestResourceWatcher { return &TestResourceWatcher{ - UpdateCh: make(chan *xdsresource.ResourceData, 1), - ErrorCh: make(chan error, 1), - ResourceDoesNotExistCh: make(chan struct{}, 1), + UpdateCh: make(chan *xdsresource.ResourceData, 1), + AmbientErrorCh: make(chan error, 1), + ResourceErrorCh: make(chan struct{}, 1), } } diff --git a/xds/internal/xdsclient/clientimpl_watchers.go b/xds/internal/xdsclient/clientimpl_watchers.go index 7ed52089426e..2cce17b05a24 100644 --- a/xds/internal/xdsclient/clientimpl_watchers.go +++ b/xds/internal/xdsclient/clientimpl_watchers.go @@ -64,7 +64,7 @@ func (c *clientImpl) WatchResource(rType xdsresource.Type, resourceName string, } if err := c.resourceTypes.maybeRegister(rType); err != nil { - logger.Warningf("Watch registered for name %q of type %q which is already registered", rType.TypeName(), resourceName) + logger.Warningf("Watch registered for type %q, which is already registered", rType.TypeName()) c.serializer.TrySchedule(func(context.Context) { watcher.ResourceError(err, func() {}) }) return func() 
{} } diff --git a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go index 7b5817b28107..ec1e3cef4d71 100644 --- a/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go +++ b/xds/internal/xdsclient/tests/ads_stream_flow_control_test.go @@ -47,16 +47,16 @@ import ( type blockingListenerWatcher struct { doneNotifierCh chan func() // DoneNotifier passed to the callback. updateCh chan struct{} // Written to when an update is received. - errorCh chan struct{} // Written to when an error is received. - notFoundCh chan struct{} // Written to when the resource is not found. + ambientErrCh chan struct{} // Written to when an ambient error is received. + resourceErrCh chan struct{} // Written to when a resource error is received. } func newBLockingListenerWatcher() *blockingListenerWatcher { return &blockingListenerWatcher{ doneNotifierCh: make(chan func(), 1), updateCh: make(chan struct{}, 1), - errorCh: make(chan struct{}, 1), - notFoundCh: make(chan struct{}, 1), + ambientErrCh: make(chan struct{}, 1), + resourceErrCh: make(chan struct{}, 1), } } @@ -76,7 +76,7 @@ func (lw *blockingListenerWatcher) ResourceChanged(update *xdsresource.ListenerR func (lw *blockingListenerWatcher) ResourceError(err error, done func()) { // Notify receipt of an error. select { - case lw.notFoundCh <- struct{}{}: + case lw.resourceErrCh <- struct{}{}: default: } @@ -89,7 +89,7 @@ func (lw *blockingListenerWatcher) ResourceError(err error, done func()) { func (lw *blockingListenerWatcher) AmbientError(err error, done func()) { // Notify receipt of an error. select { - case lw.errorCh <- struct{}{}: + case lw.ambientErrCh <- struct{}{}: default: } @@ -493,7 +493,7 @@ func (s) TestADSFlowControl_ResourceErrors(t *testing.T) { // Wait for the resource error to reach the watcher. 
select { - case <-watcher.notFoundCh: + case <-watcher.resourceErrCh: case <-ctx.Done(): t.Fatalf("Timed out waiting for error to reach watcher") } @@ -599,7 +599,7 @@ func (s) TestADSFlowControl_ResourceDoesNotExist(t *testing.T) { // Wait for the resource not found callback to be invoked. select { - case <-watcher.notFoundCh: + case <-watcher.resourceErrCh: case <-ctx.Done(): t.Fatalf("Timed out waiting for resource not found callback to be invoked on the watcher") } diff --git a/xds/internal/xdsclient/tests/authority_test.go b/xds/internal/xdsclient/tests/authority_test.go index daff7d4bc0b9..187fb4d1956d 100644 --- a/xds/internal/xdsclient/tests/authority_test.go +++ b/xds/internal/xdsclient/tests/authority_test.go @@ -307,7 +307,7 @@ func (s) TestAuthority_Fallback(t *testing.T) { // Ensure that the connectivity error callback is not called. sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) defer sCancel() - if v, err := watcher.errCh.Receive(sCtx); err != context.DeadlineExceeded { + if v, err := watcher.ambientErrCh.Receive(sCtx); err != context.DeadlineExceeded { t.Fatalf("Error callback on the watcher with error: %v", v.(error)) } @@ -330,7 +330,7 @@ func (s) TestAuthority_Fallback(t *testing.T) { secondaryLis.Close() // Ensure that the connectivity error callback is called. - if _, err := watcher.errCh.Receive(ctx); err != nil { + if _, err := watcher.ambientErrCh.Receive(ctx); err != nil { t.Fatal("Timeout when waiting for error callback on the watcher") } } @@ -338,16 +338,16 @@ func (s) TestAuthority_Fallback(t *testing.T) { // TODO: Get rid of the clusterWatcher type in cds_watchers_test.go and use this // one instead. Also, rename this to clusterWatcher as part of that refactor. 
type clusterWatcherV2 struct { - updateCh *testutils.Channel // Messages of type xdsresource.ClusterUpdate - errCh *testutils.Channel // Messages of type error - resourceNotFoundCh *testutils.Channel // Messages of type error + updateCh *testutils.Channel // Messages of type xdsresource.ClusterUpdate + ambientErrCh *testutils.Channel // Messages of type ambient error + resourceErrCh *testutils.Channel // Messages of type resource error } func newClusterWatcherV2() *clusterWatcherV2 { return &clusterWatcherV2{ - updateCh: testutils.NewChannel(), - errCh: testutils.NewChannel(), - resourceNotFoundCh: testutils.NewChannel(), + updateCh: testutils.NewChannel(), + ambientErrCh: testutils.NewChannel(), + resourceErrCh: testutils.NewChannel(), } } @@ -361,7 +361,7 @@ func (cw *clusterWatcherV2) AmbientError(err error, onDone func()) { // resends resources which are NACKed by the xDS client, using a `Replace()` // here simplifies tests that want access to the most recently received // error. - cw.errCh.Replace(err) + cw.ambientErrCh.Replace(err) onDone() } @@ -370,6 +370,6 @@ func (cw *clusterWatcherV2) ResourceError(err error, onDone func()) { // resends resources which are NACKed by the xDS client, using a `Replace()` // here simplifies tests that want access to the most recently received // error. - cw.resourceNotFoundCh.Replace(err) + cw.resourceErrCh.Replace(err) onDone() } diff --git a/xds/internal/xdsclient/tests/misc_watchers_test.go b/xds/internal/xdsclient/tests/misc_watchers_test.go index 18fb091e0ab2..f448a3430e74 100644 --- a/xds/internal/xdsclient/tests/misc_watchers_test.go +++ b/xds/internal/xdsclient/tests/misc_watchers_test.go @@ -322,7 +322,7 @@ func (s) TestNodeProtoSentOnlyInFirstRequest(t *testing.T) { select { case <-ctx.Done(): t.Fatal("Timeout when waiting for the connection error to be propagated to the watcher") - case <-watcher.ErrorCh: + case <-watcher.AmbientErrorCh: } // Restart the management server. 
@@ -427,9 +427,9 @@ func (s) TestWatchErrorsContainNodeID(t *testing.T) { case <-sCtx.Done(): case <-watcher.UpdateCh: t.Fatal("Unexpected resource update") - case <-watcher.ErrorCh: + case <-watcher.AmbientErrorCh: t.Fatal("Unexpected resource error") - case <-watcher.ResourceDoesNotExistCh: + case <-watcher.ResourceErrorCh: t.Fatal("Unexpected resource does not exist") } @@ -437,7 +437,7 @@ func (s) TestWatchErrorsContainNodeID(t *testing.T) { select { case <-ctx.Done(): t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.ErrorCh: + case err := <-watcher.AmbientErrorCh: if err == nil || !strings.Contains(err.Error(), nodeID) { t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, nodeID) } @@ -452,7 +452,7 @@ func (s) TestWatchErrorsContainNodeID(t *testing.T) { select { case <-ctx.Done(): t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.ErrorCh: + case err := <-watcher.AmbientErrorCh: if err == nil || !strings.Contains(err.Error(), nodeID) { t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, nodeID) } @@ -500,7 +500,7 @@ func (s) TestWatchErrorsContainNodeID_ChannelCreationFailure(t *testing.T) { select { case <-ctx.Done(): t.Fatal("Timeout when waiting for error callback to be invoked") - case err := <-watcher.ErrorCh: + case err := <-watcher.AmbientErrorCh: if err == nil || !strings.Contains(err.Error(), nodeID) { t.Fatalf("Unexpected error: %v, want error with node ID: %q", err, nodeID) } diff --git a/xds/internal/xdsclient/tests/resource_update_test.go b/xds/internal/xdsclient/tests/resource_update_test.go index 284de9918c5a..a9fce90c8756 100644 --- a/xds/internal/xdsclient/tests/resource_update_test.go +++ b/xds/internal/xdsclient/tests/resource_update_test.go @@ -422,7 +422,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type 
\"RouteConfigResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -438,7 +438,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"RouteConfigResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -455,7 +455,7 @@ func (s) TestHandleRouteConfigResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3clusterpb.Cluster{})}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"RouteConfigResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "RouteConfigResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration", @@ -675,7 +675,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -691,7 +691,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { TypeUrl: 
"type.googleapis.com/envoy.config.cluster.v3.Cluster", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -708,7 +708,7 @@ func (s) TestHandleClusterResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3endpointpb.ClusterLoadAssignment{})}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"ClusterResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "ClusterResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.cluster.v3.Cluster", @@ -986,7 +986,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { Value: []byte{1, 2, 3, 4}, }}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1002,7 +1002,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", VersionInfo: "1", }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: 
"type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", @@ -1019,7 +1019,7 @@ func (s) TestHandleEndpointsResponseFromManagementServer(t *testing.T) { VersionInfo: "1", Resources: []*anypb.Any{testutils.MarshalAny(t, &v3listenerpb.Listener{})}, }, - wantErr: fmt.Sprintf("xds: resource \"%v\" of type \"EndpointsResource\" does not exist", resourceName1), + wantErr: fmt.Sprintf("xds: resource %q of type %q does not exist", resourceName1, "EndpointsResource"), wantGenericXDSConfig: []*v3statuspb.ClientConfig_GenericXdsConfig{ { TypeUrl: "type.googleapis.com/envoy.config.endpoint.v3.ClusterLoadAssignment", diff --git a/xds/server_ext_test.go b/xds/server_ext_test.go index 756466b84203..a58bc63d6e3c 100644 --- a/xds/server_ext_test.go +++ b/xds/server_ext_test.go @@ -84,8 +84,11 @@ type servingModeChangeHandler struct { logger interface { Logf(format string, args ...any) } - modeCh chan connectivity.ServingMode - errCh chan error + currentMode connectivity.ServingMode + currentErr error + mu sync.Mutex + modeCh chan connectivity.ServingMode + errCh chan error } func newServingModeChangeHandler(t *testing.T) *servingModeChangeHandler { @@ -97,10 +100,22 @@ func newServingModeChangeHandler(t *testing.T) *servingModeChangeHandler { } func (m *servingModeChangeHandler) modeChangeCallback(addr net.Addr, args xds.ServingModeChangeArgs) { + m.mu.Lock() + defer m.mu.Unlock() + // Suppress pushing duplicate mode change and error if the mode is staying + // in NOT_SERVING and the error is the same. 
+ if m.currentMode == args.Mode && m.currentMode == connectivity.ServingModeNotServing && m.currentErr.Error() == args.Err.Error() { + return + } m.logger.Logf("Serving mode for listener %q changed to %q, err: %v", addr.String(), args.Mode, args.Err) m.modeCh <- args.Mode + if args.Mode == connectivity.ServingModeServing { + m.currentErr = nil + } + m.currentMode = args.Mode if args.Err != nil { m.errCh <- args.Err + m.currentErr = args.Err } } @@ -169,8 +184,7 @@ func waitForFailedRPCWithStatus(ctx context.Context, t *testing.T, cc *grpc.Clie t.Fatalf("RPCs failed with most recent error: %v. Want status code %v, error: %s, node id: %s", err, wantCode, wantErr, wantNodeID) case <-time.After(defaultTestShortTimeout): _, err = client.EmptyCall(ctx, &testpb.Empty{}) - gotCode := status.Code(err) - if gotCode != wantCode { + if gotCode := status.Code(err); gotCode != wantCode { continue } if gotErr := err.Error(); !strings.Contains(gotErr, wantErr) { diff --git a/xds/server_security_ext_test.go b/xds/server_security_ext_test.go index 47bb4155f9be..992d8ddd7155 100644 --- a/xds/server_security_ext_test.go +++ b/xds/server_security_ext_test.go @@ -23,11 +23,12 @@ import ( "fmt" "net" "strconv" - "strings" "testing" + "time" "github.com/google/uuid" "google.golang.org/grpc" + "google.golang.org/grpc/codes" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/credentials/insecure" xdscreds "google.golang.org/grpc/credentials/xds" @@ -128,8 +129,8 @@ func (s) TestServer_Security_NoCertProvidersInBootstrap_Success(t *testing.T) { // client is expected to NACK this resource because the certificate provider // instance name specified in the Listener resource will not be present in the // bootstrap file. The test verifies that server creation does not fail and that -// the xDS-enabled gRPC server does not enter "serving" mode but serving mode -// changes to "not serving" because the listener resource is not cached. 
+// if the xDS-enabled gRPC server receives a resource error causing a mode change, +// it does not enter "serving" mode. func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -215,38 +216,27 @@ func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testin t.Fatal("Timeout when waiting for an NACK from the xDS client for the LDS response") } - // Since the listener resource from the management server is invalid and is - // not cached, it should cause the server to go to NOT_SERVING and the - // error message should contain the xDS node ID. - // - // Even though the server is currently NOT_SERVING, in the case (where we - // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not - // suppressed as: - // 1. the error may have change - // 2. it provides a timestamp of the last backoff attempt - // The loop ensures that we drain all the mode change notifications but - // at the same time verify that the server is NOT_SERVING and the error - // message contains the xDS node ID. - exit := false - for { - select { - case <-ctx.Done(): - t.Fatalf("Timed out waiting for server to go NOT_SERVING") - case gotMode := <-modeChangeHandler.modeCh: - if gotMode != connectivity.ServingModeNotServing { - t.Fatalf("Mode changed to %v, want %v", gotMode, connectivity.ServingModeNotServing) - } - gotErr := <-modeChangeHandler.errCh - if gotErr == nil || !strings.Contains(gotErr.Error(), nodeID) { - t.Fatalf("Unexpected error: %v, want xDS Node id: %s", gotErr, nodeID) - } - default: - exit = true - } - if exit { - break + // Wait a short duration and ensure that if the server receives a mode change + // it does not enter "serving" mode.
+ sCtx, sCancel := context.WithTimeout(ctx, defaultTestShortTimeout) + defer sCancel() + select { + case <-sCtx.Done(): + case stateCh := <-modeChangeHandler.modeCh: + if stateCh == connectivity.ServingModeServing { + t.Fatal("Server entered serving mode before the route config was received") } } + + // Create a client that uses insecure creds and verify that RPCs don't + // succeed. + cc, err := grpc.NewClient(lis.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + t.Fatalf("Failed to dial local test server: %v", err) + } + defer cc.Close() + + waitForFailedRPCWithStatus(ctx, t, cc, codes.Unavailable, "", "") } // Tests the case where the bootstrap configuration contains one certificate @@ -259,8 +249,8 @@ func (s) TestServer_Security_NoCertificateProvidersInBootstrap_Failure(t *testin // certificate provider instance // // The test verifies that an RPC to the first listener succeeds, while the -// second listener never moves to "serving" mode and RPCs but serving mode -// changes to "not serving" because the listener resource is not cached. +// second listener receives a resource error which causes a server mode change +// but never moves to "serving" mode. func (s) TestServer_Security_WithValidAndInvalidSecurityConfiguration(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer cancel() @@ -463,37 +453,23 @@ func (s) TestServer_Security_WithValidAndInvalidSecurityConfiguration(t *testing t.Fatal("Timeout when waiting for an NACK from the xDS client for the LDS response") } - // Since the listener resource from the management server is invalid and is - // not cached, it should cause the server to go to NOT_SERVING and the - // error message should contain the xDS node ID. - // - // Even though the server is currently NOT_SERVING, in the case (where we - // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not - // suppressed as: - // 1.
the error may have change - // 2. it provides a timestamp of the last backoff attempt - // - // The loop ensures that we drain all the mode change notifications but - // at the same time verify that the server is NOT_SERVING and the error - // message contains the xDS node ID. - exit := false - for { - select { - case <-ctx.Done(): - t.Fatalf("Timed out waiting for server to go NOT_SERVING") - case gotMode := <-modeChangeHandler2.modeCh: - if gotMode != connectivity.ServingModeNotServing { - t.Fatalf("Mode changed to %v, want %v", gotMode, connectivity.ServingModeNotServing) - } - gotErr := <-modeChangeHandler2.errCh - if gotErr == nil || !strings.Contains(gotErr.Error(), nodeID) { - t.Fatalf("Unexpected error: %v, want xDS Node id: %s", gotErr, nodeID) - } - default: - exit = true - } - if exit { - break + // Wait a short duration and ensure that if the server receives mode change + // it does not enter "serving" mode. + select { + case <-time.After(2 * defaultTestShortTimeout): + case <-modeChangeHandler2.modeCh: + if modeChangeHandler2.currentMode == connectivity.ServingModeServing { + t.Fatal("Server changed to serving mode when not expected to") } } + + // Create a client that uses insecure creds and verify that RPCs don't + // succeed to listener2. + cc2, err := grpc.NewClient(lis2.Addr().String(), grpc.WithTransportCredentials(insecure.NewCredentials())) + if err != nil { + t.Fatalf("Failed to dial local test server: %v", err) + } + defer cc2.Close() + + waitForFailedRPCWithStatus(ctx, t, cc2, codes.Unavailable, "", "") } diff --git a/xds/server_test.go b/xds/server_test.go index 191231d91920..205f2646d826 100644 --- a/xds/server_test.go +++ b/xds/server_test.go @@ -656,8 +656,8 @@ func (s) TestHandleListenerUpdate_ErrorUpdate(t *testing.T) { } // Also make sure that serving mode updates are received. The serving - // mode changes to NOT_SERVING. This happens because watcher received an - // invalid resource from the server which is not present in cache. 
+ // mode changes to NOT_SERVING. This happens because watcher received a + // resource error for the invalid resource from the server. sCtx, sCancel := context.WithTimeout(context.Background(), defaultTestShortTimeout) defer sCancel() if _, err := modeChangeCh.Receive(sCtx); err == context.DeadlineExceeded { @@ -696,10 +696,10 @@ func (s) TestServeAndCloseDoNotRace(t *testing.T) { bootstrapContents := generateBootstrapContents(t, uuid.NewString(), nonExistentManagementServer) // Override the default ServingModeCallback with a noop function because the - // invalid listener resource will be immediately NACKed by the xDS client - // and since the listener resource is not cached, it will trigger multiple - // resource error notifications for the same listener resource in quick - // successions, leading to service mode change to "not serving" each time. + // serverURI is invalid which will result in xDS channel creation failure + // while registering the watch for listener resource. This will trigger + // resource error notifications for the invalid listener resource leading + // to service mode change to "not serving" each time. 
// // Even if the the server is currently NOT_SERVING, in the case (where we // are NOT_SERVING and the new mode is also NOT_SERVING), the update is not From 536bebeb17a58b4f6cd86dabf03819910ff52e17 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Thu, 10 Apr 2025 22:51:31 +0530 Subject: [PATCH 15/16] easwards review #4 --- xds/internal/balancer/cdsbalancer/cdsbalancer.go | 6 +++++- xds/internal/resolver/xds_resolver.go | 15 +++++++++------ xds/internal/server/listener_wrapper.go | 2 ++ xds/internal/server/rds_handler.go | 1 - xds/internal/testutils/resource_watcher.go | 6 ++++-- xds/server_ext_test.go | 9 +++++---- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 076f823db0e4..f1c1e681a272 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -478,7 +478,11 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd // If the security config is invalid, for example, if the provider // instance is not found in the bootstrap config, we need to put the // channel in transient failure. 
- b.onClusterResourceError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) + if b.childLB != nil { + b.onClusterAmbientError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) + } else { + b.onClusterResourceError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) + } return } } diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index f51c7cf19a2c..8e4b6796a179 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -459,7 +459,10 @@ func (r *xdsResolver) onResolutionComplete() { func (r *xdsResolver) applyRouteConfigUpdate(update xdsresource.RouteConfigUpdate) { matchVh := xdsresource.FindBestMatchingVirtualHost(r.dataplaneAuthority, update.VirtualHosts) if matchVh == nil { - r.onError(fmt.Errorf("no matching virtual host found for %q", r.dataplaneAuthority)) + // TODO: Should this be a resource or ambient error? Note that its + // being called only from resource update methods when we have finished + // removing the previous update. + r.onAmbientError(fmt.Errorf("no matching virtual host found for %q", r.dataplaneAuthority)) return } r.currentRouteConfig = update @@ -469,12 +472,12 @@ func (r *xdsResolver) applyRouteConfigUpdate(update xdsresource.RouteConfigUpdat r.onResolutionComplete() } -// onError propagates the error up to the channel. And since this is invoked -// only for non resource-not-found errors, we don't have to update resolver +// onAmbientError propagates the error up to the channel. And since this is +// invoked only for non resource errors, we don't have to update resolver // state and we can keep using the old config. // // Only executed in the context of a serializer callback. 
-func (r *xdsResolver) onError(err error) { +func (r *xdsResolver) onAmbientError(err error) { r.cc.ReportError(err) } @@ -550,7 +553,7 @@ func (r *xdsResolver) onListenerResourceAmbientError(err error) { if r.logger.V(2) { r.logger.Infof("Received ambient error for Listener resource %q: %v", r.ldsResourceName, err) } - r.onError(err) + r.onAmbientError(err) } // Only executed in the context of a serializer callback. @@ -590,7 +593,7 @@ func (r *xdsResolver) onRouteConfigResourceAmbientError(name string, err error) if r.logger.V(2) { r.logger.Infof("Received ambient error for RouteConfiguration resource %q: %v", name, err) } - r.onError(err) + r.onAmbientError(err) } // Only executed in the context of a serializer callback. diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index 58682517b13c..692bd56ba8c6 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -410,6 +410,8 @@ func (lw *ldsWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, // the server into a "not serving" mode. This is not ideal, but this is // what we have decided to do. if ilc.Address != l.addr || ilc.Port != l.port { + // TODO: Are there any other cases where this can be treated as an + // ambient error? 
l.mu.Lock() err := fmt.Errorf("[xDS node id: %v]: %w", l.xdsNodeID, fmt.Errorf("address (%s:%s) in Listener update does not match listening address: (%s:%s)", ilc.Address, ilc.Port, l.addr, l.port)) l.switchModeLocked(connectivity.ServingModeNotServing, err) diff --git a/xds/internal/server/rds_handler.go b/xds/internal/server/rds_handler.go index d66db507c2b7..4b8eb22de8db 100644 --- a/xds/internal/server/rds_handler.go +++ b/xds/internal/server/rds_handler.go @@ -184,6 +184,5 @@ func (rw *rdsWatcher) AmbientError(err error, onDone func()) { } routeName := rw.routeName rwu := rw.parent.updates[routeName] - rw.parent.updates[routeName] = rwu rw.parent.callback(routeName, rwu) } diff --git a/xds/internal/testutils/resource_watcher.go b/xds/internal/testutils/resource_watcher.go index 98f4a0e6e137..e8f7830a7641 100644 --- a/xds/internal/testutils/resource_watcher.go +++ b/xds/internal/testutils/resource_watcher.go @@ -29,9 +29,11 @@ import "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" type TestResourceWatcher struct { // UpdateCh is the channel on which xDS client updates are delivered. UpdateCh chan *xdsresource.ResourceData - // AmbientErrorCh is the channel on which errors from the xDS client are delivered. + // AmbientErrorCh is the channel on which ambient errors from the xDS + // client are delivered. AmbientErrorCh chan error - // ResourceErrorCh is the channel used to indicate calls to ResourceError. + // ResourceErrorCh is the channel on which resource errors from the xDS + // client are delivered. ResourceErrorCh chan struct{} } diff --git a/xds/server_ext_test.go b/xds/server_ext_test.go index a58bc63d6e3c..18fcee8d6d3f 100644 --- a/xds/server_ext_test.go +++ b/xds/server_ext_test.go @@ -104,19 +104,20 @@ func (m *servingModeChangeHandler) modeChangeCallback(addr net.Addr, args xds.Se defer m.mu.Unlock() // Suppress pushing duplicate mode change and error if the mode is staying // in NOT_SERVING and the error is the same. 
+ // + // TODO: Should we move this check to listener wrapper? This shouldn't + // happen in practice a lot. But we never know what kind of management + // servers users run. if m.currentMode == args.Mode && m.currentMode == connectivity.ServingModeNotServing && m.currentErr.Error() == args.Err.Error() { return } m.logger.Logf("Serving mode for listener %q changed to %q, err: %v", addr.String(), args.Mode, args.Err) m.modeCh <- args.Mode - if args.Mode == connectivity.ServingModeServing { - m.currentErr = nil - } m.currentMode = args.Mode if args.Err != nil { m.errCh <- args.Err - m.currentErr = args.Err } + m.currentErr = args.Err } // createStubServer creates a new xDS-enabled gRPC server and returns a From c43cd62f8ff3c7cd266c0cf319f0689bab782829 Mon Sep 17 00:00:00 2001 From: Purnesh Dixit Date: Sat, 12 Apr 2025 15:06:05 +0530 Subject: [PATCH 16/16] TODO and helper --- .../balancer/cdsbalancer/cdsbalancer.go | 32 +++++++------------ xds/internal/resolver/xds_resolver.go | 6 ++-- xds/internal/server/listener_wrapper.go | 4 +-- xds/server_ext_test.go | 6 ++-- 4 files changed, 20 insertions(+), 28 deletions(-) diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index f1c1e681a272..fa34748c8822 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -357,11 +357,7 @@ func (b *cdsBalancer) ResolverError(err error) { if b.lbCfg != nil { root = b.lbCfg.ClusterName } - if b.childLB != nil { - b.onClusterAmbientError(root, err) - return - } - b.onClusterResourceError(root, err) + b.onClusterError(root, err) }) } @@ -478,11 +474,7 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd // If the security config is invalid, for example, if the provider // instance is not found in the bootstrap config, we need to put the // channel in transient failure. 
- if b.childLB != nil { - b.onClusterAmbientError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) - } else { - b.onClusterResourceError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) - } + b.onClusterError(name, b.annotateErrorWithNodeID(fmt.Errorf("received Cluster resource contains invalid security config: %v", err))) return } } @@ -490,20 +482,12 @@ func (b *cdsBalancer) onClusterUpdate(name string, update xdsresource.ClusterUpd clustersSeen := make(map[string]bool) dms, ok, err := b.generateDMsForCluster(b.lbCfg.ClusterName, 0, nil, clustersSeen) if err != nil { - if b.childLB != nil { - b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) - } else { - b.onClusterResourceError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) - } + b.onClusterError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("failed to generate discovery mechanisms: %v", err))) return } if ok { if len(dms) == 0 { - if b.childLB != nil { - b.onClusterAmbientError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) - } else { - b.onClusterResourceError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) - } + b.onClusterError(b.lbCfg.ClusterName, b.annotateErrorWithNodeID(fmt.Errorf("aggregate cluster graph has no leaf clusters"))) return } // Child policy is built the first time we resolve the cluster graph. 
@@ -682,6 +666,14 @@ func (b *cdsBalancer) generateDMsForCluster(name string, depth int, dms []cluste return append(dms, dm), true, nil } +func (b *cdsBalancer) onClusterError(name string, err error) { + if b.childLB != nil { + b.onClusterAmbientError(name, err) + } else { + b.onClusterResourceError(name, err) + } +} + // ccWrapper wraps the balancer.ClientConn passed to the CDS balancer at // creation and intercepts the NewSubConn() and UpdateAddresses() call from the // child policy to add security configuration required by xDS credentials. diff --git a/xds/internal/resolver/xds_resolver.go b/xds/internal/resolver/xds_resolver.go index 8e4b6796a179..a66719d0685f 100644 --- a/xds/internal/resolver/xds_resolver.go +++ b/xds/internal/resolver/xds_resolver.go @@ -459,9 +459,9 @@ func (r *xdsResolver) onResolutionComplete() { func (r *xdsResolver) applyRouteConfigUpdate(update xdsresource.RouteConfigUpdate) { matchVh := xdsresource.FindBestMatchingVirtualHost(r.dataplaneAuthority, update.VirtualHosts) if matchVh == nil { - // TODO: Should this be a resource or ambient error? Note that its - // being called only from resource update methods when we have finished - // removing the previous update. + // TODO(purnesh42h): Should this be a resource or ambient error? Note + // that it's being called only from resource update methods when we have + // finished removing the previous update. r.onAmbientError(fmt.Errorf("no matching virtual host found for %q", r.dataplaneAuthority)) return } diff --git a/xds/internal/server/listener_wrapper.go b/xds/internal/server/listener_wrapper.go index 692bd56ba8c6..2c32ace8ab05 100644 --- a/xds/internal/server/listener_wrapper.go +++ b/xds/internal/server/listener_wrapper.go @@ -410,8 +410,8 @@ func (lw *ldsWatcher) ResourceChanged(update *xdsresource.ListenerResourceData, // the server into a "not serving" mode. This is not ideal, but this is // what we have decided to do. 
if ilc.Address != l.addr || ilc.Port != l.port { - // TODO: Are there any other cases where this can be treated as an - // ambient error? + // TODO(purnesh42h): Are there any other cases where this can be + // treated as an ambient error? l.mu.Lock() err := fmt.Errorf("[xDS node id: %v]: %w", l.xdsNodeID, fmt.Errorf("address (%s:%s) in Listener update does not match listening address: (%s:%s)", ilc.Address, ilc.Port, l.addr, l.port)) l.switchModeLocked(connectivity.ServingModeNotServing, err) diff --git a/xds/server_ext_test.go b/xds/server_ext_test.go index 18fcee8d6d3f..81ead009e17e 100644 --- a/xds/server_ext_test.go +++ b/xds/server_ext_test.go @@ -105,9 +105,9 @@ func (m *servingModeChangeHandler) modeChangeCallback(addr net.Addr, args xds.Se // Suppress pushing duplicate mode change and error if the mode is staying // in NOT_SERVING and the error is the same. // - // TODO: Should we move this check to listener wrapper? This shouldn't - // happen in practice a lot. But we never know what kind of management - // servers users run. + // TODO(purnesh42h): Should we move this check to listener wrapper? This + // shouldn't happen in practice a lot. But we never know what kind of + // management servers users run. if m.currentMode == args.Mode && m.currentMode == connectivity.ServingModeNotServing && m.currentErr.Error() == args.Err.Error() { return }