feature: migrate contextual logging #1613

Open · wants to merge 1 commit into base: master
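The diff below replaces global klog calls (klog.ErrorS, klog.V(n).Infof, klog.Warning) with contextual logging: a klog.Logger is retrieved from the context via klog.FromContext and passed down explicitly. For orientation, here is a minimal sketch of that pattern using only the public klog v2 API; the doWork/main helpers, the "descheduler" logger name, and the key/value pairs are illustrative and are not part of this PR.

```go
package main

import (
	"context"
	"errors"

	"k8s.io/klog/v2"
)

// doWork is an illustrative helper, not part of the descheduler codebase.
func doWork(ctx context.Context) error {
	// Pull the logger out of the context instead of calling the global
	// klog.ErrorS/InfoS helpers; names and key/value pairs attached by
	// callers follow the call chain automatically.
	logger := klog.FromContext(ctx)
	logger.V(2).Info("starting work", "step", 1)

	err := errors.New("simulated failure")
	if err != nil {
		// logr-style structured error logging, replacing klog.ErrorS.
		logger.Error(err, "work failed")
		return err
	}
	return nil
}

func main() {
	// Attach a named logger with static key/value pairs to the context;
	// anything downstream that calls klog.FromContext(ctx) inherits them.
	logger := klog.Background().WithName("descheduler").WithValues("dryRun", false)
	ctx := klog.NewContext(context.Background(), logger)
	_ = doWork(ctx)
}
```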
4 changes: 2 additions & 2 deletions cmd/descheduler/app/options/options.go
@@ -127,7 +127,7 @@ func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
rs.SecureServing.AddFlags(fs)
}

func (rs *DeschedulerServer) Apply() error {
func (rs *DeschedulerServer) Apply(logger klog.Logger) error {
err := features.DefaultMutableFeatureGate.SetFromMap(rs.FeatureGates)
if err != nil {
return err
@@ -138,7 +138,7 @@ func (rs *DeschedulerServer) Apply() error {
var loopbackClientConfig *restclient.Config
var secureServing *apiserver.SecureServingInfo
if err := rs.SecureServing.ApplyTo(&secureServing, &loopbackClientConfig); err != nil {
klog.ErrorS(err, "failed to apply secure server configuration")
logger.Error(err, "failed to apply secure server configuration")
return err
}

15 changes: 9 additions & 6 deletions cmd/descheduler/app/server.go
@@ -43,9 +43,10 @@ import (

// NewDeschedulerCommand creates a *cobra.Command object with default parameters
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
logger := klog.FromContext(context.TODO())
s, err := options.NewDeschedulerServer()
if err != nil {
klog.ErrorS(err, "unable to initialize server")
logger.Error(err, "unable to initialize server")
}

featureGate := featuregate.NewFeatureGate()
@@ -64,13 +65,14 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {
return nil
},
RunE: func(cmd *cobra.Command, args []string) error {
if err = s.Apply(); err != nil {
klog.ErrorS(err, "failed to apply")
logger := klog.FromContext(cmd.Context())
if err = s.Apply(logger); err != nil {
logger.Error(err, "failed to apply")
return err
}

if err = Run(cmd.Context(), s); err != nil {
klog.ErrorS(err, "failed to run descheduler server")
logger.Error(err, "failed to run descheduler server")
return err
}

@@ -89,6 +91,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {

func Run(rootCtx context.Context, rs *options.DeschedulerServer) error {
ctx, done := signal.NotifyContext(rootCtx, syscall.SIGINT, syscall.SIGTERM)
logger := klog.FromContext(ctx)

pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !rs.DisableMetrics {
@@ -99,13 +102,13 @@ func Run(rootCtx context.Context, rs *options.DeschedulerServer) error {

stoppedCh, _, err := rs.SecureServingInfo.Serve(pathRecorderMux, 0, ctx.Done())
if err != nil {
klog.Fatalf("failed to start secure server: %v", err)
logger.Error(err, "failed to start secure server")
return err
}

err = tracing.NewTracerProvider(ctx, rs.Tracing.CollectorEndpoint, rs.Tracing.TransportCert, rs.Tracing.ServiceName, rs.Tracing.ServiceNamespace, rs.Tracing.SampleRate, rs.Tracing.FallbackToNoOpProviderOnError)
if err != nil {
klog.ErrorS(err, "failed to create tracer provider")
logger.Error(err, "failed to create tracer provider")
}
defer tracing.Shutdown(ctx)

37 changes: 21 additions & 16 deletions pkg/descheduler/descheduler.go
@@ -198,10 +198,11 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
defer func(loopStartDuration time.Time) {
metrics.DeschedulerLoopDuration.With(map[string]string{}).Observe(time.Since(loopStartDuration).Seconds())
}(time.Now())
logger := klog.FromContext(ctx)

// if len is still <= 1 error out
if len(nodes) <= 1 {
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
logger.V(1).Info("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
return fmt.Errorf("the cluster size is 0 or 1")
}

@@ -210,7 +211,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
// So when evicting pods while running multiple strategies in a row have the cummulative effect
// as is when evicting pods for real.
if d.rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
logger.V(3).Info("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
@@ -240,13 +241,13 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
client = d.rs.Client
}

klog.V(3).Infof("Setting up the pod evictor")
logger.V(3).Info("Setting up the pod evictor")
d.podEvictor.SetClient(client)
d.podEvictor.ResetCounters()

d.runProfiles(ctx, client, nodes)

klog.V(1).InfoS("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())
logger.V(1).Info("Number of evictions/requests", "totalEvicted", d.podEvictor.TotalEvicted(), "evictionRequests", d.podEvictor.TotalEvictionRequests())

return nil
}
@@ -259,8 +260,10 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
ctx, span = tracing.Tracer().Start(ctx, "runProfiles")
defer span.End()
var profileRunners []profileRunner
logger := klog.FromContext(ctx)
for _, profile := range d.deschedulerPolicy.Profiles {
currProfile, err := frameworkprofile.NewProfile(
ctx,
profile,
pluginregistry.PluginRegistry,
frameworkprofile.WithClientSet(client),
@@ -270,7 +273,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
frameworkprofile.WithMetricsCollector(d.metricsCollector),
)
if err != nil {
klog.ErrorS(err, "unable to create a profile", "profile", profile.Name)
logger.Error(err, "unable to create a profile", "profile", profile.Name)
continue
}
profileRunners = append(profileRunners, profileRunner{profile.Name, currProfile.RunDeschedulePlugins, currProfile.RunBalancePlugins})
@@ -281,7 +284,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
status := profileR.descheduleEPs(ctx, nodes)
if status != nil && status.Err != nil {
span.AddEvent("failed to perform deschedule operations", trace.WithAttributes(attribute.String("err", status.Err.Error()), attribute.String("profile", profileR.name), attribute.String("operation", tracing.DescheduleOperation)))
klog.ErrorS(status.Err, "running deschedule extension point failed with error", "profile", profileR.name)
logger.Error(status.Err, "running deschedule extension point failed with error", "profile", profileR.name)
continue
}
}
@@ -291,7 +294,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interfac
status := profileR.balanceEPs(ctx, nodes)
if status != nil && status.Err != nil {
span.AddEvent("failed to perform balance operations", trace.WithAttributes(attribute.String("err", status.Err.Error()), attribute.String("profile", profileR.name), attribute.String("operation", tracing.BalanceOperation)))
klog.ErrorS(status.Err, "running balance extension point failed with error", "profile", profileR.name)
logger.Error(status.Err, "running balance extension point failed with error", "profile", profileR.name)
continue
}
}
@@ -302,6 +305,7 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
ctx, span = tracing.Tracer().Start(ctx, "Run")
defer span.End()
metrics.Register()
logger := klog.FromContext(ctx)

clientConnection := rs.ClientConnection
if rs.KubeconfigFile != "" && clientConnection.Kubeconfig == "" {
@@ -314,7 +318,7 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
rs.Client = rsclient
rs.EventClient = eventClient

deschedulerPolicy, err := LoadPolicyConfig(rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry)
deschedulerPolicy, err := LoadPolicyConfig(ctx, rs.PolicyConfigFile, rs.Client, pluginregistry.PluginRegistry)
if err != nil {
return err
}
@@ -324,7 +328,7 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {

// Add k8s compatibility warnings to logs
if err := validateVersionCompatibility(rs.Client.Discovery(), version.Get()); err != nil {
klog.Warning(err.Error())
logger.Error(err, "validate version error")
}

evictionPolicyGroupVersion, err := eutils.SupportEviction(rs.Client)
@@ -350,11 +354,11 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
}

if rs.LeaderElection.LeaderElect && rs.DryRun {
klog.V(1).Info("Warning: DryRun is set to True. You need to disable it to use Leader Election.")
logger.V(1).Info("Warning: DryRun is set to True. You need to disable it to use Leader Election.")
}

if rs.LeaderElection.LeaderElect && !rs.DryRun {
if err := NewLeaderElection(runFn, rsclient, &rs.LeaderElection, ctx); err != nil {
if err := NewLeaderElection(ctx, runFn, rsclient, &rs.LeaderElection); err != nil {
span.AddEvent("Leader Election Failure", trace.WithAttributes(attribute.String("err", err.Error())))
return fmt.Errorf("leaderElection: %w", err)
}
@@ -416,6 +420,7 @@ func podEvictionReactionFnc(fakeClient *fakeclientset.Clientset) func(action cor
}

func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
logger := klog.FromContext(ctx)
var span trace.Span
ctx, span = tracing.Tracer().Start(ctx, "RunDeschedulerStrategies")
defer span.End()
@@ -450,11 +455,11 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer

if deschedulerPolicy.MetricsCollector.Enabled {
go func() {
klog.V(2).Infof("Starting metrics collector")
logger.V(2).Info("Starting metrics collector")
descheduler.metricsCollector.Run(ctx)
klog.V(2).Infof("Stopped metrics collector")
logger.V(2).Info("Stopped metrics collector")
}()
klog.V(2).Infof("Waiting for metrics collector to sync")
logger.V(2).Info("Waiting for metrics collector to sync")
if err := wait.PollWithContext(ctx, time.Second, time.Minute, func(context.Context) (done bool, err error) {
return descheduler.metricsCollector.HasSynced(), nil
}); err != nil {
@@ -470,14 +475,14 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
nodes, err := nodeutil.ReadyNodes(sCtx, rs.Client, descheduler.sharedInformerFactory.Core().V1().Nodes().Lister(), nodeSelector)
if err != nil {
sSpan.AddEvent("Failed to detect ready nodes", trace.WithAttributes(attribute.String("err", err.Error())))
klog.Error(err)
logger.Error(err, "Failed to retrieve ready nodes from the cluster")
cancel()
return
}
err = descheduler.runDeschedulerLoop(sCtx, nodes)
if err != nil {
sSpan.AddEvent("Failed to run descheduler loop", trace.WithAttributes(attribute.String("err", err.Error())))
klog.Error(err)
logger.Error(err, "Failed to execute the descheduler loop with the retrieved nodes")
cancel()
return
}
43 changes: 21 additions & 22 deletions pkg/descheduler/descheduler_test.go
@@ -21,7 +21,6 @@ import (
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/component-base/featuregate"
"k8s.io/klog/v2"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
@@ -501,7 +500,7 @@ func TestPodEvictorReset(t *testing.T) {
}

// a single pod eviction expected
klog.Infof("2 pod eviction expected per a descheduling cycle, 2 real evictions in total")
t.Logf("2 pod eviction expected per a descheduling cycle, 2 real evictions in total")
if err := descheduler.runDeschedulerLoop(ctx, nodes); err != nil {
t.Fatalf("Unable to run a descheduling loop: %v", err)
}
@@ -510,7 +509,7 @@ func TestPodEvictorReset(t *testing.T) {
}

// a single pod eviction expected
klog.Infof("2 pod eviction expected per a descheduling cycle, 4 real evictions in total")
t.Logf("2 pod eviction expected per a descheduling cycle, 4 real evictions in total")
if err := descheduler.runDeschedulerLoop(ctx, nodes); err != nil {
t.Fatalf("Unable to run a descheduling loop: %v", err)
}
@@ -519,19 +518,19 @@ func TestPodEvictorReset(t *testing.T) {
}

// check the fake client syncing and the right pods evicted
klog.Infof("Enabling the dry run mode")
t.Logf("Enabling the dry run mode")
rs.DryRun = true
evictedPods = []string{}

klog.Infof("2 pod eviction expected per a descheduling cycle, 2 fake evictions in total")
t.Logf("2 pod eviction expected per a descheduling cycle, 2 fake evictions in total")
if err := descheduler.runDeschedulerLoop(ctx, nodes); err != nil {
t.Fatalf("Unable to run a descheduling loop: %v", err)
}
if descheduler.podEvictor.TotalEvicted() != 2 || len(evictedPods) != 0 || len(fakeEvictedPods) != 2 {
t.Fatalf("Expected (2,0,2) pods evicted, got (%v, %v, %v) instead", descheduler.podEvictor.TotalEvicted(), len(evictedPods), len(fakeEvictedPods))
}

klog.Infof("2 pod eviction expected per a descheduling cycle, 4 fake evictions in total")
t.Logf("2 pod eviction expected per a descheduling cycle, 4 fake evictions in total")
if err := descheduler.runDeschedulerLoop(ctx, nodes); err != nil {
t.Fatalf("Unable to run a descheduling loop: %v", err)
}
@@ -602,73 +601,73 @@ func TestEvictionRequestsCache(t *testing.T) {
var evictedPods []string
client.PrependReactor("create", "pods", podEvictionReactionTestingFnc(&evictedPods, func(name string) bool { return name == "p1" || name == "p2" }, nil))

klog.Infof("2 evictions in background expected, 2 normal evictions")
t.Logf("2 evictions in background expected, 2 normal evictions")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 2, 2)

klog.Infof("Repeat the same as previously to confirm no more evictions in background are requested")
t.Logf("Repeat the same as previously to confirm no more evictions in background are requested")
// No evicted pod is actually deleted on purpose so the test can run the descheduling cycle repeatedly
// without recreating the pods.
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 2, 2)

klog.Infof("Scenario: Eviction in background got initiated")
t.Logf("Scenario: Eviction in background got initiated")
p2.Annotations[evictions.EvictionInProgressAnnotationKey] = ""
if _, err := client.CoreV1().Pods(p2.Namespace).Update(context.TODO(), p2, metav1.UpdateOptions{}); err != nil {
t.Fatalf("unable to update a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Repeat the same as previously to confirm no more evictions in background are requested")
t.Logf("Repeat the same as previously to confirm no more evictions in background are requested")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 2, 2)

klog.Infof("Scenario: Another eviction in background got initiated")
t.Logf("Scenario: Another eviction in background got initiated")
p1.Annotations[evictions.EvictionInProgressAnnotationKey] = ""
if _, err := client.CoreV1().Pods(p1.Namespace).Update(context.TODO(), p1, metav1.UpdateOptions{}); err != nil {
t.Fatalf("unable to update a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Repeat the same as previously to confirm no more evictions in background are requested")
t.Logf("Repeat the same as previously to confirm no more evictions in background are requested")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 2, 2)

klog.Infof("Scenario: Eviction in background completed")
t.Logf("Scenario: Eviction in background completed")
if err := client.CoreV1().Pods(p1.Namespace).Delete(context.TODO(), p1.Name, metav1.DeleteOptions{}); err != nil {
t.Fatalf("unable to delete a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Check the number of evictions in background decreased")
t.Logf("Check the number of evictions in background decreased")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 1, 2)

klog.Infof("Scenario: A new pod without eviction in background added")
t.Logf("Scenario: A new pod without eviction in background added")
if _, err := client.CoreV1().Pods(p5.Namespace).Create(context.TODO(), p5, metav1.CreateOptions{}); err != nil {
t.Fatalf("unable to create a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Check the number of evictions increased after running a descheduling cycle")
t.Logf("Check the number of evictions increased after running a descheduling cycle")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 1, 3)

klog.Infof("Scenario: Eviction in background canceled => eviction in progress annotation removed")
t.Logf("Scenario: Eviction in background canceled => eviction in progress annotation removed")
delete(p2.Annotations, evictions.EvictionInProgressAnnotationKey)
if _, err := client.CoreV1().Pods(p2.Namespace).Update(context.TODO(), p2, metav1.UpdateOptions{}); err != nil {
t.Fatalf("unable to update a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Check the number of evictions in background decreased")
t.Logf("Check the number of evictions in background decreased")
checkTotals(t, ctx, descheduler, 0, 3)

klog.Infof("Scenario: Re-run the descheduling cycle to re-request eviction in background")
t.Logf("Scenario: Re-run the descheduling cycle to re-request eviction in background")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 1, 3)

klog.Infof("Scenario: Eviction in background completed with a pod in completed state")
t.Logf("Scenario: Eviction in background completed with a pod in completed state")
p2.Status.Phase = v1.PodSucceeded
if _, err := client.CoreV1().Pods(p2.Namespace).Update(context.TODO(), p2, metav1.UpdateOptions{}); err != nil {
t.Fatalf("unable to delete a pod: %v", err)
}
time.Sleep(100 * time.Millisecond)

klog.Infof("Check the number of evictions in background decreased")
t.Logf("Check the number of evictions in background decreased")
runDeschedulingCycleAndCheckTotals(t, ctx, nodes, descheduler, 0, 3)
}

@@ -769,7 +768,7 @@ func TestDeschedulingLimits(t *testing.T) {
}
time.Sleep(100 * time.Millisecond)

klog.Infof("2 evictions in background expected, 2 normal evictions")
t.Logf("2 evictions in background expected, 2 normal evictions")
err := descheduler.runDeschedulerLoop(ctx, nodes)
if err != nil {
t.Fatalf("Unable to run a descheduling loop: %v", err)
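The test-file changes above swap klog.Infof progress messages for t.Logf. When a test needs a real contextual logger wired into a context (for example to drive runDeschedulerLoop), klog's ktesting package can supply one. The sketch below is a minimal example under that assumption; the test name and package are illustrative and not part of this PR.

```go
package descheduler_test

import (
	"testing"

	"k8s.io/klog/v2"
	"k8s.io/klog/v2/ktesting"
)

func TestContextualLoggingExample(t *testing.T) {
	// ktesting returns a logger and a context carrying it; output is routed
	// through t.Log, and code under test that calls klog.FromContext(ctx)
	// picks up the same logger.
	logger, ctx := ktesting.NewTestContext(t)
	logger.V(2).Info("test setup complete")

	// A migrated function such as runDeschedulerLoop(ctx, nodes) could now be
	// driven with this ctx; here we only demonstrate the retrieval.
	_ = klog.FromContext(ctx)
}
```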