From 74f17764d98c3c2594e2ef5c06022839c7701d6e Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 11 May 2021 18:56:44 +0000 Subject: [PATCH 01/24] changed klog.Infof to klog.V(4).Infof --- .../pkg/scheduler/eventhandlers.go | 40 ++++----- .../plugins/defaultbinder/default_binder.go | 6 +- .../siteavailability/siteavailability.go | 2 +- globalscheduler/pkg/scheduler/scheduler.go | 88 +++++++++---------- .../scheduler/sitecacheinfo/sitecache_info.go | 2 +- 5 files changed, 68 insertions(+), 70 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 1ed0fae2b..9322054b1 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -138,7 +138,7 @@ func responsibleForPod(pod *v1.Pod, schedulerName string) bool { // addPodToCache add pod to the stack cache of the scheduler func (sched *Scheduler) addPodToCache(obj interface{}) { pod, ok := obj.(*v1.Pod) - klog.Infof("Add a pod: %v", pod) + klog.V(4).Infof("Add a pod: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", obj) return @@ -160,7 +160,7 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("Update a pod: %v", newPod) + klog.V(4).Infof("Update a pod: %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return @@ -178,7 +178,7 @@ func (sched *Scheduler) deletePodFromCache(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = t - klog.Infof("Delete a pod: %v", pod) + klog.V(4).Infof("Delete a pod: %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -301,15 +301,13 @@ func (sched *Scheduler) updatePodInSchedulingQueue(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("updatePodToSchedulingQueue : %v", newPod) + klog.V(4).Infof("updatePodToSchedulingQueue : %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return } - oldStack := getStackFromPod(oldPod) newStack := getStackFromPod(newPod) - if sched.skipStackUpdate(newStack) { return } @@ -323,7 +321,7 @@ func (sched *Scheduler) deletePodFromSchedulingQueue(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = obj.(*v1.Pod) - klog.Infof("deletePodToSchedulingQueue : %v", pod) + klog.V(4).Infof("deletePodToSchedulingQueue : %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -372,14 +370,14 @@ func (sched *Scheduler) skipStackUpdate(stack *types.Stack) bool { if !reflect.DeepEqual(assumedStackCopy, stackCopy) { return false } - klog.V(3).Infof("Skipping stack %s/%s/%s update", stack.Tenant, stack.PodNamespace, stack.PodName) + klog.V(4).Infof("Skipping stack %s/%s/%s update", stack.Tenant, stack.PodNamespace, stack.PodName) return true } func (sched *Scheduler) bindStacks(assumedStacks []types.Stack) { - klog.Infof("assumedStacks: %v", assumedStacks) + klog.V(4).Infof("assumedStacks: %v", assumedStacks) for _, newStack := range assumedStacks { - klog.Infof("newStack: %v", newStack) + klog.V(4).Infof("newStack: %v", newStack) clusterName := newStack.Selected.ClusterName sched.bindToSite(clusterName, &newStack) } @@ -398,7 +396,7 @@ func (sched *Scheduler) setPodScheduleErr(reqStack *types.Stack) error { newStatus := v1.PodStatus{ Phase: v1.PodNoSchedule, } - klog.Infof("Attempting to update pod status from %v to %v", pod.Status, newStatus) + klog.V(4).Infof("Attempting to update pod 
status from %v to %v", pod.Status, newStatus) _, _, err = statusutil.PatchPodStatus(sched.Client, reqStack.Tenant, reqStack.PodNamespace, reqStack.PodName, pod.Status, newStatus) if err != nil { klog.Warningf("PatchPodStatus for pod %q: %v", reqStack.PodName+"/"+reqStack.PodNamespace+"/"+ @@ -406,7 +404,7 @@ func (sched *Scheduler) setPodScheduleErr(reqStack *types.Stack) error { return err } - klog.Infof("Update pod status from %v to %v success", pod.Status, newStatus) + klog.V(4).Infof("Update pod status from %v to %v success", pod.Status, newStatus) return nil } @@ -424,9 +422,9 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack }, } - klog.V(3).Infof("binding: %v", binding) + klog.V(4).Infof("binding: %v", binding) // do api server update here - klog.Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) + klog.V(4).Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) err := sched.Client.CoreV1().PodsWithMultiTenancy(binding.Namespace, binding.Tenant).Bind(binding) if err != nil { klog.Errorf("Failed to bind stack: %v/%v/%v", assumedStack.Tenant, assumedStack.PodNamespace, @@ -444,7 +442,7 @@ func (sched *Scheduler) addCluster(object interface{}) { resource := object.(*clusterv1.Cluster) clusterCopy := resource.DeepCopy() if sched.verifyClusterInfo(clusterCopy) == false { - klog.Infof(" Cluster data is not correct: %v", clusterCopy) + klog.V(4).Infof(" Cluster data is not correct: %v", clusterCopy) } key, err := controller.KeyFunc(object) if err != nil { @@ -452,7 +450,7 @@ func (sched *Scheduler) addCluster(object interface{}) { return } sched.Enqueue(key, EventType_Create) - klog.Infof("Enqueue Create cluster: %v", key) + klog.V(4).Infof("Enqueue Create cluster: %v", key) } func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { @@ -461,7 +459,7 @@ func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { oldClusterCopy := oldResource.DeepCopy() newClusterCopy := newResource.DeepCopy() if sched.verifyClusterInfo(newClusterCopy) { - klog.Infof(" Cluster data is not correct: %v", newResource) + klog.V(4).Infof(" Cluster data is not correct: %v", newResource) } key1, err1 := controller.KeyFunc(oldObject) key2, err2 := controller.KeyFunc(newObject) @@ -478,13 +476,13 @@ func (sched *Scheduler) updateCluster(oldObject, newObject interface{}) { switch eventType { case ClusterUpdateNo: { - klog.Infof("No actual change in clusters, discarding: %v", newClusterCopy.Name) + klog.V(4).Infof("No actual change in clusters, discarding: %v", newClusterCopy.Name) break } case ClusterUpdateYes: { sched.Enqueue(key2, EventType_Update) - klog.Infof("Enqueue Update Cluster: %v", key2) + klog.V(4).Infof("Enqueue Update Cluster: %v", key2) break } default: @@ -499,7 +497,7 @@ func (sched *Scheduler) deleteCluster(object interface{}) { resource := object.(*clusterv1.Cluster) clusterCopy := resource.DeepCopy() if sched.verifyClusterInfo(clusterCopy) == false { - klog.Infof(" Cluster data is not correct: %v", clusterCopy) + klog.V(4).Infof(" Cluster data is not correct: %v", clusterCopy) return } key, err := controller.KeyFunc(object) @@ -510,7 +508,7 @@ func (sched *Scheduler) deleteCluster(object interface{}) { sched.Enqueue(key, EventType_Delete) siteID := clusterCopy.Spec.Region.Region + constants.SiteDelimiter + clusterCopy.Spec.Region.AvailabilityZone sched.deletedClusters[key] = siteID - klog.Infof("Enqueue Delete Cluster: %v", key) + klog.V(4).Infof("Enqueue Delete Cluster: %v", key) } // Enqueue 
puts key of the cluster object in the work queue diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index 89366bd1a..a8391ea44 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Errorf("GetSiteSelectorState: %v", siteSelectedInfo) + klog.Errorf("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -85,7 +85,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Warningf("flavor %s not found in region(%s)", flavorID, region) continue } - klog.Infof("flavor %s : %v", flavorID, flv) + klog.V(4).Infof("flavor %s : %v", flavorID, flv) vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) if err != nil || vCPUInt <= 0 { klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region) @@ -111,6 +111,6 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s regionFlavors = map[string]*typed.RegionFlavor{} } siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("Resource state after deduction: %v", siteCacheInfo) + klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) return nil } diff --git a/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go b/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go index e85ddddda..3a323f4fd 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/siteavailability/siteavailability.go @@ -44,7 +44,7 @@ func (pl *SiteAvailability) Name() string { // Filter invoked at the filter extension point. 
func (pl *SiteAvailability) Filter(ctx context.Context, cycleState *interfaces.CycleState, stack *types.Stack, siteCacheInfo *sitecacheinfo.SiteCacheInfo) *interfaces.Status { - klog.Infof("Filter- siteCacheInfo: %v", siteCacheInfo) + klog.V(4).Infof("Filter- siteCacheInfo: %v", siteCacheInfo) if siteCacheInfo.GetSite().Status == constants.SiteStatusOffline || siteCacheInfo.GetSite().Status == constants.SiteStatusSellout { msg := fmt.Sprintf("Site(%s) status is %s, not available!", siteCacheInfo.GetSite().SiteID, siteCacheInfo.GetSite().Status) klog.Info(msg) diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 497fc4189..47cab45bd 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -128,7 +128,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.Infof("stopEverything to check : %v", stopEverything) + klog.V(4).Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -150,7 +150,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -190,14 +190,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -207,13 +207,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. 
func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.Infof("Starting scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -225,7 +225,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -261,15 +261,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.V(4).Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -281,21 +281,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.Infof("Schedule result: %v", result) //result is assumed stacks - klog.Infof("3. Assumed Stacks: %v", result) + klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks + klog.V(4).Infof("3. 
Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -368,7 +368,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -454,7 +454,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. sort.Sort(sort.Reverse(result)) - klog.Infof("score sites: %v", result) + klog.V(4).Infof("score sites: %v", result) return result, nil } @@ -502,14 +502,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite // Schedule Run begins watching and scheduling. It waits for cache to be synced , // then starts scheduling and blocked until the context is done. func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) { - klog.Infof("Attempting to schedule allocation: %v", allocation.ID) + klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID) state := interfaces.NewCycleState() schedulingCycleCtx, cancel := context.WithCancel(ctx) defer cancel() // 1. Snapshot site resource cache start := time.Now() - klog.Infof("[START] snapshot site...") + klog.V(4).Infof("[START] snapshot site...") ///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap) ///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated @@ -519,16 +519,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 2. Run "prefilter" plugins. start = time.Now() - klog.Infof("[START] Running prefilter plugins...") + klog.V(4).Infof("[START] Running prefilter plugins...") preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack) if !preFilterStatus.IsSuccess() { return result, preFilterStatus.AsError() } - klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) // 3. Run "filter" plugins. 
start = time.Now() - klog.Infof("[START] Running filter plugins...") + klog.V(4).Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -536,9 +536,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -547,33 +547,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.Infof("[START] Running preScore plugins...") + klog.V(4).Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.Infof("[START] Running prioritizeSites plugins...") + klog.V(4).Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.Infof("[START] Running strategy plugins...") + klog.V(4).Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.Infof("selected Hosts : %#v", siteCount) + klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -605,7 +605,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -686,7 +686,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -730,7 +730,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.Infof("Process an item in work queue %v ", workItem) + klog.V(4).Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -739,7 +739,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.Infof("Successfully processed & synced %s", key) + klog.V(4).Infof("Successfully processed & synced %s", key) return true } @@ -749,7 +749,7 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.Infof("sync cache for key %v", key) + klog.V(4).Infof("sync cache for key %v", key) startTime := time.Now() defer func() { klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) @@ -757,16 +757,16 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.Infof(" Processed a cluster: %v", key) + klog.V(4).Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -821,7 +821,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("create a 
site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusCreated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -839,7 +839,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName) return false, err } - klog.Infof("update a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusUpdated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -871,7 +871,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType //This function updates sites' dynamic resource informaton func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) { //reset total(available) resource - klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) + klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) var siteID string for _, siteresource := range resource.CPUMemResources { siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index ddf4627fc..361961b1d 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -568,7 +568,7 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor for reqType, reqRes := range n.RequestedResources { resTypes := strings.Split(reqType, constants.FlavorDelimiter) if !utils.IsContain(resTypes, resType) { - klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) continue } reqRes.VCPU += res.VCPU From 207f22974e833d2f5b0ddf35ac0cd617f66c16c1 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 11 May 2021 19:18:23 +0000 Subject: [PATCH 02/24] updated code - removed else and added continue --- .../pkg/scheduler/eventhandlers.go | 2 -- .../scheduler/sitecacheinfo/sitecache_info.go | 25 ++++++++++--------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 9322054b1..abca84f72 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -421,7 +421,6 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack Name: clusterName, }, } - klog.V(4).Infof("binding: %v", binding) // do api server update here klog.V(4).Infof("Attempting to bind %v to %v", binding.Name, binding.Target.Name) @@ -432,7 +431,6 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack if err := sched.SchedulerCache.ForgetStack(assumedStack); err != nil { klog.Errorf("scheduler cache ForgetStack failed: %v", err) } - return err } return nil diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index 361961b1d..2deceb0ae 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -556,7 
+556,7 @@ func GetStackKey(stack *types.Stack) (string, error) { func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { var resourceTypes []string for resType, res := range resInfo.CpuAndMem { - //binding a pod for the first + //resource type is null, assign default resource type (e.g. when binding a pod for the first time) if resType == "" { resType = string(DefaultResourceType) resourceTypes = append(resourceTypes, resType) @@ -564,17 +564,17 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor if len(n.RequestedResources) == 0 { reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} n.RequestedResources[resType] = &reqRes - } else { - for reqType, reqRes := range n.RequestedResources { - resTypes := strings.Split(reqType, constants.FlavorDelimiter) - if !utils.IsContain(resTypes, resType) { - klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) - continue - } - reqRes.VCPU += res.VCPU - reqRes.Memory += res.Memory - n.RequestedResources[resType] = reqRes + continue + } + for reqType, reqRes := range n.RequestedResources { + resTypes := strings.Split(reqType, constants.FlavorDelimiter) + if !utils.IsContain(resTypes, resType) { + klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + continue } + reqRes.VCPU += res.VCPU + reqRes.Memory += res.Memory + n.RequestedResources[resType] = reqRes } } for volType, used := range resInfo.Storage { @@ -625,7 +625,8 @@ func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors m if totalRes == nil { n.deductFlavor() return - } else if requestRes == nil { + } + if requestRes == nil { requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} } count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt From c75e43847dd6d95497beda118b803667a5c9fc9c Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 09:18:53 +0000 Subject: [PATCH 03/24] implemented withdraw reserved resource for pod --- globalscheduler/controllers/dispatcher/BUILD | 2 + .../dispatcher/dispatcher_process.go | 27 ++- globalscheduler/controllers/distributor/BUILD | 7 +- .../distributor/distributor_test.go | 8 +- .../pkg/scheduler/eventhandlers.go | 169 +++++++++++++++- .../pkg/scheduler/factory/factory.go | 6 +- .../framework/interfaces/framework.go | 29 +++ .../framework/interfaces/interface.go | 4 + .../plugins/defaultbinder/default_binder.go | 92 ++++++++- .../framework/plugins/flavor/flavor.go | 2 +- .../pkg/scheduler/internal/cache/snapshot.go | 13 ++ globalscheduler/pkg/scheduler/scheduler.go | 129 +++++++----- .../scheduler/sitecacheinfo/sitecache_info.go | 184 +++++++++++++++++- 13 files changed, 602 insertions(+), 70 deletions(-) diff --git a/globalscheduler/controllers/dispatcher/BUILD b/globalscheduler/controllers/dispatcher/BUILD index 5cf8b5c91..1b8dcdde5 100644 --- a/globalscheduler/controllers/dispatcher/BUILD +++ b/globalscheduler/controllers/dispatcher/BUILD @@ -31,8 +31,10 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library", + "//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library", "//staging/src/k8s.io/client-go/rest:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library", + "//staging/src/k8s.io/client-go/tools/record:go_default_library", 
"//vendor/k8s.io/klog:go_default_library", ], ) diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go index 8f3cdeaab..aec245671 100644 --- a/globalscheduler/controllers/dispatcher/dispatcher_process.go +++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go @@ -19,8 +19,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/scheme" + typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/record" "k8s.io/klog" "k8s.io/kubernetes/globalscheduler/cmd/conf" "k8s.io/kubernetes/globalscheduler/controllers/util" @@ -50,6 +53,7 @@ type Process struct { totalDeleteLatency int64 totalPodCreateNum int totalPodDeleteNum int + recorder record.EventRecorder } func NewProcess(config *rest.Config, namespace string, name string, quit chan struct{}) Process { @@ -74,6 +78,11 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st if err != nil { klog.Fatal(err) } + eventBroadcaster := record.NewBroadcaster() + eventBroadcaster.StartLogging(klog.Infof) + eventBroadcaster.StartRecordingToSink( + &typedcorev1.EventSinkImpl{Interface: clientset.CoreV1().Events("")}) + recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: name}) return Process{ namespace: namespace, @@ -89,6 +98,7 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st totalDeleteLatency: 0, totalPodCreateNum: 0, totalPodDeleteNum: 0, + recorder: recorder, } } @@ -217,12 +227,13 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { go func() { instanceId, err := openstack.ServerCreate(host, token, &pod.Spec) if err == nil { - klog.V(3).Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) + klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) pod.Status.ClusterInstanceId = instanceId - pod.Status.Phase = v1.ClusterScheduled + //pod.Status.Phase = v1.ClusterScheduled + pod.Status.Phase = v1.PodFailed updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod) if err == nil { - klog.V(3).Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) + klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) } else { klog.Warningf("The pod %v failed to update its apiserver database status to scheduled with the error %v", pod.ObjectMeta.Name, err) @@ -234,6 +245,16 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) } } + ///for test + klog.Warningf("The openstack vm for the pod %v failed to create with the error", pod.ObjectMeta.Name) + pod.Status.Phase = v1.PodFailed + if _, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod); err != nil { + klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) + } + klog.Infof("+++ The pod info %s, %#v, %#v", pod.ObjectMeta.Name, pod.Status) + //p.recorder.Event(pod, corev1.EventTypeNormal, SuccessSynched, MessageResourceSynched) + 
//p.recorder.Event(pod, v1.EventTypeWarning, "Failed", "Failed to create vm") + // util.CheckTime(pod.Name, "dispatcher", "CreatePod-End", 2) }() } diff --git a/globalscheduler/controllers/distributor/BUILD b/globalscheduler/controllers/distributor/BUILD index 0d06146ed..0363e31f8 100644 --- a/globalscheduler/controllers/distributor/BUILD +++ b/globalscheduler/controllers/distributor/BUILD @@ -59,9 +59,14 @@ filegroup( go_test( name = "go_default_test", - srcs = ["distributor_controller_test.go"], + srcs = [ + "distributor_controller_test.go", + "distributor_test.go", + ], embed = [":go_default_library"], deps = [ + "//globalscheduler/pkg/apis/allocation/v1:go_default_library", + "//globalscheduler/pkg/apis/cluster/v1:go_default_library", "//globalscheduler/pkg/apis/distributor/client/clientset/versioned/fake:go_default_library", "//globalscheduler/pkg/apis/distributor/client/informers/externalversions:go_default_library", "//globalscheduler/pkg/apis/distributor/v1:go_default_library", diff --git a/globalscheduler/controllers/distributor/distributor_test.go b/globalscheduler/controllers/distributor/distributor_test.go index 241418516..da63747b9 100644 --- a/globalscheduler/controllers/distributor/distributor_test.go +++ b/globalscheduler/controllers/distributor/distributor_test.go @@ -22,9 +22,9 @@ import ( ) type Testcase struct { - SchedulerGeoLocation clustercrdv1.GeolocationInfo - AllocationGeoLocation allocv1.GeoLocation - ExpectedResult bool + SchedulerGeoLocation clustercrdv1.GeolocationInfo + AllocationGeoLocation allocv1.GeoLocation + ExpectedResult bool } func createSchedulerGeoLocation(city, province, area, country string) clustercrdv1.GeolocationInfo { @@ -106,7 +106,7 @@ func TestIsAllocationGeoLocationMatched(t *testing.T) { res := isAllocationGeoLocationMatched(&testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation) if res != testcase.ExpectedResult { t.Errorf("The isAllocationGeoLocationMatched test result %v is not empty as expected with geoLocations %v, %v", - res, testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation ) + res, testcase.SchedulerGeoLocation, testcase.AllocationGeoLocation) } } } diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index abca84f72..b6930e73d 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -26,11 +26,13 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apitypes "k8s.io/apimachinery/pkg/types" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + //"k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" "k8s.io/klog" clusterv1 "k8s.io/kubernetes/globalscheduler/pkg/apis/cluster/v1" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/utils" "k8s.io/kubernetes/pkg/controller" statusutil "k8s.io/kubernetes/pkg/util/pod" ) @@ -70,6 +72,15 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: + //klog.Infof("###111Pod: %#v", t) + klog.Infof("###111PodStatus: %#v", t.Status) + pod := obj.(*v1.Pod) + klog.Infof("#: %#v", pod.Name) + ppp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###pppPod: %#v", ppp) + klog.Infof("###pppPodStatus: %#v", ppp.Status) + } return assignedPod(t) && responsibleForPod(t, sched.SchedulerName) case 
cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -94,6 +105,16 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: + //klog.Infof("###222Pod: %#v", t) + klog.Infof("###222PodStatus: %#v", t.Status) + pod := obj.(*v1.Pod) + klog.Infof("##: %#v", pod.Name) + + pp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###222ppPod: %#v", pp) + klog.Infof("###222ppPodStatus: %#v", pp.Status) + } return needToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -113,6 +134,42 @@ func AddAllEventHandlers(sched *Scheduler) { }, }, ) + // failed pod queue + sched.PodInformer.Informer().AddEventHandler( + cache.FilteringResourceEventHandler{ + FilterFunc: func(obj interface{}) bool { + switch t := obj.(type) { + case *v1.Pod: + pod := obj.(*v1.Pod) + klog.Infof("###: %#v", pod.Name) + p, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) + klog.Infof("###333Pod: %#v", err) + //klog.Infof("###333Pod: %#v", t) + klog.Infof("###333PodStatus: %#v", t.Status) + //p := sched.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).Get(pod.Name, metav1.GetOptions{}) + if err == nil { + //klog.Infof("###pppPod: %#v", p) + klog.Infof("###pppPodStatus: %#v", p.Status) + } + return failedToSchedule(t) && responsibleForPod(t, sched.SchedulerName) + case cache.DeletedFinalStateUnknown: + if pod, ok := t.Obj.(*v1.Pod); ok { + return failedToSchedule(pod) && responsibleForPod(pod, sched.SchedulerName) + } + utilruntime.HandleError(fmt.Errorf("unable to convert object %T to *v1.Pod in %T", obj, sched)) + return false + default: + utilruntime.HandleError(fmt.Errorf("unable to handle object in %T: %T", sched, obj)) + return false + } + }, + Handler: cache.ResourceEventHandlerFuncs{ + AddFunc: sched.addPodWithdrawResource, + UpdateFunc: sched.updatePodWithdrawResource, + DeleteFunc: sched.deletePodWithdrawResource, + }, + }, + ) sched.ClusterInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: sched.addCluster, UpdateFunc: sched.updateCluster, @@ -122,23 +179,36 @@ func AddAllEventHandlers(sched *Scheduler) { // needToSchedule selects pods that need to be scheduled func needToSchedule(pod *v1.Pod) bool { + klog.Infof("$$$$$$$needToSchedule: %v", pod.Name) + klog.Infof("$$$$$$$needToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned } // assignedPod selects pods that are assigned (scheduled and running). func assignedPod(pod *v1.Pod) bool { + klog.Infof("$$$$$$$assignedPod: %v", pod.Name) + klog.Infof("$$$$$$$assignedPod: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound } // responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler. 
func responsibleForPod(pod *v1.Pod, schedulerName string) bool {
+	klog.Infof("$$$$$$$responsibleForPod: %v", pod.Name)
+	klog.Infof("$$$$$$$responsibleForPod: %v", schedulerName == pod.Status.AssignedScheduler.Name)
 	return schedulerName == pod.Status.AssignedScheduler.Name
 }
 
+// failedToSchedule selects pods that were scheduled but failed to create a vm
+func failedToSchedule(pod *v1.Pod) bool {
+	klog.Infof("$$$$$$$failedToSchedule: %v", pod.Name)
+	klog.Infof("$$$$$$$failedToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed)
+	return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed
+}
+
 // addPodToCache add pod to the stack cache of the scheduler
 func (sched *Scheduler) addPodToCache(obj interface{}) {
 	pod, ok := obj.(*v1.Pod)
-	klog.V(4).Infof("Add a pod: %v", pod.Name)
+	klog.Infof("Add a pod: %v", pod.Name)
 	if !ok {
 		klog.Errorf("cannot convert to *v1.Pod: %v", obj)
 		return
@@ -433,6 +503,7 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack
 		}
 		return err
 	}
+	//
 	return nil
 }
@@ -528,3 +599,99 @@ func (sched *Scheduler) verifyClusterInfo(cluster *clusterv1.Cluster) (verified
 	verified = true
 	return verified
 }
+
+func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) {
+	verified = false
+	name := pod.Name
+	flavors := pod.Spec.VirtualMachine.Flavors
+	if pod.Name == "" || flavors == nil {
+		klog.Errorf("pod name:%s, flavors:%v is null", name, flavors)
+		return verified
+	}
+	verified = true
+	return verified
+}
+
+func (sched *Scheduler) addPodWithdrawResource(object interface{}) {
+	pod, ok := object.(*v1.Pod)
+	klog.Infof("Add a pod to withdraw resource: %v", pod.Name)
+	if !ok {
+		klog.Errorf("cannot convert to *v1.Pod: %v", object)
+		return
+	}
+	podCopy := pod.DeepCopy()
+	if sched.verifyPodInfo(podCopy) == false {
+		klog.Infof(" Pod data is not correct: %v", podCopy)
+	}
+	err := sched.withdrawResource(pod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", pod.Name)
+	}
+}
+
+func (sched *Scheduler) updatePodWithdrawResource(oldObj, newObj interface{}) {
+	oldPod, ok := oldObj.(*v1.Pod)
+	if !ok {
+		klog.Errorf("cannot convert oldObj to *v1.Pod: %v", oldObj)
+		return
+	}
+	newPod, ok := newObj.(*v1.Pod)
+	klog.Infof("Update a pod: %v", newPod)
+	if !ok {
+		klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj)
+		return
+	}
+	if oldPod.Name != newPod.Name {
+		klog.Errorf("old pod name and new pod name should be equal: %s, %s", oldPod.Name, newPod.Name)
+		return
+	}
+	err := sched.withdrawResource(newPod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", oldPod.Name)
+	}
+}
+
+func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) {
+	var pod *v1.Pod
+	switch t := obj.(type) {
+	case *v1.Pod:
+		pod = t
+		klog.Infof("Delete a pod: %v", pod.Name)
+	case cache.DeletedFinalStateUnknown:
+		var ok bool
+		pod, ok = t.Obj.(*v1.Pod)
+		if !ok {
+			klog.Errorf("cannot convert to *v1.Pod: %v", t.Obj)
+			return
+		}
+	default:
+		klog.Errorf("cannot convert to *v1.Pod: %v", t)
+		return
+	}
+
+	err := sched.withdrawResource(pod.Name)
+	if err != nil {
+		klog.Errorf("withdraw resource of pod %s failed", pod.Name)
+	}
+}
+
+//withdraw resources reserved for a pod & add them back to the cache for other pods
+func (sched *Scheduler) withdrawResource(podName string) error {
+	resource := sched.ResourceAllocationMap[podName]
+	//allResInfo := types.AllResInfo{CpuAndMem: resource.CpuMem, Storage: resource.Storage, eipNum: 0}
+	if resource == nil {
+		klog.Infof("there is 
no preserved resource for pod: %s", podName) + return nil + } + allResInfo := resource.Resource + regionName := utils.GetRegionName(resource.SiteID) + regionFlavor, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(regionName) + if err != nil { + klog.Errorf("there is no valid flavor for region: %s", regionName) + return err + } + siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] + siteCacheInfo.WithdrawSiteResInfo(allResInfo, regionFlavor) + delete(sched.ResourceAllocationMap, podName) + return nil +} diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go index a817abff6..0992ecec3 100644 --- a/globalscheduler/pkg/scheduler/factory/factory.go +++ b/globalscheduler/pkg/scheduler/factory/factory.go @@ -44,9 +44,11 @@ func (i *podInformer) Lister() corelisters.PodLister { // NewPodInformer creates a shared index informer that returns only non-terminal pods. func NewPodInformer(schedulerName string, client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer { + /*selector := fields.ParseSelectorOrDie( + "status.phase=" + string(v1.PodAssigned) + + ",status.assignedScheduler.name=" + schedulerName)*/ selector := fields.ParseSelectorOrDie( - "status.phase=" + string(v1.PodAssigned) + - ",status.assignedScheduler.name=" + schedulerName) + "status.assignedScheduler.name=" + schedulerName) lw := cache.NewListWatchFromClient(client.CoreV1(), string(v1.ResourcePods), metav1.NamespaceAll, selector) return &podInformer{ informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod, diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go index c782d910f..01203fd49 100644 --- a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go +++ b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go @@ -425,11 +425,40 @@ func (f *framework) RunBindPlugins(ctx context.Context, state *CycleState, stack return status } +//resource func (f *framework) runBindPlugin(ctx context.Context, bp BindPlugin, state *CycleState, stack *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status { return bp.Bind(ctx, state, stack, siteCacheInfo) } +/// RunBindResourcePlugins runs the set of configured bind plugins until one returns a non `Skip` status. 
+func (f *framework) RunBindResourcePlugins(ctx context.Context, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (status *Status, siteId string, flavor string, resInfo *types.AllResInfo) { + if len(f.bindPlugins) == 0 { + return NewStatus(Skip, ""), "", "", nil + } + for _, bp := range f.bindPlugins { + status, siteId, flavor, resInfo = f.runBindResourcePlugin(ctx, bp, state, stack, siteCacheInfo) + if status != nil && status.Code() == Skip { + continue + } + if !status.IsSuccess() { + msg := fmt.Sprintf("plugin %q failed to bind pod \"%v\": %v", bp.Name(), stack.PodName, status.Message()) + klog.Errorf("%s", msg) + status = NewStatus(Error, msg) + return status, siteId, flavor, resInfo + } + return status, siteId, flavor, resInfo + } + return status, siteId, flavor, resInfo +} + +///added for resource bind & revoke +func (f *framework) runBindResourcePlugin(ctx context.Context, bp BindPlugin, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) { + return bp.BindResource(ctx, state, stack, siteCacheInfo) +} + // RunPostBindPlugins runs the set of configured postbind plugins. func (f *framework) RunPostBindPlugins(ctx context.Context, state *CycleState, stack *types.Stack, siteID string) { for _, pl := range f.postBindPlugins { diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/interface.go b/globalscheduler/pkg/scheduler/framework/interfaces/interface.go index d83daf7bc..b330dacf3 100644 --- a/globalscheduler/pkg/scheduler/framework/interfaces/interface.go +++ b/globalscheduler/pkg/scheduler/framework/interfaces/interface.go @@ -386,6 +386,7 @@ type BindPlugin interface { // it must return Skip in its Status code. If a bind plugin returns an Error, the // pod is rejected and will not be bound. Bind(ctx context.Context, state *CycleState, p *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status + BindResource(ctx context.Context, state *CycleState, p *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) } // StrategyPlugin is an interface that must be implemented by "strategy" plugins. strategy @@ -462,6 +463,9 @@ type Framework interface { RunBindPlugins(ctx context.Context, state *CycleState, stack *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status + RunBindResourcePlugins(ctx context.Context, state *CycleState, stack *types.Stack, + siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) (*Status, string, string, *types.AllResInfo) + //RunStrategyPlugins runs the set of configured strategy plugins. 
RunStrategyPlugins(ctx context.Context, state *CycleState, allocations *types.Allocation, siteScoreList SiteScoreList) (SiteScoreList, *Status) diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index a8391ea44..fa80d219f 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -23,7 +23,7 @@ import ( "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "strconv" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + _ "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/framework/interfaces" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" @@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Errorf("site selector info: %v", siteSelectedInfo) + klog.Infof("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -102,15 +102,97 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes } b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() + /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() if err != nil { klog.Errorf("Getting region's flavor failed: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } if regionFlavors == nil || err != nil { regionFlavors = map[string]*typed.RegionFlavor{} - } - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) + }*/ + /*siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) + return nil*/ + + /*klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) + klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) + siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) + klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } + +// Bind binds pods to site using the k8s client. 
+// BindResource is the same as Bind except that it also returns the selected site, flavor, and resource info
+func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.CycleState, stack *types.Stack,
+	siteCacheInfo *sitecacheinfo.SiteCacheInfo) (*interfaces.Status, string, string, *types.AllResInfo) {
+	region := siteCacheInfo.GetSite().RegionAzMap.Region
+
+	//eipNum : private data
+	resInfo := types.AllResInfo{CpuAndMem: map[string]types.CPUAndMemory{}, Storage: map[string]float64{}}
+	siteID := siteCacheInfo.Site.SiteID
+
+	stack.Selected.SiteID = siteID
+	stack.Selected.Region = region
+	stack.Selected.AvailabilityZone = siteCacheInfo.GetSite().RegionAzMap.AvailabilityZone
+	stack.Selected.ClusterName = siteCacheInfo.Site.ClusterName
+	stack.Selected.ClusterNamespace = siteCacheInfo.Site.ClusterNamespace
+	flavorID := ""
+	//siteSelectedInfo is type of SiteSelectorInfo at cycle_state.go
+	siteSelectedInfo, err := interfaces.GetSiteSelectorState(state, siteID)
+	if err != nil {
+		klog.Errorf("Getting site selector state failed! err: %s", err)
+		status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err))
+		return status, siteID, flavorID, &resInfo
+	}
+	klog.Infof("site selector info: %v", siteSelectedInfo)
+	klog.Infof("stack.Resources: %#v", stack.Resources)
+	klog.Infof("siteSelectedInfo.Flavors: %#v", siteSelectedInfo.Flavors)
+	if len(stack.Resources) != len(siteSelectedInfo.Flavors) {
+		klog.Errorf("flavor count not equal to server count! err: %s", err)
+		return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+
+			"server count!", siteID)), siteID, flavorID, nil
+	}
+	for i := 0; i < len(stack.Resources); i++ {
+		flavorID = siteSelectedInfo.Flavors[i].FlavorID
+		stack.Resources[i].FlavorIDSelected = flavorID
+		klog.Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region)
+		flv, ok := cache.FlavorCache.GetFlavor(flavorID, region)
+		if !ok {
+			klog.Warningf("flavor %s not found in region(%s)", flavorID, region)
+			continue
+		}
+		klog.Infof("flavor %s : %v", flavorID, flv)
+		vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64)
+		if err != nil || vCPUInt <= 0 {
+			klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region)
+			continue
+		}
+		reqRes, ok := resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType]
+		if !ok {
+			reqRes = types.CPUAndMemory{VCPU: 0, Memory: 0}
+		}
+		reqRes.VCPU += vCPUInt * int64(stack.Resources[i].Count)
+		reqRes.Memory += flv.Ram * int64(stack.Resources[i].Count)
+
+		//put them all to resInfo
+		resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes
+		break
+	}
+	klog.Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo)
+	b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo)
+	/*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors()
+	if err != nil {
+		klog.Errorf("Getting region's flavor failed: %s", err)
+		return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)), siteID, flavorID, nil
+	}
+	if regionFlavors == nil || err != nil {
+		regionFlavors = map[string]*typed.RegionFlavor{}
+	}
+
+	klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo)
+	klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors)
+	siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors)
+	klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/
+	klog.Infof("UpdateSiteWithResInfo - return")
+	return nil, siteID, flavorID, &resInfo
+}
diff --git 
a/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go b/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go index 87bfcb38a..e7f0572a4 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/flavor/flavor.go @@ -310,7 +310,7 @@ func (f *Flavor) Filter(ctx context.Context, cycleState *interfaces.CycleState, var isCommonMatch, _ = isComFlavorMatch(flavorMap, siteCacheInfo) var isSpotMatch, _ = isSpotFlavorMatch(spotFlavorMap, siteCacheInfo) if isCommonMatch && isSpotMatch { - klog.Infof("*** isCommonMatch:%v, isSpotMatch:%v ", isCommonMatch, isSpotMatch) + klog.Infof("isCommonMatch:%v, isSpotMatch:%v ", isCommonMatch, isSpotMatch) return nil } } diff --git a/globalscheduler/pkg/scheduler/internal/cache/snapshot.go b/globalscheduler/pkg/scheduler/internal/cache/snapshot.go index c6e9d6b0c..4015a678c 100644 --- a/globalscheduler/pkg/scheduler/internal/cache/snapshot.go +++ b/globalscheduler/pkg/scheduler/internal/cache/snapshot.go @@ -20,6 +20,7 @@ package cache import ( "fmt" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" schedulerlisters "k8s.io/kubernetes/globalscheduler/pkg/scheduler/listers" schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" @@ -135,3 +136,15 @@ func (s *Snapshot) Get(siteID string) (*schedulersitecacheinfo.SiteCacheInfo, er func (s *Snapshot) GetFlavors() (map[string]*typed.RegionFlavor, error) { return s.RegionFlavorMap, nil } + +func (s *Snapshot) GetRegionFlavors(region string) (map[string]*typed.RegionFlavor, error) { + regionFlavorMap := make(map[string]*typed.RegionFlavor) + for flavorId := range s.FlavorMap { + key := region + constants.FlavorDelimiter + flavorId + regionFlavor := s.RegionFlavorMap[key] + if regionFlavor != nil { + regionFlavorMap[key] = regionFlavor + } + } + return regionFlavorMap, nil +} diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 47cab45bd..c81d78df3 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -78,6 +78,13 @@ type ScheduleResult struct { FeasibleSites int // Number of feasible site on one stack scheduled } +type PodSiteResourceAllocation struct { + PodName string + SiteID string + Flavor string + Resource types.AllResInfo +} + // Scheduler watches for new unscheduled pods. It attempts to find // site that they fit on and writes bindings back to the api server. 
type Scheduler struct { @@ -108,7 +115,8 @@ type Scheduler struct { mu sync.RWMutex //Cluster - KubeClientset clientset.Interface //kubernetes.Interface + //KubeClientset clientset.Interface //kubernetes.Interface + KubeClientset *clientset.Clientset ApiextensionsClientset apiextensionsclientset.Interface ClusterClientset clusterclientset.Interface ClusterInformerFactory externalinformers.SharedInformerFactory @@ -120,6 +128,9 @@ type Scheduler struct { schedulerClientset schedulerclientset.Interface schedulerInformer cache.SharedIndexInformer workerNumber int + + // table to withdraw site resource + ResourceAllocationMap map[string]*PodSiteResourceAllocation } // single scheduler instance @@ -128,7 +139,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.V(4).Infof("stopEverything to check : %v", stopEverything) + klog.Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -141,8 +152,8 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - } - + ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + } err := sched.buildFramework() if err != nil { return nil, fmt.Errorf("buildFramework by %s failed! err: %v", types.SchedulerDefaultProviderName, err) @@ -150,7 +161,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -190,14 +201,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -207,13 +218,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. 
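
NewScheduler above initializes ResourceAllocationMap, which keeps one PodSiteResourceAllocation per pod name so that resources deducted at bind time can be returned if VM creation later fails. A simplified sketch of that record/withdraw bookkeeping, with illustrative names and types rather than the scheduler's own methods:

    package main

    import "fmt"

    // Allocation is a simplified stand-in for PodSiteResourceAllocation.
    type Allocation struct {
        PodName string
        SiteID  string
        Flavor  string
        VCPU    int64
        Memory  int64
    }

    // tracker mimics the ResourceAllocationMap bookkeeping: record stores
    // the allocation when a stack is bound, withdraw removes and returns
    // it when the pod fails, so the site cache can add the capacity back.
    type tracker struct {
        allocations map[string]*Allocation
    }

    func (t *tracker) record(a *Allocation) {
        t.allocations[a.PodName] = a
    }

    func (t *tracker) withdraw(podName string) (*Allocation, bool) {
        a, ok := t.allocations[podName]
        if ok {
            delete(t.allocations, podName)
        }
        return a, ok
    }

    func main() {
        t := &tracker{allocations: map[string]*Allocation{}}
        t.record(&Allocation{PodName: "pod-a", SiteID: "region-1|az-1", Flavor: "42", VCPU: 2, Memory: 4096})
        if a, ok := t.withdraw("pod-a"); ok {
            fmt.Printf("returning %d vCPU / %d MB to site %s\n", a.VCPU, a.Memory, a.SiteID)
        }
    }

Keying by pod name means a second bind for the same pod overwrites the earlier record, which matches how sched.ResourceAllocationMap[stack.PodName] is assigned in bind().
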
func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) + klog.Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.V(4).Infof("Waiting informer caches to sync") + klog.Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -225,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.V(4).Infof("Waiting informer caches to sync") + klog.Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +247,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -261,15 +272,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.V(4).Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -281,21 +292,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks - klog.V(4).Infof("3. Assumed Stacks: %v", result) + klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("Schedule result: %v", result) //result is assumed stacks + klog.Infof("3. 
Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -335,13 +346,11 @@ func (sched *Scheduler) stackPassesFiltersOnSite( info *schedulersitecacheinfo.SiteCacheInfo, ) (bool, *interfaces.Status, error) { var status *interfaces.Status - statusMap := sched.SchedFrame.RunFilterPlugins(ctx, state, stack, info) status = statusMap.Merge() if !status.IsSuccess() && !status.IsUnschedulable() { return false, status, status.AsError() } - return status.IsSuccess(), status, nil } @@ -368,7 +377,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -454,7 +463,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. sort.Sort(sort.Reverse(result)) - klog.V(4).Infof("score sites: %v", result) + klog.Infof("score sites: %v", result) return result, nil } @@ -488,9 +497,27 @@ func (sched *Scheduler) selectHost(siteScoreList interfaces.SiteScoreList) (stri // We expect this to run asynchronously, so we handle binding metrics internally. 
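
bind() below deducts the selected site's resources as soon as the bind succeeds, and withdrawResource in eventhandlers.go is meant to hand them back when the pod later fails. A minimal sketch of expressing that pairing with a single reserve/return flag; the types are simplified and this is deliberately not the exact arithmetic of DeductSiteResInfo/UpdateSiteResInfo:

    package main

    import "fmt"

    // CPUAndMemory mirrors the vCPU/memory pair tracked per resource type.
    type CPUAndMemory struct {
        VCPU   int64
        Memory int64
    }

    // applyReservation adds the requested resources when reserve is true
    // (bind) and subtracts them when it is false (withdraw after a failed
    // VM creation). requested maps a resource type to what is already
    // reserved on the site.
    func applyReservation(requested map[string]*CPUAndMemory, resType string, delta CPUAndMemory, reserve bool) {
        sign := int64(1)
        if !reserve {
            sign = -1
        }
        cur, ok := requested[resType]
        if !ok {
            cur = &CPUAndMemory{}
            requested[resType] = cur
        }
        cur.VCPU += sign * delta.VCPU
        cur.Memory += sign * delta.Memory
    }

    func main() {
        requested := map[string]*CPUAndMemory{}
        applyReservation(requested, "default", CPUAndMemory{VCPU: 2, Memory: 4096}, true)  // bind
        applyReservation(requested, "default", CPUAndMemory{VCPU: 2, Memory: 4096}, false) // withdraw
        fmt.Printf("%+v\n", *requested["default"]) // {VCPU:0 Memory:0}
    }
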
func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSiteID string, state *interfaces.CycleState) (err error) { - bindStatus := sched.SchedFrame.RunBindPlugins(ctx, state, stack, + bindStatus, siteId, flavorId, resInfo := sched.SchedFrame.RunBindResourcePlugins(ctx, state, stack, sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID]) if bindStatus.IsSuccess() { + podResporceAlloc := PodSiteResourceAllocation{stack.PodName, siteId, flavorId, *resInfo} + klog.Infof("bind - podResporceAlloc: %#v", podResporceAlloc) + klog.Infof("bind111 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName) + sched.ResourceAllocationMap[stack.PodName] = &podResporceAlloc + /*if (sched.ResourceAllocationMap[stack.PodName] == nil) { + sched.ResourceAllocationMap[stack.PodName] = append(sched.ResourceAllocationMap[stack.PodName], &podResporceAlloc) + } else { + sched.ResourceAllocationMap[stack.PodName] = &podResporceAlloc + }*/ + klog.Infof("bind222 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName) + region := utils.GetRegionName(siteId) + regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(region) + if err != nil { + klog.Errorf("There is no valid flavors in region: %s", region) + return err + } + siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID] + siteCacheInfo.DeductSiteResInfo(*resInfo, regionFlavors) return nil } if bindStatus.Code() == interfaces.Error { @@ -502,14 +529,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite // Schedule Run begins watching and scheduling. It waits for cache to be synced , // then starts scheduling and blocked until the context is done. func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) { - klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID) + klog.Infof("Attempting to schedule allocation: %v", allocation.ID) state := interfaces.NewCycleState() schedulingCycleCtx, cancel := context.WithCancel(ctx) defer cancel() // 1. Snapshot site resource cache start := time.Now() - klog.V(4).Infof("[START] snapshot site...") + klog.Infof("[START] snapshot site...") ///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap) ///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated @@ -519,16 +546,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 2. Run "prefilter" plugins. start = time.Now() - klog.V(4).Infof("[START] Running prefilter plugins...") + klog.Infof("[START] Running prefilter plugins...") preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack) if !preFilterStatus.IsSuccess() { return result, preFilterStatus.AsError() } - klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) // 3. Run "filter" plugins. 
start = time.Now() - klog.V(4).Infof("[START] Running filter plugins...") + klog.Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -536,9 +563,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -547,33 +574,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.V(4).Infof("[START] Running preScore plugins...") + klog.Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.V(4).Infof("[START] Running prioritizeSites plugins...") + klog.Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.V(4).Infof("[START] Running strategy plugins...") + klog.Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.V(4).Infof("selected Hosts : %#v", siteCount) + klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -605,7 +632,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -686,7 +713,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -730,7 +757,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.V(4).Infof("Process an item in work queue %v ", workItem) + klog.Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -739,7 +766,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.V(4).Infof("Successfully processed & synced %s", key) + klog.Infof("Successfully processed & synced %s", key) return true } @@ -749,24 +776,24 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.V(4).Infof("sync cache for key %v", key) + klog.Infof("sync cache for key %v", key) startTime := time.Now() defer func() { - klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) + klog.Infof("Finished syncing %q (%v)", key, time.Since(startTime)) }() nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.V(4).Infof(" Processed a cluster: %v", key) + klog.Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -821,7 +848,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.V(4).Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.Infof("create a site static info, cluster 
profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusCreated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -839,7 +866,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName) return false, err } - klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy) + klog.Infof("update a site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusUpdated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -871,7 +898,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType //This function updates sites' dynamic resource informaton func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) { //reset total(available) resource - klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) + klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) var siteID string for _, siteresource := range resource.CPUMemResources { siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index 2deceb0ae..2b935d5f7 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -552,8 +552,54 @@ func GetStackKey(stack *types.Stack) (string, error) { return uid, nil } +//deduct or add +func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor, deduct bool) error { + var resourceTypes []string + klog.Infof("444 UpdateSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap) + for resType, res := range resInfo.CpuAndMem { + //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time) + if resType == "" { + resType = string(DefaultResourceType) + resourceTypes = append(resourceTypes, resType) + } + if len(n.RequestedResources) == 0 { + reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} + n.RequestedResources[resType] = &reqRes + continue + } + for reqType, reqRes := range n.RequestedResources { + klog.Infof("555 UpdateSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes) + resTypes := strings.Split(reqType, constants.FlavorDelimiter) + if !utils.IsContain(resTypes, resType) { + klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + continue + } + reqRes.VCPU += res.VCPU + reqRes.Memory += res.Memory + klog.Infof("666 UpdateSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + n.RequestedResources[resType] = reqRes + klog.Infof("777 UpdateSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + + } + } + for volType, used := range resInfo.Storage { + klog.Infof("888 UpdateSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + reqVol, ok := n.RequestedStorage[volType] + if !ok { + reqVol = 0 + } + reqVol += used + n.RequestedStorage[volType] = reqVol + klog.Infof("888 UpdateSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + + } + n.updateSiteFlavor(resourceTypes, regionFlavorMap, deduct) + n.generation = nextGeneration() + return nil +} + // DeductSiteResInfo deduct site's resource info -func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { +/*func (n *SiteCacheInfo) DeductSiteResInfo2(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { var resourceTypes []string for resType, res := range resInfo.CpuAndMem { //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time) @@ -588,6 +634,72 @@ func (n *SiteCacheInfo) DeductSiteResInfo(resInfo types.AllResInfo, regionFlavor n.updateSiteFlavor(resourceTypes, regionFlavorMap) n.generation = nextGeneration() return nil +}*/ + +func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor, deduct bool) { + n.mu.Lock() + defer n.mu.Unlock() + + klog.Infof("999 updateSiteFlavor Before - resourceTypes: %#v, regionFlavors:%#v", resourceTypes, regionFlavors) + for k, v := range regionFlavors { + klog.Infof("updateSiteFlavor Before - key: %#v, regionFlavor:%#v", k, v) + } + + if n.AllocatableFlavor == nil { + n.AllocatableFlavor = map[string]int64{} + } + supportFlavors := n.AllocatableFlavor + regionName := utils.GetRegionName(n.Site.SiteID) + for flavorid := range supportFlavors { + regionFalvorKey := regionName + constants.FlavorDelimiter + flavorid + flv := regionFlavors[regionFalvorKey] + klog.Infof("000 updateSiteFlavor - flv: %#v", flv) + if flv == nil { + n.deductFlavor() + klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) + return + } + vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) + if err != nil { + n.deductFlavor() + klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) + return + } + for _, resourceType := range resourceTypes { + klog.Infof("121 updateSiteFlavor Before - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) + totalRes := n.TotalResources[resourceType] + requestRes := n.RequestedResources[resourceType] + if totalRes == nil { + klog.Infof("updateSiteFlavor - totalRes is nil") + n.deductFlavor() + klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) + return + } + if requestRes == nil { + klog.Infof("updateSiteFlavor - requestRes is nil") + requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} + } + if(deduct == true) { + count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt + memCount := (totalRes.Memory - requestRes.Memory) / flv.Ram + } else { + count := (totalRes.VCPU + requestRes.VCPU) / vCPUInt + memCount := (totalRes.Memory + requestRes.Memory) / flv.Ram + } + if count > memCount { + count = memCount + } + if _, ok := n.AllocatableFlavor[flavorid]; !ok { + n.AllocatableFlavor[flavorid] = 0 + } + klog.Infof("121 n.AllocatableFlavor Before - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) + if n.AllocatableFlavor[flavorid] > count { + n.AllocatableFlavor[flavorid] = count + } + klog.Infof("121 n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) + klog.Infof("121 updateSiteFlavor After - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) + } + } } /* @@ -598,7 +710,7 @@ updateFlavor(): /home/ubuntu/go/src/k8s.io/arktos/conf/flavors.json global scheduler flavor config file: /home/ubuntu/go/src/k8s.io/arktos/conf/flavor_config.yaml */ -func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) { +func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) { n.mu.Lock() defer n.mu.Unlock() @@ -661,3 +773,71 @@ func (n *SiteCacheInfo) deductFlavor() { } } } + 
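
As written above, count and memCount are declared with := inside the if/else branches of updateSiteFlavor, so they are out of scope at the `if count > memCount` comparison and the function does not compile; the updateFlavor helper that follows also shadows its receiver n with an integer (`n := -1`) before dereferencing n.AllocatableFlavor. A small standalone sketch of the branch with the declarations hoisted, keeping the patch's arithmetic; the free-function signature and names are illustrative only:

    package main

    import "fmt"

    // allocatableCount shows the intended shape of the capacity arithmetic
    // with count/memCount declared before the branch: subtract requested
    // capacity when deducting, add it back when returning, then take the
    // smaller of the vCPU-limited and memory-limited instance counts.
    func allocatableCount(totalVCPU, reqVCPU, totalMem, reqMem, vcpus, ram int64, deduct bool) int64 {
        var count, memCount int64
        if deduct {
            count = (totalVCPU - reqVCPU) / vcpus
            memCount = (totalMem - reqMem) / ram
        } else {
            count = (totalVCPU + reqVCPU) / vcpus
            memCount = (totalMem + reqMem) / ram
        }
        if count > memCount {
            count = memCount
        }
        return count
    }

    func main() {
        fmt.Println(allocatableCount(16, 4, 32768, 8192, 2, 4096, true)) // 6
    }
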
+func (n *SiteCacheInfo) updateFlavor(deduct bool) { + n := -1 + if deduct == true { + n=1 + } + if n.AllocatableFlavor == nil { + n.AllocatableFlavor = map[string]int64{} + } + for key, value := range n.AllocatableFlavor { + n.AllocatableFlavor[key] = value - 1 + if n.RequestedFlavor == nil { + n.RequestedFlavor = make(map[string]int64) + } + requested, ok := n.RequestedFlavor[key] + if !ok { + n.RequestedFlavor[key] = 1 + } else { + n.RequestedFlavor[key] = requested + 1 + } + } +} + +//revoke bound site's resource to pod because pod creation failed +func (n *SiteCacheInfo) WithdrawSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { + var resourceTypes []string + klog.Infof("444 WithdrawSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap) + for resType, res := range resInfo.CpuAndMem { + //resource type is null, assign default resource type (e.g. when binding a pod for the first time) + if resType == "" { + resType = string(DefaultResourceType) + resourceTypes = append(resourceTypes, resType) + } + if len(n.RequestedResources) == 0 { + reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} + n.RequestedResources[resType] = &reqRes + continue + } + for reqType, reqRes := range n.RequestedResources { + klog.Infof("555 WithdrawSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes) + resTypes := strings.Split(reqType, constants.FlavorDelimiter) + if !utils.IsContain(resTypes, resType) { + klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + continue + } + reqRes.VCPU += res.VCPU + reqRes.Memory += res.Memory + klog.Infof("666 WithdrawSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + n.RequestedResources[resType] = reqRes + klog.Infof("777 WithdrawSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) + + } + } + for volType, used := range resInfo.Storage { + klog.Infof("888 WithdrawSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + reqVol, ok := n.RequestedStorage[volType] + if !ok { + reqVol = 0 + } + reqVol += used + n.RequestedStorage[volType] = reqVol + klog.Infof("888 WithdrawSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) + + } + n.updateSiteFlavor(resourceTypes, regionFlavorMap) + n.generation = nextGeneration() + return nil +} From 0c14cbe1e8854ddbfe96044b9c838a69cc1bc586 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 19:50:27 +0000 Subject: [PATCH 04/24] implemented resource revokation when vm creation failed --- .../dispatcher/dispatcher_process.go | 26 +-- .../pkg/scheduler/eventhandlers.go | 57 +---- .../plugins/defaultbinder/default_binder.go | 31 +-- globalscheduler/pkg/scheduler/scheduler.go | 106 ++++----- .../scheduler/sitecacheinfo/sitecache_info.go | 217 +++--------------- 5 files changed, 93 insertions(+), 344 deletions(-) diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go index aec245671..8ba07f6c3 100644 --- a/globalscheduler/controllers/dispatcher/dispatcher_process.go +++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go @@ -19,11 +19,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/client-go/kubernetes" - "k8s.io/client-go/kubernetes/scheme" - typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" 
"k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/record" "k8s.io/klog" "k8s.io/kubernetes/globalscheduler/cmd/conf" "k8s.io/kubernetes/globalscheduler/controllers/util" @@ -53,7 +50,6 @@ type Process struct { totalDeleteLatency int64 totalPodCreateNum int totalPodDeleteNum int - recorder record.EventRecorder } func NewProcess(config *rest.Config, namespace string, name string, quit chan struct{}) Process { @@ -78,12 +74,6 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st if err != nil { klog.Fatal(err) } - eventBroadcaster := record.NewBroadcaster() - eventBroadcaster.StartLogging(klog.Infof) - eventBroadcaster.StartRecordingToSink( - &typedcorev1.EventSinkImpl{Interface: clientset.CoreV1().Events("")}) - recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: name}) - return Process{ namespace: namespace, name: name, @@ -98,7 +88,6 @@ func NewProcess(config *rest.Config, namespace string, name string, quit chan st totalDeleteLatency: 0, totalPodCreateNum: 0, totalPodDeleteNum: 0, - recorder: recorder, } } @@ -107,7 +96,6 @@ func (p *Process) Run(quit chan struct{}) { dispatcherSelector := fields.ParseSelectorOrDie("metadata.name=" + p.name) dispatcherLW := cache.NewListWatchFromClient(p.dispatcherClientset.GlobalschedulerV1(), "dispatchers", p.namespace, dispatcherSelector) - dispatcherInformer := cache.NewSharedIndexInformer(dispatcherLW, &dispatcherv1.Dispatcher{}, 0, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) dispatcherInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -229,8 +217,7 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { if err == nil { klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) pod.Status.ClusterInstanceId = instanceId - //pod.Status.Phase = v1.ClusterScheduled - pod.Status.Phase = v1.PodFailed + pod.Status.Phase = v1.ClusterScheduled updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod) if err == nil { klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) @@ -245,17 +232,6 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) } } - ///for test - klog.Warningf("The openstack vm for the pod %v failed to create with the error", pod.ObjectMeta.Name) - pod.Status.Phase = v1.PodFailed - if _, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod); err != nil { - klog.Warningf("The pod %v failed to update its apiserver dtatbase status to failed with the error %v", pod.ObjectMeta.Name, err) - } - klog.Infof("+++ The pod info %s, %#v, %#v", pod.ObjectMeta.Name, pod.Status) - //p.recorder.Event(pod, corev1.EventTypeNormal, SuccessSynched, MessageResourceSynched) - //p.recorder.Event(pod, v1.EventTypeWarning, "Failed", "Failed to create vm") - - // util.CheckTime(pod.Name, "dispatcher", "CreatePod-End", 2) }() } } diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index b6930e73d..9b16fe8b1 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -72,15 +72,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - //klog.Infof("###111Pod: %#v", t) - 
klog.Infof("###111PodStatus: %#v", t.Status) - pod := obj.(*v1.Pod) - klog.Infof("#: %#v", pod.Name) - ppp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###pppPod: %#v", ppp) - klog.Infof("###pppPodStatus: %#v", ppp.Status) - } return assignedPod(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -105,16 +96,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - //klog.Infof("###222Pod: %#v", t) - klog.Infof("###222PodStatus: %#v", t.Status) - pod := obj.(*v1.Pod) - klog.Infof("##: %#v", pod.Name) - - pp, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###222ppPod: %#v", pp) - klog.Infof("###222ppPodStatus: %#v", pp.Status) - } return needToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -140,17 +121,6 @@ func AddAllEventHandlers(sched *Scheduler) { FilterFunc: func(obj interface{}) bool { switch t := obj.(type) { case *v1.Pod: - pod := obj.(*v1.Pod) - klog.Infof("###: %#v", pod.Name) - p, err := sched.Client.CoreV1().Pods("default").Get(pod.Name, metav1.GetOptions{}) - klog.Infof("###333Pod: %#v", err) - //klog.Infof("###333Pod: %#v", t) - klog.Infof("###333PodStatus: %#v", t.Status) - //p := sched.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).Get(pod.Name, metav1.GetOptions{}) - if err == nil { - //klog.Infof("###pppPod: %#v", p) - klog.Infof("###pppPodStatus: %#v", p.Status) - } return failedToSchedule(t) && responsibleForPod(t, sched.SchedulerName) case cache.DeletedFinalStateUnknown: if pod, ok := t.Obj.(*v1.Pod); ok { @@ -179,36 +149,28 @@ func AddAllEventHandlers(sched *Scheduler) { // needToSchedule selects pods that need to be scheduled func needToSchedule(pod *v1.Pod) bool { - klog.Infof("$$$$$$$needToSchedule: %v", pod.Name) - klog.Infof("$$$$$$$needToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodAssigned } // assignedPod selects pods that are assigned (scheduled and running). func assignedPod(pod *v1.Pod) bool { - klog.Infof("$$$$$$$assignedPod: %v", pod.Name) - klog.Infof("$$$$$$$assignedPod: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound) return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodBound } // responsibleForPod returns true if the pod has asked to be scheduled by the given scheduler. 
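
The informer filters above gate three handler sets by pod phase: bound pods refresh the scheduler cache, assigned pods enter the scheduling queue, and, with this patch, failed pods have their reserved site resources withdrawn. A compact, standalone restatement of that routing with stand-in types (the real code uses the arktos v1.Pod and the separate assignedPod, needToSchedule, and failedToSchedule predicates):

    package main

    import "fmt"

    // PodPhase and the constants below stand in for the arktos v1 pod
    // phases referenced by the patch (PodAssigned, PodBound, PodFailed).
    type PodPhase string

    const (
        PodAssigned PodPhase = "Assigned" // waiting to be scheduled by this scheduler
        PodBound    PodPhase = "Bound"    // already bound to a site/cluster
        PodFailed   PodPhase = "Failed"   // VM creation failed; reserved resources must be withdrawn
    )

    // Pod is a minimal stand-in carrying only what the predicates inspect.
    type Pod struct {
        Name              string
        Phase             PodPhase
        AssignedScheduler string
    }

    // route mirrors how the three informer handlers are gated.
    func route(p Pod, schedulerName string) string {
        if p.AssignedScheduler != schedulerName {
            return "ignore"
        }
        switch p.Phase {
        case PodBound:
            return "update cache"
        case PodAssigned:
            return "enqueue for scheduling"
        case PodFailed:
            return "withdraw reserved resources"
        default:
            return "ignore"
        }
    }

    func main() {
        fmt.Println(route(Pod{Name: "pod-a", Phase: PodFailed, AssignedScheduler: "s1"}, "s1"))
    }
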
func responsibleForPod(pod *v1.Pod, schedulerName string) bool { - klog.Infof("$$$$$$$responsibleForPod: %v", pod.Name) - klog.Infof("$$$$$$$responsibleForPod: %v", schedulerName == pod.Status.AssignedScheduler.Name) return schedulerName == pod.Status.AssignedScheduler.Name } // failedToSchedule selects pods that scheduled but failed to create vm func failedToSchedule(pod *v1.Pod) bool { - klog.Infof("$$$$$$$failedToSchedule: %v", pod.Name) - klog.Infof("$$$$$$$failedToSchedule: %v", pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed) - return pod.Spec.VirtualMachine != nil && pod.Status.Phase == v1.PodFailed + return pod.Status.Phase == v1.PodFailed } // addPodToCache add pod to the stack cache of the scheduler func (sched *Scheduler) addPodToCache(obj interface{}) { pod, ok := obj.(*v1.Pod) - klog.Infof("Add a pod: %v", pod.Name) + klog.V(4).Infof("Add a pod: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", obj) return @@ -614,14 +576,14 @@ func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { func (sched *Scheduler) addPodWithdrawResource(object interface{}) { pod, ok := object.(*v1.Pod) - klog.Infof("Add a pod to withdraw resource: %v", pod.Name) + klog.V(4).Infof("Add a pod to withdraw resource: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", object) return } podCopy := pod.DeepCopy() if sched.verifyPodInfo(podCopy) == false { - klog.Infof(" Pod data is not correct: %v", podCopy) + klog.V(4).Infof(" Pod data is not correct: %v", podCopy) } err := sched.withdrawResource(pod.Name) if err != nil { @@ -636,7 +598,7 @@ func (sched *Scheduler) updatePodWithdrawResource(oldObj, newObj interface{}) { return } newPod, ok := newObj.(*v1.Pod) - klog.Infof("Update a pod: %v", newPod) + klog.V(4).Infof("Update a pod: %v", newPod) if !ok { klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj) return @@ -656,7 +618,7 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { switch t := obj.(type) { case *v1.Pod: pod = t - klog.Infof("Delete a pod: %v", pod.Name) + klog.V(4).Infof("Delete a pod: %v", pod.Name) case cache.DeletedFinalStateUnknown: var ok bool pod, ok = t.Obj.(*v1.Pod) @@ -678,9 +640,8 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { //withdraw reserved resources to a pod & add it to cash to other pods func (sched *Scheduler) withdrawResource(podName string) error { resource := sched.ResourceAllocationMap[podName] - //allResInfo := types.AllResInfo{CpuAndMem: resource.CpuMem, Storage: resource.Storage, eipNum: 0} - if (resource == nil){ - klog.Infof("there is no preserved resource for pod: %s", podName) + if resource == nil { + klog.V(4).Infof("there is no preserved resource for pod: %s", podName) return nil } allResInfo := resource.Resource @@ -691,7 +652,7 @@ func (sched *Scheduler) withdrawResource(podName string) error { return err } siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] - siteCacheInfo.WithdrawSiteResInfo(allResInfo, regionFlavor) + siteCacheInfo.UpdateSiteResInfo(allResInfo, regionFlavor, false) delete(sched.ResourceAllocationMap, podName) return nil } diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index fa80d219f..af9493ccd 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go 
@@ -70,7 +70,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.Errorf("Gettng site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } - klog.Infof("site selector info: %v", siteSelectedInfo) + klog.V(4).Infof("site selector info: %v", siteSelectedInfo) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -114,10 +114,10 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) return nil*/ - /*klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) + /*klog.V(4).Infof("111 Resource state before deduction: %#v", siteCacheInfo) + klog.V(4).Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ + klog.V(4).Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } @@ -144,9 +144,6 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) return status, siteID, flavorID, &resInfo } - klog.Infof("site selector info: %v", siteSelectedInfo) - klog.Infof("stack.Resources: %#v", stack.Resources) - klog.Infof("siteSelectedInfo.Flavors: %#v", siteSelectedInfo.Flavors) if len(stack.Resources) != len(siteSelectedInfo.Flavors) { klog.Errorf("flavor count not equal to server count! 
err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ @@ -155,13 +152,13 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle for i := 0; i < len(stack.Resources); i++ { flavorID = siteSelectedInfo.Flavors[i].FlavorID stack.Resources[i].FlavorIDSelected = flavorID - klog.Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region) + klog.V(4).Infof("GetFlavor - flavorID: %s, region: %s", flavorID, region) flv, ok := cache.FlavorCache.GetFlavor(flavorID, region) if !ok { klog.Warningf("flavor %s not found in region(%s)", flavorID, region) continue } - klog.Infof("flavor %s : %v", flavorID, flv) + klog.V(4).Infof("flavor %s : %v", flavorID, flv) vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) if err != nil || vCPUInt <= 0 { klog.Warningf("flavor %s is invalid in region(%s)", flavorID, region) @@ -178,21 +175,7 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes break } - klog.Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo) + klog.V(4).Infof("UpdateSiteWithResInfo - siteID: %s, resInfo: %#v", siteID, resInfo) b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() - if err != nil { - klog.Errorf("Getting region's flavor failed: %s", err) - return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)), siteID, flavorID, nil - } - if regionFlavors == nil || err != nil { - regionFlavors = map[string]*typed.RegionFlavor{} - } - - klog.Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ - klog.Infof("UpdateSiteWithResInfo - return") return nil, siteID, flavorID, &resInfo } diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index c81d78df3..a5732371d 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -116,7 +116,7 @@ type Scheduler struct { //Cluster //KubeClientset clientset.Interface //kubernetes.Interface - KubeClientset *clientset.Clientset + KubeClientset *clientset.Clientset ApiextensionsClientset apiextensionsclientset.Interface ClusterClientset clusterclientset.Interface ClusterInformerFactory externalinformers.SharedInformerFactory @@ -139,7 +139,7 @@ var once sync.Once func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct{}) (*Scheduler, error) { stopEverything := stopCh - klog.Infof("stopEverything to check : %v", stopEverything) + klog.V(4).Infof("stopEverything to check : %v", stopEverything) if stopEverything == nil { stopEverything = wait.NeverStop } @@ -152,8 +152,8 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), - } + ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + } err := sched.buildFramework() if err != nil { return nil, fmt.Errorf("buildFramework by %s failed! 
err: %v", types.SchedulerDefaultProviderName, err) @@ -161,7 +161,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() - klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) + klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, and scheduler informers for scheduler err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) if err != nil { @@ -201,14 +201,14 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { } // start pod informers if sched.PodInformer != nil && sched.InformerFactory != nil { - klog.Infof("Starting scheduler %s informer", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s informer", sched.SchedulerName) sched.InformerFactory.Start(stopCh) // Wait for all caches to sync before scheduling. sched.InformerFactory.WaitForCacheSync(stopCh) } // start scheduler informer if sched.schedulerInformer != nil { - klog.Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler informer for scheduler %s", sched.SchedulerName) go sched.schedulerInformer.Run(stopCh) } // Do scheduling @@ -218,13 +218,13 @@ func (sched *Scheduler) StartInformersAndRun(stopCh <-chan struct{}) { // Run begins watching and scheduling. It waits for cache to be synced, then starts scheduling // and blocked until the context is done. func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan struct{}) { - klog.Infof("Starting scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Starting scheduler %s", sched.SchedulerName) defer utilruntime.HandleCrash() //cluster if clusterWorkers > 0 { defer sched.ClusterQueue.ShutDown() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.ClusterSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -236,7 +236,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } defer sched.StackQueue.Close() - klog.Infof("Waiting informer caches to sync") + klog.V(4).Infof("Waiting informer caches to sync") if ok := cache.WaitForCacheSync(sched.StopEverything, sched.PodSynced); !ok { klog.Errorf("failed to wait for caches to sync") } @@ -247,7 +247,7 @@ func (sched *Scheduler) Run(clusterWorkers int, podWorkers int, stopCh <-chan st } klog.Info("Started cluster & pod workers") <-stopCh - klog.Infof("Shutting down scheduler %s", sched.SchedulerName) + klog.V(4).Infof("Shutting down scheduler %s", sched.SchedulerName) } // Cache returns the cache in scheduler for test to check the data in scheduler. @@ -272,15 +272,15 @@ func (sched *Scheduler) scheduleOne() bool { if shutdown != nil { return false } - klog.Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) + klog.V(4).Infof("1. Stack: %v, stack selector: %v", stack, stack.Selector) allocation, err := sched.generateAllocationFromStack(stack) - klog.Infof("2. Allocation: %v, allocation selector: %v", allocation, allocation.Selector) + klog.V(4).Infof("2. 
Allocation: %v, allocation selector: %v", allocation, allocation.Selector) if err != nil { return false } start := stack.CreateTime end := time.Now().UnixNano() - klog.Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done pop queue, time consumption: %v ms ===", (end-start)/int64(time.Millisecond)) // 2.do scheduling process start = end @@ -292,21 +292,21 @@ func (sched *Scheduler) scheduleOne() bool { return true } end = time.Now().UnixNano() - klog.Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) - klog.Infof("Schedule result: %v", result) //result is assumed stacks - klog.Infof("3. Assumed Stacks: %v", result) + klog.V(4).Infof("=== done Scheduling pipline, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("Schedule result: %v", result) //result is assumed stacks + klog.V(4).Infof("3. Assumed Stacks: %v", result) // 3.bind scheduler result to pod start = end - klog.Infof("Try to bind to site, stacks:%v", result.Stacks) + klog.V(4).Infof("Try to bind to site, stacks:%v", result.Stacks) sched.bindStacks(result.Stacks) end = time.Now().UnixNano() - klog.Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) + klog.V(4).Infof("=== done bind pod to cluster, time consumption: %vms ===", (end-start)/int64(time.Millisecond)) // log the elapsed time for the entire schedule if stack.CreateTime != 0 { spendTime := time.Now().UnixNano() - stack.CreateTime - klog.Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) + klog.V(4).Infof("@@@ Finished Schedule, time consumption: %vms @@@", spendTime/int64(time.Millisecond)) } return true } @@ -377,7 +377,7 @@ func (sched *Scheduler) findSitesThatPassFilters(ctx context.Context, state *int err = fmt.Errorf("SiteCacheInfoMap of %v is null", siteID) return nil, err } - klog.Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) + klog.V(4).Infof("allSiteCacheInfos: %v", len(allSiteCacheInfos)) // Create filtered list with enough space to avoid growing it // and allow assigning. filtered := make([]*types.Site, len(allSiteCacheInfos)) @@ -463,7 +463,7 @@ func (sched *Scheduler) prioritizeSites( // sort by score. 
sort.Sort(sort.Reverse(result)) - klog.Infof("score sites: %v", result) + klog.V(4).Infof("score sites: %v", result) return result, nil } @@ -501,15 +501,7 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID]) if bindStatus.IsSuccess() { podResporceAlloc := PodSiteResourceAllocation{stack.PodName, siteId, flavorId, *resInfo} - klog.Infof("bind - podResporceAlloc: %#v", podResporceAlloc) - klog.Infof("bind111 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName) sched.ResourceAllocationMap[stack.PodName] = &podResporceAlloc - /*if (sched.ResourceAllocationMap[stack.PodName] == nil) { - sched.ResourceAllocationMap[stack.PodName] = append(sched.ResourceAllocationMap[stack.PodName], &podResporceAlloc) - } else { - sched.ResourceAllocationMap[stack.PodName] = &podResporceAlloc - }*/ - klog.Infof("bind222 - sched.ResourceAllocationMap[stack.PodName]: %#v, stack.PodName:%s", sched.ResourceAllocationMap, stack.PodName) region := utils.GetRegionName(siteId) regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(region) if err != nil { @@ -517,7 +509,7 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite return err } siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID] - siteCacheInfo.DeductSiteResInfo(*resInfo, regionFlavors) + siteCacheInfo.UpdateSiteResInfo(*resInfo, regionFlavors, true) return nil } if bindStatus.Code() == interfaces.Error { @@ -529,14 +521,14 @@ func (sched *Scheduler) bind(ctx context.Context, stack *types.Stack, targetSite // Schedule Run begins watching and scheduling. It waits for cache to be synced , // then starts scheduling and blocked until the context is done. func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocation) (result ScheduleResult, err error) { - klog.Infof("Attempting to schedule allocation: %v", allocation.ID) + klog.V(4).Infof("Attempting to schedule allocation: %v", allocation.ID) state := interfaces.NewCycleState() schedulingCycleCtx, cancel := context.WithCancel(ctx) defer cancel() // 1. Snapshot site resource cache start := time.Now() - klog.Infof("[START] snapshot site...") + klog.V(4).Infof("[START] snapshot site...") ///UpdateFlavorMap updates FlavorCache.RegionFlavorMap, FlavorCache.FlavorMap) ///FlavorMap is updated when scheduler starts, RegionFlavorMap is updated @@ -546,16 +538,16 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 2. Run "prefilter" plugins. start = time.Now() - klog.Infof("[START] Running prefilter plugins...") + klog.V(4).Infof("[START] Running prefilter plugins...") preFilterStatus := sched.SchedFrame.RunPreFilterPlugins(schedulingCycleCtx, state, &allocation.Stack) if !preFilterStatus.IsSuccess() { return result, preFilterStatus.AsError() } - klog.Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prefilter plugins, use_time: %s", time.Since(start).String()) // 3. Run "filter" plugins. 
start = time.Now() - klog.Infof("[START] Running filter plugins...") + klog.V(4).Infof("[START] Running filter plugins...") filteredSitesStatuses := make(interfaces.SiteToStatusMap) allocation.Stack.Selector = allocation.Selector filteredSites, err := sched.findSitesThatPassFilters(ctx, state, &allocation.Stack, filteredSitesStatuses) @@ -563,9 +555,9 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("findSitesThatPassFilters failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running filter plugins, use_time: %s", time.Since(start).String()) - klog.Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) + klog.V(4).Infof("filteredSitesStatuses = %v", filteredSitesStatuses.ToString()) if len(filteredSites) <= 0 { err := fmt.Errorf("filter none site. resultStatus: %s", filteredSitesStatuses.ToString()) klog.Error(err) @@ -574,33 +566,33 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati // 4. Run "prescore" plugins. start = time.Now() - klog.Infof("[START] Running preScore plugins...") + klog.V(4).Infof("[START] Running preScore plugins...") prescoreStatus := sched.SchedFrame.RunPreScorePlugins(ctx, state, &allocation.Stack, filteredSites) if !prescoreStatus.IsSuccess() { return result, prescoreStatus.AsError() } - klog.Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running preScore plugins, use_time: %s", time.Since(start).String()) // 5. Run "prioritizeSites" plugins. start = time.Now() - klog.Infof("[START] Running prioritizeSites plugins...") + klog.V(4).Infof("[START] Running prioritizeSites plugins...") priorityList, err := sched.prioritizeSites(ctx, state, &allocation.Stack, filteredSites) if err != nil { klog.Errorf("prioritizeSites failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("[DONE] Running prioritizeSites plugins, use_time: %s", time.Since(start).String()) // 6. Run "strategy" plugins. start = time.Now() - klog.Infof("[START] Running strategy plugins...") + klog.V(4).Infof("[START] Running strategy plugins...") siteCount, strategyStatus := sched.SchedFrame.RunStrategyPlugins(ctx, state, allocation, priorityList) if !strategyStatus.IsSuccess() { klog.Errorf("RunStrategyPlugins failed! err: %s", err) return result, err } - klog.Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) - klog.Infof("selected Hosts : %#v", siteCount) + klog.V(4).Infof("[DONE] Running StrategyPlugins plugins, use_time: %s", time.Since(start).String()) + klog.V(4).Infof("selected Hosts : %#v", siteCount) // 7. 
reserve resource start = time.Now() @@ -632,7 +624,7 @@ func (sched *Scheduler) Schedule(ctx context.Context, allocation *types.Allocati klog.Errorf("not find suit host") return result, fmt.Errorf("not find suit host") } - klog.Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) + klog.V(4).Infof("reserve resource(%s) success, use_time: %s", allocation.ID, time.Since(start).String()) return } @@ -713,7 +705,7 @@ func (sched *Scheduler) initPodClusterSchedulerInformers(gsconfig *types.GSSched sched.schedulerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { if sched, ok := obj.(*schedulerv1.Scheduler); ok { - klog.Infof("The scheduler %s process is going to be killed...", sched.Name) + klog.V(4).Infof("The scheduler %s process is going to be killed...", sched.Name) os.Exit(0) } else { klog.Fatalf("The deleted object %v failed to convert to scheduler", obj) @@ -757,7 +749,7 @@ func (sched *Scheduler) processNextClusterItem() bool { if shutdown { return false } - klog.Infof("Process an item in work queue %v ", workItem) + klog.V(4).Infof("Process an item in work queue %v ", workItem) eventKey := workItem.(KeyWithEventType) key := eventKey.Key defer sched.ClusterQueue.Done(key) @@ -766,7 +758,7 @@ func (sched *Scheduler) processNextClusterItem() bool { utilruntime.HandleError(fmt.Errorf("Handle %v of key %v failed with %v", "serivce", key, err)) } sched.ClusterQueue.Forget(key) - klog.Infof("Successfully processed & synced %s", key) + klog.V(4).Infof("Successfully processed & synced %s", key) return true } @@ -776,24 +768,24 @@ func (sched *Scheduler) clusterSyncHandler(keyWithEventType KeyWithEventType) er return err } key := keyWithEventType.Key - klog.Infof("sync cache for key %v", key) + klog.V(4).Infof("sync cache for key %v", key) startTime := time.Now() defer func() { - klog.Infof("Finished syncing %q (%v)", key, time.Since(startTime)) + klog.V(4).Infof("Finished syncing %q (%v)", key, time.Since(startTime)) }() nameSpace, clusterName, err := cache.SplitMetaNamespaceKey(key) //This performs controller logic - create site's static info - klog.Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("cluster processing - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) result, err := sched.updateStaticSiteResourceInfo(key, keyWithEventType.EventType, nameSpace, clusterName) if !result { klog.Errorf("Failed a cluster processing - event: %v, key: %v, error: %v", keyWithEventType, key, err) sched.ClusterQueue.AddRateLimited(keyWithEventType) } else { - klog.Infof(" Processed a cluster: %v", key) + klog.V(4).Infof(" Processed a cluster: %v", key) sched.ClusterQueue.Forget(key) } - klog.Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) + klog.V(4).Infof("Cluster was handled by ClusterController - event: %v, cluster name: %v", keyWithEventType.EventType, clusterName) if keyWithEventType.EventType != EventType_Delete { cluster, err := sched.ClusterLister.Clusters(nameSpace).Get(clusterName) clusterCopy := cluster.DeepCopy() @@ -848,7 +840,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name: %s", clusterName) return false, err } - klog.Infof("create a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("create a site static info, cluster 
profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusCreated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -866,7 +858,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType klog.Errorf("Failed to retrieve cluster in local cache by cluster name - %s", clusterName) return false, err } - klog.Infof("update a site static info, cluster profile: %v", clusterCopy) + klog.V(4).Infof("update a site static info, cluster profile: %v", clusterCopy) clusterCopy.Status = ClusterStatusUpdated site := convertClusterToSite(clusterCopy) siteCacheInfo := schedulersitecacheinfo.NewSiteCacheInfo() @@ -898,7 +890,7 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType //This function updates sites' dynamic resource informaton func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) { //reset total(available) resource - klog.Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) + klog.V(4).Infof("UpdateSiteDynamicResource region: %s, resource:%v", region, resource) var siteID string for _, siteresource := range resource.CPUMemResources { siteID = region + constants.SiteDelimiter + siteresource.AvailabilityZone diff --git a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go index 2b935d5f7..0397c486d 100644 --- a/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go +++ b/globalscheduler/pkg/scheduler/sitecacheinfo/sitecache_info.go @@ -552,10 +552,9 @@ func GetStackKey(stack *types.Stack) (string, error) { return uid, nil } -//deduct or add +//deduct or add func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor, deduct bool) error { var resourceTypes []string - klog.Infof("444 UpdateSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap) for resType, res := range resInfo.CpuAndMem { //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time) if resType == "" { @@ -568,140 +567,29 @@ func (n *SiteCacheInfo) UpdateSiteResInfo(resInfo types.AllResInfo, regionFlavor continue } for reqType, reqRes := range n.RequestedResources { - klog.Infof("555 UpdateSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes) resTypes := strings.Split(reqType, constants.FlavorDelimiter) if !utils.IsContain(resTypes, resType) { - klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) + klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) continue } reqRes.VCPU += res.VCPU reqRes.Memory += res.Memory - klog.Infof("666 UpdateSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) n.RequestedResources[resType] = reqRes - klog.Infof("777 UpdateSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) - } } for volType, used := range resInfo.Storage { - klog.Infof("888 UpdateSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) reqVol, ok := n.RequestedStorage[volType] if !ok { reqVol = 0 } reqVol += used n.RequestedStorage[volType] = reqVol - klog.Infof("888 UpdateSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) - } n.updateSiteFlavor(resourceTypes, regionFlavorMap, deduct) n.generation = nextGeneration() return nil } -// DeductSiteResInfo deduct site's resource info -/*func (n *SiteCacheInfo) DeductSiteResInfo2(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { - var resourceTypes []string - for resType, res := range resInfo.CpuAndMem { - //resource type is null, assign default resource type (e.g. when binding a pod for the first time) - if resType == "" { - resType = string(DefaultResourceType) - resourceTypes = append(resourceTypes, resType) - } - if len(n.RequestedResources) == 0 { - reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} - n.RequestedResources[resType] = &reqRes - continue - } - for reqType, reqRes := range n.RequestedResources { - resTypes := strings.Split(reqType, constants.FlavorDelimiter) - if !utils.IsContain(resTypes, resType) { - klog.V(4).Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) - continue - } - reqRes.VCPU += res.VCPU - reqRes.Memory += res.Memory - n.RequestedResources[resType] = reqRes - } - } - for volType, used := range resInfo.Storage { - reqVol, ok := n.RequestedStorage[volType] - if !ok { - reqVol = 0 - } - reqVol += used - n.RequestedStorage[volType] = reqVol - } - n.updateSiteFlavor(resourceTypes, regionFlavorMap) - n.generation = nextGeneration() - return nil -}*/ - -func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor, deduct bool) { - n.mu.Lock() - defer n.mu.Unlock() - - klog.Infof("999 updateSiteFlavor Before - resourceTypes: %#v, regionFlavors:%#v", resourceTypes, regionFlavors) - for k, v := range regionFlavors { - klog.Infof("updateSiteFlavor Before - key: %#v, regionFlavor:%#v", k, v) - } - - if n.AllocatableFlavor == nil { - n.AllocatableFlavor = map[string]int64{} - } - supportFlavors := n.AllocatableFlavor - regionName := utils.GetRegionName(n.Site.SiteID) - for flavorid := range supportFlavors { - regionFalvorKey := regionName + constants.FlavorDelimiter + flavorid - flv := regionFlavors[regionFalvorKey] - klog.Infof("000 updateSiteFlavor - flv: %#v", flv) - if flv == nil { - n.deductFlavor() - klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: 
%#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) - if err != nil { - n.deductFlavor() - klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - for _, resourceType := range resourceTypes { - klog.Infof("121 updateSiteFlavor Before - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) - totalRes := n.TotalResources[resourceType] - requestRes := n.RequestedResources[resourceType] - if totalRes == nil { - klog.Infof("updateSiteFlavor - totalRes is nil") - n.deductFlavor() - klog.Infof("n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - return - } - if requestRes == nil { - klog.Infof("updateSiteFlavor - requestRes is nil") - requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} - } - if(deduct == true) { - count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt - memCount := (totalRes.Memory - requestRes.Memory) / flv.Ram - } else { - count := (totalRes.VCPU + requestRes.VCPU) / vCPUInt - memCount := (totalRes.Memory + requestRes.Memory) / flv.Ram - } - if count > memCount { - count = memCount - } - if _, ok := n.AllocatableFlavor[flavorid]; !ok { - n.AllocatableFlavor[flavorid] = 0 - } - klog.Infof("121 n.AllocatableFlavor Before - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - if n.AllocatableFlavor[flavorid] > count { - n.AllocatableFlavor[flavorid] = count - } - klog.Infof("121 n.AllocatableFlavor After - n.AllocatableFlavor[%#v]: %#v", flavorid, n.AllocatableFlavor[flavorid]) - klog.Infof("121 updateSiteFlavor After - resourceType:%#v, n.TotalResources: %#v, RequestedResources: %#v", resourceType, n.TotalResources[resourceType], n.RequestedResources[resourceType]) - } - } -} - /* updateSiteFlavor() is equal with updateFlavor() functionally. 
But due to the difference between flavor files and data, @@ -710,10 +598,14 @@ updateFlavor(): /home/ubuntu/go/src/k8s.io/arktos/conf/flavors.json global scheduler flavor config file: /home/ubuntu/go/src/k8s.io/arktos/conf/flavor_config.yaml */ -func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor) { +func (n *SiteCacheInfo) updateSiteFlavor(resourceTypes []string, regionFlavors map[string]*typed.RegionFlavor, deduct bool) { n.mu.Lock() defer n.mu.Unlock() + var count, memCount int64 + for k, v := range regionFlavors { + klog.V(4).Infof("updateSiteFlavor Before - key: %#v, regionFlavor:%#v", k, v) + } if n.AllocatableFlavor == nil { n.AllocatableFlavor = map[string]int64{} } @@ -723,26 +615,31 @@ func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors regionFalvorKey := regionName + constants.FlavorDelimiter + flavorid flv := regionFlavors[regionFalvorKey] if flv == nil { - n.deductFlavor() + n.updateFlavorCount(deduct) return } vCPUInt, err := strconv.ParseInt(flv.Vcpus, 10, 64) if err != nil { - n.deductFlavor() + n.updateFlavorCount(deduct) return } for _, resourceType := range resourceTypes { totalRes := n.TotalResources[resourceType] requestRes := n.RequestedResources[resourceType] if totalRes == nil { - n.deductFlavor() + n.updateFlavorCount(deduct) return } if requestRes == nil { requestRes = &types.CPUAndMemory{VCPU: 0, Memory: 0} } - count := (totalRes.VCPU - requestRes.VCPU) / vCPUInt - memCount := (totalRes.Memory - requestRes.Memory) / flv.Ram + if deduct == true { + count = (totalRes.VCPU - requestRes.VCPU) / vCPUInt + memCount = (totalRes.Memory - requestRes.Memory) / flv.Ram + } else { + count = (totalRes.VCPU + requestRes.VCPU) / vCPUInt + memCount = (totalRes.Memory + requestRes.Memory) / flv.Ram + } if count > memCount { count = memCount } @@ -756,88 +653,28 @@ func (n *SiteCacheInfo) updateSiteFlavor2(resourceTypes []string, regionFlavors } } -func (n *SiteCacheInfo) deductFlavor() { - if n.AllocatableFlavor == nil { - n.AllocatableFlavor = map[string]int64{} - } - for key, value := range n.AllocatableFlavor { - n.AllocatableFlavor[key] = value - 1 - if n.RequestedFlavor == nil { - n.RequestedFlavor = make(map[string]int64) - } - requested, ok := n.RequestedFlavor[key] - if !ok { - n.RequestedFlavor[key] = 1 - } else { - n.RequestedFlavor[key] = requested + 1 - } - } -} - -func (n *SiteCacheInfo) updateFlavor(deduct bool) { - n := -1 +func (n *SiteCacheInfo) updateFlavorCount(deduct bool) { + var m int64 + m = 1 //add if deduct == true { - n=1 + m = -1 //deduct } if n.AllocatableFlavor == nil { n.AllocatableFlavor = map[string]int64{} } for key, value := range n.AllocatableFlavor { - n.AllocatableFlavor[key] = value - 1 + n.AllocatableFlavor[key] = value + m if n.RequestedFlavor == nil { n.RequestedFlavor = make(map[string]int64) } requested, ok := n.RequestedFlavor[key] if !ok { - n.RequestedFlavor[key] = 1 - } else { - n.RequestedFlavor[key] = requested + 1 - } - } -} - -//revoke bound site's resource to pod because pod creation failed -func (n *SiteCacheInfo) WithdrawSiteResInfo(resInfo types.AllResInfo, regionFlavorMap map[string]*typed.RegionFlavor) error { - var resourceTypes []string - klog.Infof("444 WithdrawSiteResInfo - resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavorMap) - for resType, res := range resInfo.CpuAndMem { - //resource type is null, assign default resource type (e.g. 
when binding a pod for the first time) - if resType == "" { - resType = string(DefaultResourceType) - resourceTypes = append(resourceTypes, resType) - } - if len(n.RequestedResources) == 0 { - reqRes := types.CPUAndMemory{VCPU: res.VCPU, Memory: res.Memory} - n.RequestedResources[resType] = &reqRes - continue - } - for reqType, reqRes := range n.RequestedResources { - klog.Infof("555 WithdrawSiteResInfo - reqType: %#v, reqRes:%#v", reqType, reqRes) - resTypes := strings.Split(reqType, constants.FlavorDelimiter) - if !utils.IsContain(resTypes, resType) { - klog.Infof("!utils.IsContain: %v", !utils.IsContain(resTypes, resType)) - continue + n.RequestedFlavor[key] = 0 + if deduct == true { + n.RequestedFlavor[key] = 1 } - reqRes.VCPU += res.VCPU - reqRes.Memory += res.Memory - klog.Infof("666 WithdrawSiteResInfo Before - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) - n.RequestedResources[resType] = reqRes - klog.Infof("777 WithdrawSiteResInfo After - RequestedResources[%#v]: %#v", resType, n.RequestedResources[resType]) - - } - } - for volType, used := range resInfo.Storage { - klog.Infof("888 WithdrawSiteResInfo Before - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) - reqVol, ok := n.RequestedStorage[volType] - if !ok { - reqVol = 0 + } else { + n.RequestedFlavor[key] = requested - m } - reqVol += used - n.RequestedStorage[volType] = reqVol - klog.Infof("888 WithdrawSiteResInfo After - RequestedStorage[%#v]: %#v", volType, n.RequestedStorage[volType]) - } - n.updateSiteFlavor(resourceTypes, regionFlavorMap) - n.generation = nextGeneration() - return nil } From 7773eeb5b55e5d20df1483f36427e0161aa71aed Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 20:42:39 +0000 Subject: [PATCH 05/24] implemented resource revokation when vm creation failed --- globalscheduler/controllers/dispatcher/BUILD | 2 -- globalscheduler/pkg/scheduler/scheduler.go | 6 ------ 2 files changed, 8 deletions(-) diff --git a/globalscheduler/controllers/dispatcher/BUILD b/globalscheduler/controllers/dispatcher/BUILD index 1b8dcdde5..5cf8b5c91 100644 --- a/globalscheduler/controllers/dispatcher/BUILD +++ b/globalscheduler/controllers/dispatcher/BUILD @@ -31,10 +31,8 @@ go_library( "//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library", "//staging/src/k8s.io/client-go/kubernetes:go_default_library", "//staging/src/k8s.io/client-go/kubernetes/scheme:go_default_library", - "//staging/src/k8s.io/client-go/kubernetes/typed/core/v1:go_default_library", "//staging/src/k8s.io/client-go/rest:go_default_library", "//staging/src/k8s.io/client-go/tools/cache:go_default_library", - "//staging/src/k8s.io/client-go/tools/record:go_default_library", "//vendor/k8s.io/klog:go_default_library", ], ) diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 3b7e91ae2..0f315173f 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -165,15 +165,9 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct //build entire FlavorMap map sched.UpdateFlavor() -<<<<<<< HEAD - klog.V(4).Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) - // init pod, cluster, and scheduler informers for scheduler - err = sched.initPodClusterSchedulerInformers(gsconfig, stopEverything) -======= klog.Infof("FlavorMap: %v", sched.siteCacheInfoSnapshot.FlavorMap) // init pod, cluster, scheduler, and allocation informers for scheduler err = 
sched.initPodClusterSchedulerAllocationInformers(gsconfig, stopEverything) ->>>>>>> f1c13358f172faaa2a74119bbf5395594ebea393 if err != nil { return nil, err } From 3b5a1c36699bc495e161747a58cab4eba53f9fe4 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 22:37:23 +0000 Subject: [PATCH 06/24] applied review --- .../dispatcher/dispatcher_process.go | 6 ++--- .../pkg/scheduler/eventhandlers.go | 4 ++-- .../plugins/defaultbinder/default_binder.go | 22 +++---------------- 3 files changed, 8 insertions(+), 24 deletions(-) diff --git a/globalscheduler/controllers/dispatcher/dispatcher_process.go b/globalscheduler/controllers/dispatcher/dispatcher_process.go index 8ba07f6c3..6490444df 100644 --- a/globalscheduler/controllers/dispatcher/dispatcher_process.go +++ b/globalscheduler/controllers/dispatcher/dispatcher_process.go @@ -100,7 +100,7 @@ func (p *Process) Run(quit chan struct{}) { dispatcherInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ DeleteFunc: func(obj interface{}) { - klog.Infof("The dispatcher %s process is going to be killed...", p.name) + klog.V(3).Infof("The dispatcher %s process is going to be killed...", p.name) os.Exit(0) }, UpdateFunc: func(old, new interface{}) { @@ -215,12 +215,12 @@ func (p *Process) SendPodToCluster(pod *v1.Pod) { go func() { instanceId, err := openstack.ServerCreate(host, token, &pod.Spec) if err == nil { - klog.Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) + klog.V(3).Infof("The openstack vm for the pod %v has been created at the host %v", pod.ObjectMeta.Name, host) pod.Status.ClusterInstanceId = instanceId pod.Status.Phase = v1.ClusterScheduled updatedPod, err := p.clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).UpdateStatus(pod) if err == nil { - klog.Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) + klog.V(3).Infof("The pod %v has been updated its apiserver database status to scheduled successfully with the instance id %v", updatedPod, instanceId) } else { klog.Warningf("The pod %v failed to update its apiserver database status to scheduled with the error %v", pod.ObjectMeta.Name, err) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 9b16fe8b1..ce6b0ba9f 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -566,8 +566,8 @@ func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { verified = false name := pod.Name flavors := pod.Spec.VirtualMachine.Flavors - if pod.Name == "" || flavors == nil { - klog.Errorf("pod name:%s, flavors:%v is null", name, flavors) + if pod.Name == "" { + klog.Errorf("pod name:%s is null", name) return verified } verified = true diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index af9493ccd..049cd1a83 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -67,7 +67,7 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s //siteSelectedInfo is type of SiteSelectorInfo at cycle_state.go siteSelectedInfo, err := interfaces.GetSiteSelectorState(state, siteID) if err != nil { - klog.Errorf("Gettng site selector state failed! 
err: %s", err) + klog.Errorf("Getting site selector state failed! err: %s", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) } klog.V(4).Infof("site selector info: %v", siteSelectedInfo) @@ -102,22 +102,6 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s resInfo.CpuAndMem[flv.OsExtraSpecs.ResourceType] = reqRes } b.handle.Cache().UpdateSiteWithResInfo(siteID, resInfo) - /*regionFlavors, err := b.handle.SnapshotSharedLister().SiteCacheInfos().GetFlavors() - if err != nil { - klog.Errorf("Getting region's flavor failed: %s", err) - return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) - } - if regionFlavors == nil || err != nil { - regionFlavors = map[string]*typed.RegionFlavor{} - }*/ - /*siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.V(4).Infof("Resource state after deduction: %v", siteCacheInfo) - return nil*/ - - /*klog.V(4).Infof("111 Resource state before deduction: %#v", siteCacheInfo) - klog.V(4).Infof("222 resInfo: %#v, regionFlavors:%#v", resInfo, regionFlavors) - siteCacheInfo.DeductSiteResInfo(resInfo, regionFlavors) - klog.V(4).Infof("333 Resource state after deduction: %#v", siteCacheInfo)*/ return nil } @@ -140,12 +124,12 @@ func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.Cycle //siteSelectedInfo is type of SiteSelectorInfo at cycle_state.go siteSelectedInfo, err := interfaces.GetSiteSelectorState(state, siteID) if err != nil { - klog.Errorf("Gettng site selector state failed! err: %s", err) + klog.Errorf("Gettng site selector state failed! err: %v", err) status := interfaces.NewStatus(interfaces.Error, fmt.Sprintf("getting site %q info failed: %v", siteID, err)) return status, siteID, flavorID, &resInfo } if len(stack.Resources) != len(siteSelectedInfo.Flavors) { - klog.Errorf("flavor count not equal to server count! err: %s", err) + klog.Errorf("flavor count not equal to server count! 
err: %v", err) return interfaces.NewStatus(interfaces.Error, fmt.Sprintf("siteID(%s) flavor count not equal to "+ "server count!", siteID)), siteID, flavorID, nil } From f1e154250c562835fee9588c30520f389ffc4f22 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 18 May 2021 22:49:24 +0000 Subject: [PATCH 07/24] updated according to review --- globalscheduler/pkg/scheduler/eventhandlers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index ce6b0ba9f..1b13e907a 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -565,7 +565,6 @@ func (sched *Scheduler) verifyClusterInfo(cluster *clusterv1.Cluster) (verified func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { verified = false name := pod.Name - flavors := pod.Spec.VirtualMachine.Flavors if pod.Name == "" { klog.Errorf("pod name:%s is null", name) return verified From fe2ba14d0321824c44b4def9122a23441cd4986a Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Wed, 19 May 2021 00:09:26 +0000 Subject: [PATCH 08/24] updated resource data structure --- globalscheduler/pkg/scheduler/eventhandlers.go | 4 ++-- globalscheduler/pkg/scheduler/scheduler.go | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index 1b13e907a..f4e0fa385 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -638,7 +638,7 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { //withdraw reserved resources to a pod & add it to cash to other pods func (sched *Scheduler) withdrawResource(podName string) error { - resource := sched.ResourceAllocationMap[podName] + resource := sched.PodSiteResourceMap[podName] if resource == nil { klog.V(4).Infof("there is no preserved resource for pod: %s", podName) return nil @@ -652,6 +652,6 @@ func (sched *Scheduler) withdrawResource(podName string) error { } siteCacheInfo := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[resource.SiteID] siteCacheInfo.UpdateSiteResInfo(allResInfo, regionFlavor, false) - delete(sched.ResourceAllocationMap, podName) + delete(sched.PodSiteResourceMap, podName) return nil } diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 0f315173f..0a4ae2175 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -80,7 +80,8 @@ type ScheduleResult struct { FeasibleSites int // Number of feasible site on one stack scheduled } -type PodSiteResourceAllocation struct { +//perserved site resource for pod +type PodSiteResource struct { PodName string SiteID string Flavor string @@ -134,7 +135,7 @@ type Scheduler struct { workerNumber int // table to withdraw site resource - ResourceAllocationMap map[string]*PodSiteResourceAllocation + PodSiteResourceMap map[string]*PodSiteResource } // single scheduler instance @@ -156,7 +157,7 @@ func NewScheduler(gsconfig *types.GSSchedulerConfiguration, stopCh <-chan struct ConfigFilePath: gsconfig.ConfigFilePath, deletedClusters: make(map[string]string), workerNumber: 1, - ResourceAllocationMap: make(map[string]*PodSiteResourceAllocation), + PodSiteResourceMap: make(map[string]*PodSiteResource), } err := sched.buildFramework() if err != nil { @@ -509,8 +510,8 @@ func (sched *Scheduler) bind(ctx context.Context, stack 
*types.Stack, targetSite bindStatus, siteId, flavorId, resInfo := sched.SchedFrame.RunBindResourcePlugins(ctx, state, stack, sched.siteCacheInfoSnapshot.SiteCacheInfoMap[targetSiteID]) if bindStatus.IsSuccess() { - podResporceAlloc := PodSiteResourceAllocation{stack.PodName, siteId, flavorId, *resInfo} - sched.ResourceAllocationMap[stack.PodName] = &podResporceAlloc + podResource := PodSiteResource{stack.PodName, siteId, flavorId, *resInfo} + sched.PodSiteResourceMap[stack.PodName] = &podResource region := utils.GetRegionName(siteId) regionFlavors, err := sched.siteCacheInfoSnapshot.GetRegionFlavors(region) if err != nil { From ec4735bb5483352256c02dbd869daec1b477fefe Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Wed, 19 May 2021 01:41:31 +0000 Subject: [PATCH 09/24] applied review --- globalscheduler/pkg/scheduler/eventhandlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index f4e0fa385..c7428e714 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -629,7 +629,6 @@ func (sched *Scheduler) deletePodWithdrawResource(obj interface{}) { klog.Errorf("cannot convert to *v1.Pod: %v", t) return } - err := sched.withdrawResource(pod.Name) if err != nil { klog.Errorf("withdraw resource of pod %s failed", pod.Name) @@ -655,3 +654,4 @@ func (sched *Scheduler) withdrawResource(podName string) error { delete(sched.PodSiteResourceMap, podName) return nil } + From 4a4928340876574c4442cdd1cd92420d7a4e0a9a Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 25 May 2021 21:47:49 +0000 Subject: [PATCH 10/24] pulled from pr 289 --- globalscheduler/pkg/scheduler/eventhandlers.go | 1 - 1 file changed, 1 deletion(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index c7428e714..ebd4ce68a 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -654,4 +654,3 @@ func (sched *Scheduler) withdrawResource(podName string) error { delete(sched.PodSiteResourceMap, podName) return nil } - From 80033e6df264e456f2fdc323907365d83a694c78 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Thu, 27 May 2021 18:45:56 +0000 Subject: [PATCH 11/24] applied reviews --- globalscheduler/pkg/scheduler/eventhandlers.go | 5 ++--- globalscheduler/pkg/scheduler/factory/factory.go | 3 --- .../pkg/scheduler/framework/interfaces/framework.go | 1 - .../framework/plugins/defaultbinder/default_binder.go | 3 --- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go index ebd4ce68a..eb3493116 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers.go +++ b/globalscheduler/pkg/scheduler/eventhandlers.go @@ -170,7 +170,6 @@ func failedToSchedule(pod *v1.Pod) bool { // addPodToCache add pod to the stack cache of the scheduler func (sched *Scheduler) addPodToCache(obj interface{}) { pod, ok := obj.(*v1.Pod) - klog.V(4).Infof("Add a pod: %v", pod.Name) if !ok { klog.Errorf("cannot convert to *v1.Pod: %v", obj) return @@ -566,7 +565,7 @@ func (sched *Scheduler) verifyPodInfo(pod *v1.Pod) (verified bool) { verified = false name := pod.Name if pod.Name == "" { - klog.Errorf("pod name:%s is null", name) + klog.Errorf("pod name:%s is empty", name) return verified } verified = true @@ -635,7 +634,7 @@ func (sched *Scheduler) 
deletePodWithdrawResource(obj interface{}) { } } -//withdraw reserved resources to a pod & add it to cash to other pods +//withdraw reserved resources to a pod & add it to cache to other pods func (sched *Scheduler) withdrawResource(podName string) error { resource := sched.PodSiteResourceMap[podName] if resource == nil { diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go index 0992ecec3..d26f53dff 100644 --- a/globalscheduler/pkg/scheduler/factory/factory.go +++ b/globalscheduler/pkg/scheduler/factory/factory.go @@ -44,9 +44,6 @@ func (i *podInformer) Lister() corelisters.PodLister { // NewPodInformer creates a shared index informer that returns only non-terminal pods. func NewPodInformer(schedulerName string, client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer { - /*selector := fields.ParseSelectorOrDie( - "status.phase=" + string(v1.PodAssigned) + - ",status.assignedScheduler.name=" + schedulerName)*/ selector := fields.ParseSelectorOrDie( "status.assignedScheduler.name=" + schedulerName) lw := cache.NewListWatchFromClient(client.CoreV1(), string(v1.ResourcePods), metav1.NamespaceAll, selector) diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go index 01203fd49..5dd3302a3 100644 --- a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go +++ b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go @@ -425,7 +425,6 @@ func (f *framework) RunBindPlugins(ctx context.Context, state *CycleState, stack return status } -//resource func (f *framework) runBindPlugin(ctx context.Context, bp BindPlugin, state *CycleState, stack *types.Stack, siteCacheInfo *schedulersitecacheinfo.SiteCacheInfo) *Status { return bp.Bind(ctx, state, stack, siteCacheInfo) diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go index 049cd1a83..a0859506b 100644 --- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go +++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go @@ -110,11 +110,8 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s func (b DefaultBinder) BindResource(ctx context.Context, state *interfaces.CycleState, stack *types.Stack, siteCacheInfo *sitecacheinfo.SiteCacheInfo) (*interfaces.Status, string, string, *types.AllResInfo) { region := siteCacheInfo.GetSite().RegionAzMap.Region - - //eipNum : private data resInfo := types.AllResInfo{CpuAndMem: map[string]types.CPUAndMemory{}, Storage: map[string]float64{}} siteID := siteCacheInfo.Site.SiteID - stack.Selected.SiteID = siteID stack.Selected.Region = region stack.Selected.AvailabilityZone = siteCacheInfo.GetSite().RegionAzMap.AvailabilityZone From b6e04d9edf8884c8cc724d08e0a2d52bc2e8f9df Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Thu, 27 May 2021 22:04:16 +0000 Subject: [PATCH 12/24] performed CICD test --- globalscheduler/pkg/scheduler/eventhandlers_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go index aa2f695d2..fea7a6af5 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers_test.go +++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go @@ -81,3 +81,4 @@ func TestSkipStackUpdate(t *testing.T) { }) } } + From 
37f0e04900e1510c85e96d2bd2ff96b53ff908df Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 1 Jun 2021 21:01:16 +0000 Subject: [PATCH 13/24] add unit testcase --- globalscheduler/pkg/scheduler/BUILD | 3 + .../pkg/scheduler/eventhandlers_test.go | 121 +++++++++++++++ .../pkg/scheduler/internal/cache/fake/BUILD | 7 +- .../internal/cache/fake/fake_snapshot.go | 143 ++++++++++++++++++ globalscheduler/pkg/scheduler/scheduler.go | 2 +- 5 files changed, 274 insertions(+), 2 deletions(-) create mode 100644 globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go diff --git a/globalscheduler/pkg/scheduler/BUILD b/globalscheduler/pkg/scheduler/BUILD index c25c31136..b6a426bb9 100644 --- a/globalscheduler/pkg/scheduler/BUILD +++ b/globalscheduler/pkg/scheduler/BUILD @@ -58,7 +58,10 @@ go_test( srcs = ["eventhandlers_test.go"], embed = [":go_default_library"], deps = [ + "//globalscheduler/pkg/scheduler/client/typed:go_default_library", + "//globalscheduler/pkg/scheduler/internal/cache:go_default_library", "//globalscheduler/pkg/scheduler/internal/cache/fake:go_default_library", + "//globalscheduler/pkg/scheduler/sitecacheinfo:go_default_library", "//globalscheduler/pkg/scheduler/types:go_default_library", ], ) diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go index fea7a6af5..7f96cfb62 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers_test.go +++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go @@ -20,7 +20,11 @@ package scheduler import ( "testing" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + //internalcache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" + internalcache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" fakecache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache/fake" + schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" ) @@ -82,3 +86,120 @@ func TestSkipStackUpdate(t *testing.T) { } } +func TestWithdrawResource(t *testing.T) { + siteId := "SW-1||az-1" + cm := types.CPUAndMemory{ + VCPU: 1, + Memory: 128, + } + cpuAndMemMap := make(map[string]types.CPUAndMemory) + cpuAndMemMap["deafult"] = cm + storageMap := make(map[string]float64) + storageMap["ssd"] = float64(128) + allRes := types.AllResInfo{ + CpuAndMem: cpuAndMemMap, + Storage: storageMap, + //eipNum: 1, + } + + podSiteResource := &PodSiteResource{ + PodName: "pod1", + SiteID: siteId, + Flavor: "42", + Resource: allRes, + } + podSiteResourceMap := make(map[string]*PodSiteResource) + podSiteResourceMap["pod1"] = podSiteResource + + region := "SW-1" + id := "42" + name := "m1.nano" + vcpus := "1" + ram := int64(128) + disk := "0" + regionFlavorID := region + "--" + id + flavor := &typed.RegionFlavor{ + RegionFlavorID: regionFlavorID, + Region: region, + Flavor: typed.Flavor{ + ID: id, + Name: name, + Vcpus: vcpus, + Ram: ram, + Disk: disk, + }, + } + /*type Site struct { + SiteID string `json:"site_id" + ClusterName string `json:"cluster_name"` + ClusterNamespace string `json:"cluster_namespace"` + GeoLocation GeoLocation + RegionAzMap RegionAzMap + Operator string `json:"operator"` + Status string `json:"status"` + SiteAttribute []*typed.SiteAttribute `json:"site_attributes"` + EipTypeName string `json:"eiptype_name"` + SpotResources map[string]SpotResource `json:"spot_resources"` + Hosts []*typed.Host `json:"-"` + }*/ + site := &types.Site{ + SiteID: siteId, + ClusterNamespace: 
"default", + ClusterName: "cluster1", + // GeoLocation: GeoLocation + // RegionAzMap: RegionAzMap + // Operator: string `json:"operator"` + // Status string `json:"status"` + // SiteAttribute []*typed.SiteAttribute `json:"site_attributes"` + // EipTypeName string `json:"eiptype_name"` + // SpotResources map[string]SpotResource `json:"spot_resources"` + // Hosts []*typed.Host `json:"-"` + } + + siteCacheInfo := &schedulersitecacheinfo.SiteCacheInfo{ + Site: site, + RequestedResources: make(map[string]*types.CPUAndMemory), + TotalResources: make(map[string]*types.CPUAndMemory), + RequestedStorage: make(map[string]float64), + TotalStorage: make(map[string]float64), + RequestedFlavor: make(map[string]int64), + AllocatableFlavor: make(map[string]int64), + AllocatableSpotFlavor: make(map[string]types.SpotResource), + Qos: make(map[string]float64), + } + SiteCacheInfoMap := make(map[string]*schedulersitecacheinfo.SiteCacheInfo) + RegionFlavorMap := make(map[string]*typed.RegionFlavor) + FlavorMap := make(map[string]*typed.RegionFlavor) + FlavorMap[id] = flavor + RegionFlavorMap[regionFlavorID] = flavor + SiteCacheInfoMap[siteId] = siteCacheInfo + table := []struct { + podName string + siteCacheInfoMap map[string]*schedulersitecacheinfo.SiteCacheInfo + regionFlavorMap map[string]*typed.RegionFlavor + flavorMap map[string]*typed.RegionFlavor + }{ + { + podName: "pod1", + siteCacheInfoMap: SiteCacheInfoMap, + regionFlavorMap: RegionFlavorMap, + flavorMap: FlavorMap, + }, + } + for _, test := range table { + t.Run(test.podName, func(t *testing.T) { + sched := &Scheduler{ + PodSiteResourceMap: podSiteResourceMap, + siteCacheInfoSnapshot: &internalcache.Snapshot{ + SiteCacheInfoMap: test.siteCacheInfoMap, + RegionFlavorMap: test.regionFlavorMap, + FlavorMap: test.flavorMap, + }, + } + err := sched.withdrawResource(test.podName) + if err != nil { + t.Errorf("TestWithdrawResource() = %t, expected = %t", err, nil) + } + }) + } +} diff --git a/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD b/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD index bb698242d..0ff675d5f 100644 --- a/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD +++ b/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD @@ -2,13 +2,18 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", - srcs = ["fake_cache.go"], + srcs = [ + "fake_cache.go", + "fake_snapshot.go", + ], importpath = "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache/fake", visibility = ["//globalscheduler/pkg/scheduler:__subpackages__"], deps = [ "//globalscheduler/pkg/scheduler/client/typed:go_default_library", + "//globalscheduler/pkg/scheduler/common/constants:go_default_library", "//globalscheduler/pkg/scheduler/internal/cache:go_default_library", "//globalscheduler/pkg/scheduler/listers:go_default_library", + "//globalscheduler/pkg/scheduler/sitecacheinfo:go_default_library", "//globalscheduler/pkg/scheduler/types:go_default_library", ], ) diff --git a/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go b/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go new file mode 100644 index 000000000..b14e2b696 --- /dev/null +++ b/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go @@ -0,0 +1,143 @@ +/* +Copyright 2020 Authors of Arktos. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fake + +import ( + "fmt" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" + schedulerlisters "k8s.io/kubernetes/globalscheduler/pkg/scheduler/listers" + schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" + "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" +) + +// Snapshot is a snapshot of cache SiteCacheInfo and SiteTree order. The scheduler takes a +// snapshot at the beginning of each scheduling cycle and uses it for its operations in that cycle. +type Snapshot struct { + // SiteCacheInfoMap a map of site name to a snapshot of its SiteCacheInfo. + SiteCacheInfoMap map[string]*schedulersitecacheinfo.SiteCacheInfo + RegionFlavorMap map[string]*typed.RegionFlavor + // FlavorMap is a map of the flavor id to a flavor, contains all flavors + FlavorMap map[string]*typed.RegionFlavor +} + +var _ schedulerlisters.SharedLister = &Snapshot{} + +// NewEmptySnapshot initializes a Snapshot struct and returns it. +func NewEmptySnapshot() *Snapshot { + return &Snapshot{ + SiteCacheInfoMap: make(map[string]*schedulersitecacheinfo.SiteCacheInfo), + //SiteCacheInfoList: make([]*schedulersitecacheinfo.SiteCacheInfo, 0, len(SiteCacheInfoMap)), + RegionFlavorMap: make(map[string]*typed.RegionFlavor), + FlavorMap: make(map[string]*typed.RegionFlavor), + } +} + +// createSiteInfoCacheMap obtains a list of pods and pivots that list into a map +// where the keys are site names and the values are the aggregated information +// for that site. +func createSiteInfoCacheMap(stacks []*types.Stack, sites []*types.Site) map[string]*schedulersitecacheinfo.SiteCacheInfo { + siteIDToInfo := make(map[string]*schedulersitecacheinfo.SiteCacheInfo) + for _, stack := range stacks { + siteID := stack.Selected.SiteID + if _, ok := siteIDToInfo[siteID]; !ok { + siteIDToInfo[siteID] = schedulersitecacheinfo.NewSiteCacheInfo() + } + siteIDToInfo[siteID].AddStack(stack) + } + + for _, site := range sites { + if _, ok := siteIDToInfo[site.SiteID]; !ok { + siteIDToInfo[site.SiteID] = schedulersitecacheinfo.NewSiteCacheInfo() + } + siteCacheInfo := siteIDToInfo[site.SiteID] + siteCacheInfo.SetSite(site) + } + return siteIDToInfo +} + +// Stacks returns a StackLister +func (s *Snapshot) Stacks() schedulerlisters.StackLister { + return nil +} + +// SiteCacheInfos returns a SiteCacheInfoLister. +func (s *Snapshot) SiteCacheInfos() schedulerlisters.SiteCacheInfoLister { + return s +} + +type stackLister []*schedulersitecacheinfo.SiteCacheInfo + +// List returns the list of stacks in the snapshot. +func (p stackLister) List() ([]*types.Stack, error) { + alwaysTrue := func(*types.Stack) bool { return true } + return p.FilteredList(alwaysTrue) +} + +// FilteredList returns a filtered list of stacks in the snapshot. +func (p stackLister) FilteredList(filter schedulerlisters.StackFilter) ([]*types.Stack, error) { + // stackFilter is expected to return true for most or all of the stacks. We + // can avoid expensive array growth without wasting too much memory by + // pre-allocating capacity. 
+ maxSize := 0 + for _, n := range p { + maxSize += len(n.Stacks()) + } + stacks := make([]*types.Stack, 0, maxSize) + for _, n := range p { + for _, stack := range n.Stacks() { + if filter(stack) { + stacks = append(stacks, stack) + } + } + } + return stacks, nil +} + +// List returns the list of siteIDs in the snapshot. +func (s *Snapshot) List() ([]*schedulersitecacheinfo.SiteCacheInfo, error) { + return nil, nil +} + +// HavePodsWithAffinityList returns the list of siteIDs with at least one pods with inter-pod affinity +func (s *Snapshot) HavePodsWithAffinityList() ([]*schedulersitecacheinfo.SiteCacheInfo, error) { + return nil, nil +} + +// Get returns the SiteCacheInfo of the given site ID. +func (s *Snapshot) Get(siteID string) (*schedulersitecacheinfo.SiteCacheInfo, error) { + if v, ok := s.SiteCacheInfoMap[siteID]; ok { + return v, nil + } + return nil, fmt.Errorf("sitecacheinfo not found for site ID %q", siteID) +} + +func (s *Snapshot) GetFlavors() (map[string]*typed.RegionFlavor, error) { + return s.RegionFlavorMap, nil +} + +func (s *Snapshot) GetRegionFlavors(region string) (map[string]*typed.RegionFlavor, error) { + regionFlavorMap := make(map[string]*typed.RegionFlavor) + for flavorId := range s.FlavorMap { + key := region + constants.FlavorDelimiter + flavorId + regionFlavor := s.RegionFlavorMap[key] + if regionFlavor != nil { + regionFlavorMap[key] = regionFlavor + } + } + return regionFlavorMap, nil +} diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go index 0a4ae2175..b27845728 100644 --- a/globalscheduler/pkg/scheduler/scheduler.go +++ b/globalscheduler/pkg/scheduler/scheduler.go @@ -134,7 +134,7 @@ type Scheduler struct { schedulerInformer cache.SharedIndexInformer workerNumber int - // table to withdraw site resource + // table to withdraw site resource PodSiteResourceMap map[string]*PodSiteResource } From d05b4f932885847ac347a8d5fe49b795b1f50441 Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 1 Jun 2021 21:10:33 +0000 Subject: [PATCH 14/24] add unit testcase --- .../pkg/scheduler/eventhandlers_test.go | 26 ++----------------- 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go index 7f96cfb62..2ab3bc37a 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers_test.go +++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go @@ -19,9 +19,7 @@ package scheduler import ( "testing" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" - //internalcache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" internalcache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" fakecache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache/fake" schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" @@ -99,7 +97,6 @@ func TestWithdrawResource(t *testing.T) { allRes := types.AllResInfo{ CpuAndMem: cpuAndMemMap, Storage: storageMap, - //eipNum: 1, } podSiteResource := &PodSiteResource{ @@ -129,31 +126,10 @@ func TestWithdrawResource(t *testing.T) { Disk: disk, }, } - /*type Site struct { - SiteID string `json:"site_id" - ClusterName string `json:"cluster_name"` - ClusterNamespace string `json:"cluster_namespace"` - GeoLocation GeoLocation - RegionAzMap RegionAzMap - Operator string `json:"operator"` - Status string `json:"status"` - SiteAttribute []*typed.SiteAttribute `json:"site_attributes"` - EipTypeName string 
`json:"eiptype_name"` - SpotResources map[string]SpotResource `json:"spot_resources"` - Hosts []*typed.Host `json:"-"` - }*/ site := &types.Site{ SiteID: siteId, ClusterNamespace: "default", ClusterName: "cluster1", - // GeoLocation: GeoLocation - // RegionAzMap: RegionAzMap - // Operator: string `json:"operator"` - // Status string `json:"status"` - // SiteAttribute []*typed.SiteAttribute `json:"site_attributes"` - // EipTypeName string `json:"eiptype_name"` - // SpotResources map[string]SpotResource `json:"spot_resources"` - // Hosts []*typed.Host `json:"-"` } siteCacheInfo := &schedulersitecacheinfo.SiteCacheInfo{ @@ -168,6 +144,7 @@ func TestWithdrawResource(t *testing.T) { Qos: make(map[string]float64), } SiteCacheInfoMap := make(map[string]*schedulersitecacheinfo.SiteCacheInfo) + RegionFlavorMap := make(map[string]*typed.RegionFlavor) FlavorMap := make(map[string]*typed.RegionFlavor) FlavorMap[id] = flavor @@ -200,6 +177,7 @@ func TestWithdrawResource(t *testing.T) { if err != nil { t.Errorf("TestWithdrawResource() = %t, expected = %t", err, nil) } + }) } } From 414d7527e7349382ae608c10305d9cca73b00f0c Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 1 Jun 2021 22:35:13 +0000 Subject: [PATCH 15/24] added unit testcase --- .../pkg/scheduler/eventhandlers_test.go | 18 ++- .../internal/cache/fake/fake_snapshot.go | 143 ------------------ 2 files changed, 15 insertions(+), 146 deletions(-) delete mode 100644 globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go index 2ab3bc37a..7fc2f8992 100644 --- a/globalscheduler/pkg/scheduler/eventhandlers_test.go +++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go @@ -18,12 +18,12 @@ limitations under the License. 
package scheduler import ( - "testing" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" internalcache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache" fakecache "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache/fake" schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" + "testing" ) func TestSkipStackUpdate(t *testing.T) { @@ -144,7 +144,6 @@ func TestWithdrawResource(t *testing.T) { Qos: make(map[string]float64), } SiteCacheInfoMap := make(map[string]*schedulersitecacheinfo.SiteCacheInfo) - RegionFlavorMap := make(map[string]*typed.RegionFlavor) FlavorMap := make(map[string]*typed.RegionFlavor) FlavorMap[id] = flavor @@ -155,12 +154,14 @@ func TestWithdrawResource(t *testing.T) { siteCacheInfoMap map[string]*schedulersitecacheinfo.SiteCacheInfo regionFlavorMap map[string]*typed.RegionFlavor flavorMap map[string]*typed.RegionFlavor + expected bool }{ { podName: "pod1", siteCacheInfoMap: SiteCacheInfoMap, regionFlavorMap: RegionFlavorMap, flavorMap: FlavorMap, + expected: true, }, } for _, test := range table { @@ -173,11 +174,22 @@ func TestWithdrawResource(t *testing.T) { FlavorMap: test.flavorMap, }, } + var nAllocatableFlavorBefore int64 + nAllocatableFlavorBefore = int64(0) + siteInfoBefore, ok := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[siteId] + if ok { + nAllocatableFlavorBefore = siteInfoBefore.AllocatableFlavor[id] + } err := sched.withdrawResource(test.podName) if err != nil { t.Errorf("TestWithdrawResource() = %t, expected = %t", err, nil) } - + siteInfoAfter := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[siteId] + nAllocatableFlavorAfter := siteInfoAfter.AllocatableFlavor[id] + if testResult := nAllocatableFlavorAfter >= nAllocatableFlavorBefore; testResult != test.expected { + t.Errorf("nAllocatableFlavorAfter = %d, nAllocatableFlavorBefore = %d", nAllocatableFlavorAfter, nAllocatableFlavorBefore) + t.Errorf("TestWithdrawResource() = %v, expected = %v", testResult, test.expected) + } }) } } diff --git a/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go b/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go deleted file mode 100644 index b14e2b696..000000000 --- a/globalscheduler/pkg/scheduler/internal/cache/fake/fake_snapshot.go +++ /dev/null @@ -1,143 +0,0 @@ -/* -Copyright 2020 Authors of Arktos. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package fake - -import ( - "fmt" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/client/typed" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/common/constants" - schedulerlisters "k8s.io/kubernetes/globalscheduler/pkg/scheduler/listers" - schedulersitecacheinfo "k8s.io/kubernetes/globalscheduler/pkg/scheduler/sitecacheinfo" - "k8s.io/kubernetes/globalscheduler/pkg/scheduler/types" -) - -// Snapshot is a snapshot of cache SiteCacheInfo and SiteTree order. The scheduler takes a -// snapshot at the beginning of each scheduling cycle and uses it for its operations in that cycle. 
-type Snapshot struct { - // SiteCacheInfoMap a map of site name to a snapshot of its SiteCacheInfo. - SiteCacheInfoMap map[string]*schedulersitecacheinfo.SiteCacheInfo - RegionFlavorMap map[string]*typed.RegionFlavor - // FlavorMap is a map of the flavor id to a flavor, contains all flavors - FlavorMap map[string]*typed.RegionFlavor -} - -var _ schedulerlisters.SharedLister = &Snapshot{} - -// NewEmptySnapshot initializes a Snapshot struct and returns it. -func NewEmptySnapshot() *Snapshot { - return &Snapshot{ - SiteCacheInfoMap: make(map[string]*schedulersitecacheinfo.SiteCacheInfo), - //SiteCacheInfoList: make([]*schedulersitecacheinfo.SiteCacheInfo, 0, len(SiteCacheInfoMap)), - RegionFlavorMap: make(map[string]*typed.RegionFlavor), - FlavorMap: make(map[string]*typed.RegionFlavor), - } -} - -// createSiteInfoCacheMap obtains a list of pods and pivots that list into a map -// where the keys are site names and the values are the aggregated information -// for that site. -func createSiteInfoCacheMap(stacks []*types.Stack, sites []*types.Site) map[string]*schedulersitecacheinfo.SiteCacheInfo { - siteIDToInfo := make(map[string]*schedulersitecacheinfo.SiteCacheInfo) - for _, stack := range stacks { - siteID := stack.Selected.SiteID - if _, ok := siteIDToInfo[siteID]; !ok { - siteIDToInfo[siteID] = schedulersitecacheinfo.NewSiteCacheInfo() - } - siteIDToInfo[siteID].AddStack(stack) - } - - for _, site := range sites { - if _, ok := siteIDToInfo[site.SiteID]; !ok { - siteIDToInfo[site.SiteID] = schedulersitecacheinfo.NewSiteCacheInfo() - } - siteCacheInfo := siteIDToInfo[site.SiteID] - siteCacheInfo.SetSite(site) - } - return siteIDToInfo -} - -// Stacks returns a StackLister -func (s *Snapshot) Stacks() schedulerlisters.StackLister { - return nil -} - -// SiteCacheInfos returns a SiteCacheInfoLister. -func (s *Snapshot) SiteCacheInfos() schedulerlisters.SiteCacheInfoLister { - return s -} - -type stackLister []*schedulersitecacheinfo.SiteCacheInfo - -// List returns the list of stacks in the snapshot. -func (p stackLister) List() ([]*types.Stack, error) { - alwaysTrue := func(*types.Stack) bool { return true } - return p.FilteredList(alwaysTrue) -} - -// FilteredList returns a filtered list of stacks in the snapshot. -func (p stackLister) FilteredList(filter schedulerlisters.StackFilter) ([]*types.Stack, error) { - // stackFilter is expected to return true for most or all of the stacks. We - // can avoid expensive array growth without wasting too much memory by - // pre-allocating capacity. - maxSize := 0 - for _, n := range p { - maxSize += len(n.Stacks()) - } - stacks := make([]*types.Stack, 0, maxSize) - for _, n := range p { - for _, stack := range n.Stacks() { - if filter(stack) { - stacks = append(stacks, stack) - } - } - } - return stacks, nil -} - -// List returns the list of siteIDs in the snapshot. -func (s *Snapshot) List() ([]*schedulersitecacheinfo.SiteCacheInfo, error) { - return nil, nil -} - -// HavePodsWithAffinityList returns the list of siteIDs with at least one pods with inter-pod affinity -func (s *Snapshot) HavePodsWithAffinityList() ([]*schedulersitecacheinfo.SiteCacheInfo, error) { - return nil, nil -} - -// Get returns the SiteCacheInfo of the given site ID. 
-func (s *Snapshot) Get(siteID string) (*schedulersitecacheinfo.SiteCacheInfo, error) { - if v, ok := s.SiteCacheInfoMap[siteID]; ok { - return v, nil - } - return nil, fmt.Errorf("sitecacheinfo not found for site ID %q", siteID) -} - -func (s *Snapshot) GetFlavors() (map[string]*typed.RegionFlavor, error) { - return s.RegionFlavorMap, nil -} - -func (s *Snapshot) GetRegionFlavors(region string) (map[string]*typed.RegionFlavor, error) { - regionFlavorMap := make(map[string]*typed.RegionFlavor) - for flavorId := range s.FlavorMap { - key := region + constants.FlavorDelimiter + flavorId - regionFlavor := s.RegionFlavorMap[key] - if regionFlavor != nil { - regionFlavorMap[key] = regionFlavor - } - } - return regionFlavorMap, nil -} From 0416df500efcde8270315489d7c9d930a6577ede Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Tue, 1 Jun 2021 22:43:36 +0000 Subject: [PATCH 16/24] added unit testcase --- globalscheduler/pkg/scheduler/internal/cache/fake/BUILD | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD b/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD index 0ff675d5f..bb698242d 100644 --- a/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD +++ b/globalscheduler/pkg/scheduler/internal/cache/fake/BUILD @@ -2,18 +2,13 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", - srcs = [ - "fake_cache.go", - "fake_snapshot.go", - ], + srcs = ["fake_cache.go"], importpath = "k8s.io/kubernetes/globalscheduler/pkg/scheduler/internal/cache/fake", visibility = ["//globalscheduler/pkg/scheduler:__subpackages__"], deps = [ "//globalscheduler/pkg/scheduler/client/typed:go_default_library", - "//globalscheduler/pkg/scheduler/common/constants:go_default_library", "//globalscheduler/pkg/scheduler/internal/cache:go_default_library", "//globalscheduler/pkg/scheduler/listers:go_default_library", - "//globalscheduler/pkg/scheduler/sitecacheinfo:go_default_library", "//globalscheduler/pkg/scheduler/types:go_default_library", ], ) From 504fe0b51c5f403f31c9a6f0579fdc3ba6d15aca Mon Sep 17 00:00:00 2001 From: kimeunju108 Date: Thu, 3 Jun 2021 07:32:46 +0000 Subject: [PATCH 17/24] updated scheduler according to review --- globalscheduler/pkg/scheduler/factory/factory.go | 4 +++- .../pkg/scheduler/framework/interfaces/framework.go | 2 +- .../framework/plugins/defaultbinder/default_binder.go | 1 - globalscheduler/pkg/scheduler/scheduler.go | 5 +++++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go index d26f53dff..25a298cc8 100644 --- a/globalscheduler/pkg/scheduler/factory/factory.go +++ b/globalscheduler/pkg/scheduler/factory/factory.go @@ -44,8 +44,10 @@ func (i *podInformer) Lister() corelisters.PodLister { // NewPodInformer creates a shared index informer that returns only non-terminal pods. func NewPodInformer(schedulerName string, client clientset.Interface, resyncPeriod time.Duration) coreinformers.PodInformer { + //This selector is to avoid to receive all pods event so that it improves scheduling performance. 
//
 	selector := fields.ParseSelectorOrDie(
-		"status.assignedScheduler.name=" + schedulerName)
+		"status.phase != " + string(v1.PodScheduled) +
+			",status.assignedScheduler.name=" + schedulerName)
 	lw := cache.NewListWatchFromClient(client.CoreV1(), string(v1.ResourcePods), metav1.NamespaceAll, selector)
 	return &podInformer{
 		informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod,
diff --git a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go
index 5dd3302a3..57a975c6d 100644
--- a/globalscheduler/pkg/scheduler/framework/interfaces/framework.go
+++ b/globalscheduler/pkg/scheduler/framework/interfaces/framework.go
@@ -442,7 +442,7 @@ func (f *framework) RunBindResourcePlugins(ctx context.Context, state *CycleStat
 			continue
 		}
 		if !status.IsSuccess() {
-			msg := fmt.Sprintf("plugin %q failed to bind pod \"%v\": %v", bp.Name(), stack.PodName, status.Message())
+			msg := fmt.Sprintf("plugin %q failed to bind pod %q: %v", bp.Name(), stack.PodName, status.Message())
 			klog.Errorf("%s", msg)
 			status = NewStatus(Error, msg)
 			return status, siteId, flavor, resInfo
diff --git a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go
index a0859506b..65dbe2b3f 100644
--- a/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go
+++ b/globalscheduler/pkg/scheduler/framework/plugins/defaultbinder/default_binder.go
@@ -54,7 +54,6 @@ func (b DefaultBinder) Bind(ctx context.Context, state *interfaces.CycleState, s
 	siteCacheInfo *sitecacheinfo.SiteCacheInfo) *interfaces.Status {
 	region := siteCacheInfo.GetSite().RegionAzMap.Region
 
-	//eipNum : private data
 	resInfo := types.AllResInfo{CpuAndMem: map[string]types.CPUAndMemory{}, Storage: map[string]float64{}}
 	siteID := siteCacheInfo.Site.SiteID
 
diff --git a/globalscheduler/pkg/scheduler/scheduler.go b/globalscheduler/pkg/scheduler/scheduler.go
index b27845728..f3561304e 100644
--- a/globalscheduler/pkg/scheduler/scheduler.go
+++ b/globalscheduler/pkg/scheduler/scheduler.go
@@ -921,6 +921,11 @@ func (sched *Scheduler) updateStaticSiteResourceInfo(key string, event EventType
 
 //This function updates sites' dynamic resource informaton
 func (sched *Scheduler) UpdateSiteDynamicResource(region string, resource *types.SiteResource) (err error) {
+	//empty PodSiteResourceMap because this map is only for time gap (60 seconds)
+	//because resource collector will update DynamicResource every 60 seconds. 
From baa3a9664d9dd2dce6dfc663519742dd767457ea Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Thu, 3 Jun 2021 16:53:42 +0000
Subject: [PATCH 18/24] updated scheduler according to review

---
 globalscheduler/pkg/scheduler/eventhandlers_test.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go
index 7fc2f8992..5f816da57 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers_test.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go
@@ -91,7 +91,7 @@ func TestWithdrawResource(t *testing.T) {
 		Memory: 128,
 	}
 	cpuAndMemMap := make(map[string]types.CPUAndMemory)
-	cpuAndMemMap["deafult"] = cm
+	cpuAndMemMap["default"] = cm
 	storageMap := make(map[string]float64)
 	storageMap["ssd"] = float64(128)
 	allRes := types.AllResInfo{

From 3d3664a50b5bf39b3ed1d6cd0dc80c52ce3c0e22 Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Thu, 3 Jun 2021 19:15:10 +0000
Subject: [PATCH 19/24] updated eventhandlers_test.go

---
 globalscheduler/pkg/scheduler/eventhandlers_test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers_test.go b/globalscheduler/pkg/scheduler/eventhandlers_test.go
index 5f816da57..434f0cb14 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers_test.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers_test.go
@@ -187,7 +187,6 @@ func TestWithdrawResource(t *testing.T) {
 			siteInfoAfter := sched.siteCacheInfoSnapshot.SiteCacheInfoMap[siteId]
 			nAllocatableFlavorAfter := siteInfoAfter.AllocatableFlavor[id]
 			if testResult := nAllocatableFlavorAfter >= nAllocatableFlavorBefore; testResult != test.expected {
-				t.Errorf("nAllocatableFlavorAfter = %d, nAllocatableFlavorBefore = %d", nAllocatableFlavorAfter, nAllocatableFlavorBefore)
 				t.Errorf("TestWithdrawResource() = %v, expected = %v", testResult, test.expected)
 			}
 		})
From ed4c2b17b3023212b468e02d90e7e019fe7e9d48 Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Thu, 3 Jun 2021 21:08:39 +0000
Subject: [PATCH 20/24] updated eventhandlers_test.go

---
 globalscheduler/pkg/scheduler/eventhandlers.go   | 2 --
 globalscheduler/pkg/scheduler/factory/factory.go | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go
index eb3493116..2f79134e3 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers.go
@@ -26,7 +26,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	apitypes "k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
-	//"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/klog"
 	clusterv1 "k8s.io/kubernetes/globalscheduler/pkg/apis/cluster/v1"
@@ -464,7 +463,6 @@ func (sched *Scheduler) bindToSite(clusterName string, assumedStack *types.Stack
 		}
 		return err
 	}
-	//
 	return nil
 }
 
diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go
index 25a298cc8..04be71757 100644
--- a/globalscheduler/pkg/scheduler/factory/factory.go
+++ b/globalscheduler/pkg/scheduler/factory/factory.go
@@ -44,7 +44,8 @@ func (i *podInformer) Lister() corelisters.PodLister {
 // NewPodInformer creates a shared index informer that returns only non-terminal pods.
 func NewPodInformer(schedulerName string, client clientset.Interface,
 	resyncPeriod time.Duration) coreinformers.PodInformer {
-	//This selector is to avoid to receive all pods event so that it improves scheduling performance. //
+	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance. //
+	//This receives pod events only their status is one of failed, assigned, and bound
 	selector := fields.ParseSelectorOrDie(
 		"status.phase != " + string(v1.PodScheduled) +
 			",status.assignedScheduler.name=" + schedulerName)

From 0dfb890e7573cb452433e415280208718e9b0c4d Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Thu, 3 Jun 2021 23:39:02 +0000
Subject: [PATCH 21/24] updated eventhandlers_test.go

---
 globalscheduler/pkg/scheduler/factory/factory.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go
index 04be71757..067bda5bb 100644
--- a/globalscheduler/pkg/scheduler/factory/factory.go
+++ b/globalscheduler/pkg/scheduler/factory/factory.go
@@ -44,7 +44,7 @@ func (i *podInformer) Lister() corelisters.PodLister {
 // NewPodInformer creates a shared index informer that returns only non-terminal pods.
 func NewPodInformer(schedulerName string, client clientset.Interface,
 	resyncPeriod time.Duration) coreinformers.PodInformer {
-	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance. //
+	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance.
 	//This receives pod events only their status is one of failed, assigned, and bound
 	selector := fields.ParseSelectorOrDie(
 		"status.phase != " + string(v1.PodScheduled) +

From ea63fd475632546bd760885e9a996faeec74d5ca Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Thu, 3 Jun 2021 23:50:44 +0000
Subject: [PATCH 22/24] updated eventhandlers_test.go

---
 globalscheduler/pkg/scheduler/factory/factory.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go
index 067bda5bb..863cc4f0d 100644
--- a/globalscheduler/pkg/scheduler/factory/factory.go
+++ b/globalscheduler/pkg/scheduler/factory/factory.go
@@ -44,7 +44,7 @@ func (i *podInformer) Lister() corelisters.PodLister {
 // NewPodInformer creates a shared index informer that returns only non-terminal pods.
 func NewPodInformer(schedulerName string, client clientset.Interface,
 	resyncPeriod time.Duration) coreinformers.PodInformer {
-	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance.
+	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance. 
 	//This receives pod events only their status is one of failed, assigned, and bound
 	selector := fields.ParseSelectorOrDie(
 		"status.phase != " + string(v1.PodScheduled) +
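Background on the field selector in NewPodInformer that PATCH 17 introduced and that the surrounding patches keep adjusting: a Kubernetes field selector string is a comma-separated list of requirements, and the requirements are ANDed together. The sketch below uses plain apimachinery with made-up field names and values (it does not pull in this fork's custom pod phases) to show how parsing and matching behave. Note that, at least in upstream apimachinery, two equality requirements on the same field can never both be satisfied, so ORing several phases needs a different mechanism than a single field selector.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/fields"
)

func main() {
	// Comma-separated requirements are ANDed, mirroring the selector built in
	// NewPodInformer. Field names and values here are illustrative only.
	sel := fields.ParseSelectorOrDie(
		"status.phase!=Scheduled,status.assignedScheduler.name=scheduler-1")

	// A field set satisfying both requirements matches...
	fmt.Println(sel.Matches(fields.Set{
		"status.phase":                  "Assigned",
		"status.assignedScheduler.name": "scheduler-1",
	})) // true

	// ...while violating either requirement does not.
	fmt.Println(sel.Matches(fields.Set{
		"status.phase":                  "Scheduled",
		"status.assignedScheduler.name": "scheduler-1",
	})) // false
}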
From 1f6026d068ebdae3a74e433158f2fc21cc34f3ca Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Fri, 4 Jun 2021 18:37:05 +0000
Subject: [PATCH 23/24] updated eventhandlers_test.go

---
 globalscheduler/pkg/scheduler/eventhandlers.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go
index 2f79134e3..b9a8c3c9d 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers.go
@@ -173,7 +173,6 @@ func (sched *Scheduler) addPodToCache(obj interface{}) {
 		klog.Errorf("cannot convert to *v1.Pod: %v", obj)
 		return
 	}
-
 	// add pod resource to a stack
 	stack := getStackFromPod(pod)
 

From c1746886316cd458b46df016353a41f3da99561d Mon Sep 17 00:00:00 2001
From: kimeunju108
Date: Fri, 4 Jun 2021 22:02:32 +0000
Subject: [PATCH 24/24] updated selector

---
 globalscheduler/pkg/scheduler/eventhandlers.go   | 1 -
 globalscheduler/pkg/scheduler/factory/factory.go | 5 +++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/globalscheduler/pkg/scheduler/eventhandlers.go b/globalscheduler/pkg/scheduler/eventhandlers.go
index b9a8c3c9d..cc0068ee7 100644
--- a/globalscheduler/pkg/scheduler/eventhandlers.go
+++ b/globalscheduler/pkg/scheduler/eventhandlers.go
@@ -189,7 +189,6 @@ func (sched *Scheduler) updatePodInCache(oldObj, newObj interface{}) {
 		return
 	}
 	newPod, ok := newObj.(*v1.Pod)
-	klog.V(4).Infof("Update a pod: %v", newPod)
 	if !ok {
 		klog.Errorf("cannot convert newObj to *v1.Pod: %v", newObj)
 		return
 	}
diff --git a/globalscheduler/pkg/scheduler/factory/factory.go b/globalscheduler/pkg/scheduler/factory/factory.go
index 863cc4f0d..6b15c2077 100644
--- a/globalscheduler/pkg/scheduler/factory/factory.go
+++ b/globalscheduler/pkg/scheduler/factory/factory.go
@@ -47,8 +47,9 @@ func NewPodInformer(schedulerName string, client clientset.Interface,
 	//This selector is to avoid to receive unneccesary pods event (e.g. scheduled) so that it improves scheduling performance. 
 	//This receives pod events only their status is one of failed, assigned, and bound
 	selector := fields.ParseSelectorOrDie(
-		"status.phase != " + string(v1.PodScheduled) +
-			",status.assignedScheduler.name=" + schedulerName)
+		"status.phase = " + string(v1.PodAssigned) +
+			",status.phase = " + string(v1.PodFailed) +
+			",status.assignedScheduler.name = " + schedulerName)
 	lw := cache.NewListWatchFromClient(client.CoreV1(), string(v1.ResourcePods), metav1.NamespaceAll, selector)
 	return &podInformer{
 		informer: cache.NewSharedIndexInformer(lw, &v1.Pod{}, resyncPeriod,