Skip to content

Commit

Permalink
Merge pull request #315 from JimmyYang20/il-compute
Browse files Browse the repository at this point in the history
IL supports multiple device soc versions
  • Loading branch information
kubeedge-bot authored Jun 2, 2022
2 parents c0e76f5 + f58213f commit 75e66ca
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 97 deletions.
7 changes: 7 additions & 0 deletions build/crds/sedna.io_incrementallearningjobs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7145,6 +7145,13 @@ spec:
evalSpec:
description: EvalSpec describes the data an eval worker should have
properties:
initialEvalModel:
properties:
name:
type: string
required:
- name
type: object
template:
description: PodTemplateSpec describes the data a pod should have
when created from a template
Expand Down
4 changes: 4 additions & 0 deletions build/crds/sedna.io_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ spec:
properties:
credentialName:
type: string
device_soc_versions:
items:
type: string
type: array
format:
type: string
url:
Expand Down
7 changes: 6 additions & 1 deletion pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ type TrainSpec struct {

// EvalSpec describes the data an eval worker should have
type EvalSpec struct {
Template v1.PodTemplateSpec `json:"template"`
InitialModel *InitialEvalModel `json:"initialEvalModel,omitempty"`
Template v1.PodTemplateSpec `json:"template"`
}

// DeploySpec describes the deploy model to be updated
Expand Down Expand Up @@ -94,6 +95,10 @@ type InitialModel struct {
Name string `json:"name"`
}

// InitialEvalModel identifies a model by name. EvalSpec references it through
// an optional pointer field serialized under the "initialEvalModel" JSON key.
type InitialEvalModel struct {
// Name is the name of the referenced model. The tag has no omitempty, so
// the "name" key is always emitted when this struct is serialized.
Name string `json:"name"`
}

type DeployModel struct {
Name string `json:"name"`
// HotUpdateEnabled will enable the model hot update feature if its value is true.
Expand Down
5 changes: 3 additions & 2 deletions pkg/apis/sedna/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ type Model struct {

// ModelSpec is a description of a model
type ModelSpec struct {
URL string `json:"url"`
Format string `json:"format"`
URL string `json:"url"`
Format string `json:"format"`
Devices []string `json:"device_soc_versions,omitempty"`

CredentialName string `json:"credentialName,omitempty"`
}
Expand Down
28 changes: 27 additions & 1 deletion pkg/apis/sedna/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

81 changes: 60 additions & 21 deletions pkg/globalmanager/controllers/incrementallearning/downstream.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) erro
currentType := latestCondition.Type
jobStage := latestCondition.Stage

syncModelWithName := func(modelName string) {
if err := c.syncModelWithName(dsNodeName, modelName, job.Namespace); err != nil {
syncModelWithName := func(modelName string, nodeName string) {
if err := c.syncModelWithName(nodeName, modelName, job.Namespace); err != nil {
klog.Warningf("Error to sync model %s when sync incremental learning job %s to node %s: %v",
modelName, job.Name, dsNodeName, err)
modelName, job.Name, nodeName, err)
}
}

Expand All @@ -137,29 +137,68 @@ func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) erro
return false
}

doJobStageEvent := func(modelName string, nodeName string) {
if currentType == sednav1.ILJobStageCondWaiting {
// delete job
deleteJob := func(nodeName string) {
if !isJobResidentNode(nodeName) {
// delete LC's job from nodeName that's different from dataset node when worker's status
// is completed or failed.
c.sendToEdgeFunc(nodeName, watch.Deleted, job)
}
}

switch currentType {
case sednav1.ILJobStageCondWaiting:
switch jobStage {
case sednav1.ILJobTrain:
syncModelWithName(job.Spec.InitialModel.Name, dsNodeName)
syncJobWithNodeName(dsNodeName)
if modelName != "" {
syncModelWithName(modelName)
case sednav1.ILJobEval:
syncModelWithName(job.Spec.DeploySpec.Model.Name, dsNodeName)
if job.Spec.EvalSpec.InitialModel != nil {
syncModelWithName(job.Spec.EvalSpec.InitialModel.Name, dsNodeName)
}
} else if currentType == sednav1.ILJobStageCondRunning {
syncJobWithNodeName(nodeName)
} else if currentType == sednav1.ILJobStageCondCompleted || currentType == sednav1.ILJobStageCondFailed {
if !isJobResidentNode(nodeName) {
// delete LC's job from nodeName that's different from dataset node when worker's status is completed or failed.
c.sendToEdgeFunc(nodeName, watch.Deleted, job)
syncJobWithNodeName(dsNodeName)
case sednav1.ILJobDeploy:
deployNodeName = evalNodeName

syncModelWithName(job.Spec.DeploySpec.Model.Name, evalNodeName)
if job.Spec.EvalSpec.InitialModel != nil && !job.Spec.DeploySpec.Model.HotUpdateEnabled {
syncModelWithName(job.Spec.EvalSpec.InitialModel.Name, deployNodeName)
}
syncJobWithNodeName(deployNodeName)
}
}
case sednav1.ILJobStageCondRunning:
switch jobStage {
case sednav1.ILJobTrain:
syncJobWithNodeName(trainNodeName)
case sednav1.ILJobEval:
if trainNodeName != evalNodeName && trainNodeName != dsNodeName {
c.sendToEdgeFunc(trainNodeName, watch.Deleted, job)
}
syncJobWithNodeName(evalNodeName)
case sednav1.ILJobDeploy:
if evalNodeName != deployNodeName && evalNodeName != dsNodeName {
c.sendToEdgeFunc(evalNodeName, watch.Deleted, job)
}

switch jobStage {
case sednav1.ILJobTrain:
doJobStageEvent(job.Spec.InitialModel.Name, trainNodeName)
case sednav1.ILJobEval:
doJobStageEvent(job.Spec.DeploySpec.Model.Name, evalNodeName)
case sednav1.ILJobDeploy:
doJobStageEvent("", deployNodeName)
if job.Spec.EvalSpec.InitialModel != nil {
syncModelWithName(job.Spec.EvalSpec.InitialModel.Name, deployNodeName)
}
syncModelWithName(job.Spec.DeploySpec.Model.Name, deployNodeName)
syncJobWithNodeName(deployNodeName)
}
case sednav1.ILJobStageCondCompleted, sednav1.ILJobStageCondFailed:
if !job.Spec.DeploySpec.Model.HotUpdateEnabled {
deployNodeName = evalNodeName
}
switch jobStage {
case sednav1.ILJobTrain:
deleteJob(trainNodeName)
case sednav1.ILJobEval:
deleteJob(evalNodeName)
case sednav1.ILJobDeploy:
deleteJob(deployNodeName)
}
}

return nil
Expand Down
5 changes: 3 additions & 2 deletions pkg/globalmanager/runtime/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ const (
)

type Model struct {
Format string `json:"format,omitempty"`
URL string `json:"url,omitempty"`
Format string `json:"format"`
URL string `json:"url"`
Devices []string `json:"device_soc_versions,omitempty"`
Metrics map[string]interface{} `json:"metrics,omitempty"`
}

Expand Down
Loading

0 comments on commit 75e66ca

Please sign in to comment.