Skip to content

Commit

Permalink
Add zombie killer.
Browse files Browse the repository at this point in the history
Signed-off-by: Jeff Ortel <[email protected]>
  • Loading branch information
jortel committed Aug 15, 2024
1 parent 029e936 commit b379a5d
Showing 1 changed file with 51 additions and 0 deletions.
51 changes: 51 additions & 0 deletions task/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func (m *Manager) Run(ctx context.Context) {
m.deleteOrphanPods()
m.runActions()
m.updateRunning()
m.deleteZombies()
m.startReady()
m.pause()
} else {
Expand Down Expand Up @@ -904,6 +905,56 @@ func (m *Manager) updateRunning() {
}
}

// deleteZombies detects and deletes zombie pods.
// A zombie is a (succeeded|failed) task with a running pod that
// the manager has previously tried to kill.
//
// The running pods reported by the cluster are matched (by namespace/name)
// against the Pod column of tasks in the Succeeded or Failed state. A matched
// task is deleted when its last ContainerKilled event is more than one
// minute old. Failures are logged, never returned: a failed query ends the
// pass, while a failed delete only skips that task.
func (m *Manager) deleteZombies() {
	var err error
	defer func() {
		// Only report when the pass itself failed (e.g. the query).
		if err != nil {
			Log.Error(err, "")
		}
	}()
	// Collect namespace/name references for every pod in the Running phase.
	var pods []string
	for _, pod := range m.cluster.Pods() {
		if pod.Status.Phase == core.PodRunning {
			pods = append(pods, path.Join(pod.Namespace, pod.Name))
		}
	}
	// Without running pods there can be no zombies; skip the query, which
	// would otherwise be issued with an empty list that matches nothing.
	if len(pods) == 0 {
		return
	}
	// Fetch finished tasks (with their events) that still reference a running pod.
	var fetched []*Task
	db := m.DB.Select("Events")
	db = db.Where("Pod", pods)
	db = db.Where("state IN ?",
		[]string{
			Succeeded,
			Failed,
		})
	err = db.Find(&fetched).Error
	if err != nil {
		err = liberr.Wrap(err)
		return
	}
	for _, task := range fetched {
		event, found := task.LastEvent(ContainerKilled)
		if !found {
			// The manager never tried to kill this pod.
			continue
		}
		if time.Since(event.Last) <= time.Minute {
			// Killed recently; allow the pod time to terminate.
			continue
		}
		Log.Info(
			"Zombie detected.",
			"task",
			task.ID,
			"pod",
			task.Pod)
		// Use a loop-scoped error so the deferred logger does not report
		// the same delete failure a second time.
		if dErr := task.Delete(m.Client); dErr != nil {
			Log.Error(dErr, "")
		}
	}
}

// deleteOrphanPods finds and deletes task pods not referenced by a task.
func (m *Manager) deleteOrphanPods() {
var err error
Expand Down

0 comments on commit b379a5d

Please sign in to comment.