Skip to content

Commit dfc6f87

Browse files
committed
[jobmanager] Recover from job panics
Signed-off-by: Dmitrii Okunev <[email protected]>
1 parent fa98f00 commit dfc6f87

File tree

2 files changed: 12 additions and 1 deletion.

2 files changed: 12 additions and 1 deletion.

pkg/jobmanager/jobmanager.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ import (
1414
"time"
1515

1616
"github.com/facebookincubator/go-belt/beltctx"
17+
"github.com/facebookincubator/go-belt/tool/experimental/errmon"
1718
"github.com/insomniacslk/xjson"
1819

1920
"github.com/linuxboot/contest/pkg/api"
@@ -183,6 +184,9 @@ loop:
183184
logging.Debugf(ev.Context, "Handling event %+v", ev)
184185
handlerWg.Add(1)
185186
go func() {
187+
defer func() {
188+
errmon.ObserveRecoverCtx(ev.Context, recover())
189+
}()
186190
defer handlerWg.Done()
187191
jm.handleEvent(ev)
188192
}()

pkg/jobmanager/start.go

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,14 @@ func (jm *JobManager) startJob(ctx context.Context, j *job.Job, resumeState *job
9898
logging.Debugf(ctx, "cancelling job context")
9999
jobCancel()
100100
}}
101-
go jm.runJob(jobCtx, j, resumeState)
101+
102+
go func() {
103+
defer func() {
104+
errmon.ObserveRecoverCtx(ctx, recover())
105+
}()
106+
107+
jm.runJob(jobCtx, j, resumeState)
108+
}()
102109
}
103110

104111
func (jm *JobManager) runJob(ctx context.Context, j *job.Job, resumeState *job.PauseEventPayload) {

0 commit comments

Comments
 (0)