Skip to content

Commit

Permalink
Increase vLLM pod startup timeout to 30 min (#173)
Browse files Browse the repository at this point in the history
fixes #172
  • Loading branch information
samos123 committed Sep 3, 2024
1 parent 3f38a81 commit d8a86f4
Showing 1 changed file with 21 additions and 10 deletions.
31 changes: 21 additions & 10 deletions internal/modelcontroller/model_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,24 @@ func (r *ModelReconciler) vLLMPodForModel(m *kubeaiv1.Model, index int32) *corev
Name: "http",
},
},
StartupProbe: &corev1.Probe{
// Give the model 30 minutes to start up: FailureThreshold (900) x PeriodSeconds (2s) = 1800s.
FailureThreshold: 900,
PeriodSeconds: 2,
TimeoutSeconds: 2,
SuccessThreshold: 1,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/health",
Port: intstr.FromString("http"),
},
},
},
ReadinessProbe: &corev1.Probe{
FailureThreshold: 3,
InitialDelaySeconds: 20,
PeriodSeconds: 10,
TimeoutSeconds: 2,
SuccessThreshold: 1,
FailureThreshold: 3,
PeriodSeconds: 10,
TimeoutSeconds: 2,
SuccessThreshold: 1,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/health",
Expand All @@ -278,11 +290,10 @@ func (r *ModelReconciler) vLLMPodForModel(m *kubeaiv1.Model, index int32) *corev
},
},
LivenessProbe: &corev1.Probe{
FailureThreshold: 3,
InitialDelaySeconds: 900,
PeriodSeconds: 30,
TimeoutSeconds: 3,
SuccessThreshold: 1,
FailureThreshold: 3,
PeriodSeconds: 30,
TimeoutSeconds: 3,
SuccessThreshold: 1,
ProbeHandler: corev1.ProbeHandler{
HTTPGet: &corev1.HTTPGetAction{
Path: "/health",
Expand Down

0 comments on commit d8a86f4

Please sign in to comment.