Skip to content
This repository has been archived by the owner on May 6, 2020. It is now read-only.

Commit

Permalink
feat(health checks): implement new kubernetes 1.2 health check features
Browse files Browse the repository at this point in the history
Added the following new settings:

* HEALTHCHECK_PERIOD_SECONDS - How often (in seconds) to perform the probe
* HEALTHCHECK_SUCCESS_THRESHOLD - Minimum number of consecutive successful probes, after a failure, for the check to be considered successful again
* HEALTHCHECK_FAILURE_THRESHOLD - Minimum number of consecutive failed probes, after a success, for the check to be considered failed

These defaults are all copied from Kubernetes, but they may need to be adjusted because of our high default timeout; alternatively, we could bring the default timeout down.

Closes #251
  • Loading branch information
helgi committed Apr 4, 2016
1 parent d352d2b commit 25e7a73
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 6 deletions.
31 changes: 28 additions & 3 deletions rootfs/api/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,43 @@ def healthcheck(self):
timeout = int(self.values.get('HEALTHCHECK_TIMEOUT'))
delay = int(self.values.get('HEALTHCHECK_INITIAL_DELAY'))
port = int(self.values.get('HEALTHCHECK_PORT'))

return {'path': path, 'timeout': timeout, 'delay': delay, 'port': port}
period_seconds = int(self.values.get('HEALTHCHECK_PERIOD_SECONDS'))
success_threshold = int(self.values.get('HEALTHCHECK_SUCCESS_THRESHOLD'))
failure_threshold = int(self.values.get('HEALTHCHECK_FAILURE_THRESHOLD'))

return {
'path': path,
'timeout': timeout,
'delay': delay,
'port': port,
'period_seconds': period_seconds,
'success_threshold': success_threshold,
'failure_threshold': failure_threshold,
}

def set_healthchecks(self):
    """Define default values for the HTTP healthcheck settings.

    Does nothing unless at least one key starting with ``HEALTHCHECK_`` is
    already present in ``self.values``. Otherwise every healthcheck setting
    is written back into ``self.values``: values that are already set are
    kept (integer settings are coerced with ``int``) and missing ones
    receive the defaults listed below.

    Raises:
        ValueError, TypeError: if an integer setting holds a value that
            ``int`` cannot convert.
    """
    # Only the keys matter for the guard; no need to build a filtered dict.
    if not any(key.startswith('HEALTHCHECK_') for key in self.values):
        return

    # HTTP GET related
    self.values['HEALTHCHECK_URL'] = self.values.get('HEALTHCHECK_URL', '/')

    # Integer settings and their defaults. Each key is assigned exactly once.
    integer_defaults = (
        # HTTP GET related
        ('HEALTHCHECK_PORT', 5000),
        # Number of seconds after which the probe times out.
        # More info: http://releases.k8s.io/HEAD/docs/user-guide/pod-states.md#container-probes
        ('HEALTHCHECK_TIMEOUT', 50),
        # Number of seconds after the container has started before liveness probes are initiated.
        # More info: http://releases.k8s.io/HEAD/docs/user-guide/pod-states.md#container-probes
        ('HEALTHCHECK_INITIAL_DELAY', 50),
        # How often (in seconds) to perform the probe.
        ('HEALTHCHECK_PERIOD_SECONDS', 10),
        # Minimum consecutive successes for the probe to be considered successful
        # after having failed.
        ('HEALTHCHECK_SUCCESS_THRESHOLD', 1),
        # Minimum consecutive failures for the probe to be considered failed after
        # having succeeded.
        ('HEALTHCHECK_FAILURE_THRESHOLD', 3),
    )
    for key, default in integer_defaults:
        self.values[key] = int(self.values.get(key, default))

def save(self, **kwargs):
"""merge the old config with the new"""
Expand Down
20 changes: 17 additions & 3 deletions rootfs/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,15 @@ def _delete_rc(self, namespace, name):

return response

def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30, timeout=5): # noqa
def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30, timeout=5,
period_seconds=1, success_threshold=1, failure_threshold=3): # noqa
"""
Apply HTTP GET healthcheck to the application container
http://kubernetes.io/docs/user-guide/walkthrough/k8s201/#health-checking
http://kubernetes.io/docs/user-guide/pod-states/#container-probes
http://kubernetes.io/docs/user-guide/liveness/
"""
if not routable:
return controller

Expand All @@ -1011,7 +1019,10 @@ def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30
# length of time to wait for a pod to initialize
# after pod startup, before applying health checking
'initialDelaySeconds': delay,
'timeoutSeconds': timeout
'timeoutSeconds': timeout,
'periodSeconds': period_seconds,
'successThreshold': success_threshold,
'failureThreshold': failure_threshold,
},
'readinessProbe': {
# an http probe
Expand All @@ -1022,7 +1033,10 @@ def _healthcheck(self, controller, routable=False, path='/', port=5000, delay=30
# length of time to wait for a pod to initialize
# after pod startup, before applying health checking
'initialDelaySeconds': delay,
'timeoutSeconds': timeout
'timeoutSeconds': timeout,
'periodSeconds': period_seconds,
'successThreshold': success_threshold,
'failureThreshold': failure_threshold,
},
}

Expand Down

0 comments on commit 25e7a73

Please sign in to comment.