Skip to content

Commit

Permalink
Timing: Log invocation durations (#7)
Browse files: browse the repository at this point in the history
  • Loading branch information
msinto93 authored Jul 4, 2023
2 parents fe62aa1 + 70fd60a commit b7c92db
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ keywords = [
requires-python = ">=3.7"
# All runtime dependencies that must be packaged, pin major version only.
dependencies = [
"codetiming~=1.4",
"importlib-metadata<4; python_version<'3.8'",
"pluggy~=1.0",
"werkzeug~=2.0",
Expand Down
18 changes: 10 additions & 8 deletions src/inference_server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import logging
from typing import TYPE_CHECKING

import codetiming
import werkzeug
import werkzeug.exceptions
from werkzeug.datastructures import MIMEAccept
Expand Down Expand Up @@ -76,14 +77,15 @@ def _handle_invocations(request: werkzeug.Request) -> werkzeug.Response:
:param request: HTTP request data
"""
pm = inference_server._plugin.manager()
# Deserialize HTTP body payload (bytes) into input features
data = pm.hook.input_fn(input_data=request.data, content_type=request.content_type)
# Then use the model to make a prediction
prediction = pm.hook.predict_fn(data=data, model=_model())
# Then serialize the data as bytes. This is often (but not necessarily) JSON bytes.
prediction_bytes, content_type = pm.hook.output_fn(prediction=prediction, accept=request.accept_mimetypes)
return werkzeug.Response(prediction_bytes, mimetype=content_type)
with codetiming.Timer(text="Invocation took {:.3f} seconds", logger=logger.debug):
pm = inference_server._plugin.manager()
# Deserialize HTTP body payload (bytes) into input features
data = pm.hook.input_fn(input_data=request.data, content_type=request.content_type)
# Then use the model to make a prediction
prediction = pm.hook.predict_fn(data=data, model=_model())
# Then serialize the data as bytes. This is often (but not necessarily) JSON bytes.
prediction_bytes, content_type = pm.hook.output_fn(prediction=prediction, accept=request.accept_mimetypes)
return werkzeug.Response(prediction_bytes, mimetype=content_type)


def _handle_ping(request: werkzeug.Request) -> werkzeug.Response:
Expand Down

0 comments on commit b7c92db

Please sign in to comment.