Skip to content

Commit

Permalink
fix lint
Browse files Browse the repository at this point in the history
  • Loading branch information
jinhongyii committed Dec 13, 2024
1 parent e43a578 commit 1be61e6
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 12 deletions.
5 changes: 1 addition & 4 deletions python/mlc_llm/interface/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@
from mlc_llm.router import Router
from mlc_llm.serve import engine_base, engine_utils

#
# Global variables
#


def serve(
model: str,
Expand All @@ -29,6 +25,7 @@ def serve(
router_mode: Literal["disagg", "round-robin"],
pd_balance_factor: float,
): # pylint: disable=too-many-arguments
"""Start the router with the specified configuration."""
# 1. Instantiate router
router = Router(
model=model,
Expand Down
11 changes: 5 additions & 6 deletions python/mlc_llm/router/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,17 @@ async def _handle_completion_round_robin(
# Note that only _handle_completion_disagg() contains scheduling logic. The other three
# helper methods only reflect our flow.
#
async def _handle_completion_disagg(
async def _handle_completion_disagg( # pylint: disable=too-many-locals
self,
original_request: openai_api_protocol.CompletionRequest,
request_id: str,
pd_balance_factor=0,
) -> AsyncGenerator[
openai_api_protocol.CompletionResponse, Any
]: # pylint: disable=too-many-locals
) -> AsyncGenerator[openai_api_protocol.CompletionResponse, Any]:
"""
Handle a completion request from API with disaggregated scheduling. Given two servers
P (prefill) and D (decode), the router does the following:
1. Ask D to prepare metadata, receive D's metadata (prefix cache, KV append positions, etc.)
1. Ask D to prepare metadata, receive D's metadata
(prefix cache, KV append positions, etc.)
2. Send P the prefill request and D's metadata, receive ack
3. Ask D to start decoding, receive its response as a normal streaming response
"""
Expand Down Expand Up @@ -380,7 +379,7 @@ async def send_prefill(
assert "extra" in data["usage"]
return

async def send_decode(
async def send_decode( # pylint: disable=fixme
self,
session: aiohttp.ClientSession,
decode_request: openai_api_protocol.CompletionRequest,
Expand Down
4 changes: 2 additions & 2 deletions python/mlc_llm/serve/server/popen_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ def __init__( # pylint: disable=too-many-arguments
self.base_url = ""
self.openai_v1_base_url = ""

def start(
def start( # pylint: disable=too-many-branches,too-many-statements
self, extra_env=None
) -> None: # pylint: disable=too-many-branches,too-many-statements
) -> None:
"""Launch the server in a popen subprocess.
Wait until the server becomes ready before returning.
"""
Expand Down

0 comments on commit 1be61e6

Please sign in to comment.