fix lint

mlc-ai · Dec 13, 2024 · 1be61e6 · 1be61e6
1 parent e43a578
commit 1be61e6
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 12 deletions.
diff --git a/python/mlc_llm/interface/router.py b/python/mlc_llm/interface/router.py
@@ -12,10 +12,6 @@
 from mlc_llm.router import Router
 from mlc_llm.serve import engine_base, engine_utils
 
-#
-# Global variables
-#
-
 
 def serve(
     model: str,
@@ -29,6 +25,7 @@ def serve(
     router_mode: Literal["disagg", "round-robin"],
     pd_balance_factor: float,
 ):  # pylint: disable=too-many-arguments
+    """Start the router with the specified configuration."""
     # 1. Instantiate router
     router = Router(
         model=model,

diff --git a/python/mlc_llm/router/router.py b/python/mlc_llm/router/router.py
@@ -193,18 +193,17 @@ async def _handle_completion_round_robin(
     # Note that only _handle_completion_disagg() has scheduling logic. The other three
     # helper methods only reflect our flow.
     #
-    async def _handle_completion_disagg(
+    async def _handle_completion_disagg(  # pylint: disable=too-many-locals
         self,
         original_request: openai_api_protocol.CompletionRequest,
         request_id: str,
         pd_balance_factor=0,
-    ) -> AsyncGenerator[
-        openai_api_protocol.CompletionResponse, Any
-    ]:  # pylint: disable=too-many-locals
+    ) -> AsyncGenerator[openai_api_protocol.CompletionResponse, Any]:
         """
         Handle a completion request from API with disaggregated scheduling. Given two servers
         P (prefill) and D (decode), the router does the following:
-            1. Ask D to prepare metadata, receive D's metadata (prefix cache, KV append positions, etc.)
+            1. Ask D to prepare metadata, receive D's metadata
+            (prefix cache, KV append positions, etc.)
             2. Send P the prefill request and D's metadata, receive ack
             3. Ask D to start decoding, receive the response as a normal stream
         """
@@ -380,7 +379,7 @@ async def send_prefill(
             assert "extra" in data["usage"]
             return
 
-    async def send_decode(
+    async def send_decode(  # pylint: disable=fixme
         self,
         session: aiohttp.ClientSession,
         decode_request: openai_api_protocol.CompletionRequest,

diff --git a/python/mlc_llm/serve/server/popen_server.py b/python/mlc_llm/serve/server/popen_server.py
@@ -56,9 +56,9 @@ def __init__(  # pylint: disable=too-many-arguments
         self.base_url = ""
         self.openai_v1_base_url = ""
 
-    def start(
+    def start(  # pylint: disable=too-many-branches,too-many-statements
         self, extra_env=None
-    ) -> None:  # pylint: disable=too-many-branches,too-many-statements
+    ) -> None:
         """Launch the server in a popen subprocess.
         Wait until the server becomes ready before returning.
         """