Skip to content

Commit 2d6ea23

Browse files
authored
refactor(service): extract inference bridge backends into sglang/vllm submodules (#1221)
Break the monolithic data_proxy/backend.py into properly separated submodules with a protocol-based backend abstraction, preparing the inference_service package structure for weight update integration.

Key changes:
- Add InfBridgeBackend protocol in inference_service/backend.py
- Extract SGLangBridgeBackend into sglang/bridge.py
- Extract VLLMBridgeBackend into vllm/bridge.py
- Move inf_bridge.py from data_proxy/ to inference_service/
- Create sglang/ and vllm/ as proper Python packages
- Update all consumer imports (app.py, 9 test files)
1 parent 6592824 commit 2d6ea23

17 files changed

Lines changed: 392 additions & 388 deletions
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
"""Inference bridge backend protocol.
4+
5+
Defines the :class:`InfBridgeBackend` protocol that concrete backends
6+
(SGLang, vLLM, …) must satisfy. See ``sglang/bridge.py`` and
7+
``vllm/bridge.py`` for implementations.
8+
"""
9+
10+
from __future__ import annotations
11+
12+
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
13+
14+
from areal.api.io_struct import HttpGenerationResult, HttpRequest
15+
16+
if TYPE_CHECKING:
17+
from areal.api.io_struct import ModelRequest
18+
19+
20+
# ---------------------------------------------------------------------------
21+
# Backend protocol
22+
# ---------------------------------------------------------------------------
23+
24+
25+
@runtime_checkable
class InfBridgeBackend(Protocol):
    """Protocol for inference-server backends used by :class:`InfBridge`.

    Each method converts between AReaL domain objects and HTTP payloads
    specific to a particular inference server (SGLang, vLLM, …).

    The protocol is structural: a backend does not need to inherit from this
    class, it only needs to provide the methods below. Because the class is
    decorated with :func:`typing.runtime_checkable`, ``isinstance`` checks
    verify that the methods *exist*, not that their signatures match.
    """

    def build_generation_request(
        self,
        req: ModelRequest,
        with_lora: bool,
        version: int = -1,
    ) -> HttpRequest:
        """Translate a :class:`ModelRequest` into a backend-specific HTTP request.

        Parameters
        ----------
        req:
            The model-level generation request.
        with_lora:
            Whether to include LoRA adapter info in the payload.
        version:
            Current weight version (used for LoRA versioning). Defaults to
            ``-1``.

        Returns
        -------
        HttpRequest
            An endpoint + JSON payload ready for :meth:`InfBridge._send_request`.
        """
        ...

    def parse_generation_response(
        self,
        response: dict[str, Any],
    ) -> HttpGenerationResult:
        """Parse a raw JSON response from the backend.

        Parameters
        ----------
        response:
            The decoded JSON body returned by the inference server.

        Returns
        -------
        HttpGenerationResult
            The backend-independent representation of the generation output.
        """
        ...

    def get_pause_request(self) -> HttpRequest:
        """Return the HTTP request that pauses generation on the backend."""
        ...

    def get_resume_request(self) -> HttpRequest:
        """Return the HTTP request that resumes generation on the backend."""
        ...

    def get_generation_max_new_tokens(self, http_req: HttpRequest) -> int:
        """Return the current generation budget encoded in ``http_req``."""
        ...

    def patch_generation_request(
        self,
        http_req: HttpRequest,
        req: ModelRequest,
        accumulated_tokens: list[int],
        remaining_tokens: int,
    ) -> None:
        """Mutate ``http_req`` for an abort/resubmit iteration.

        The request is updated in place; nothing is returned.

        Parameters
        ----------
        http_req:
            The previously built HTTP request to modify.
        req:
            The model-level generation request ``http_req`` was built from.
        accumulated_tokens:
            Token ids collected so far (presumably the output of earlier
            iterations; confirm against the concrete backends).
        remaining_tokens:
            Generation budget left for the resubmitted request (presumably
            in tokens; confirm against the concrete backends).
        """
        ...

areal/experimental/inference_service/data_proxy/app.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,7 @@
1515
from fastapi.responses import StreamingResponse
1616
from flask import Flask
1717

18-
from areal.experimental.inference_service.data_proxy.backend import (
19-
SGLangBridgeBackend,
20-
VLLMBridgeBackend,
21-
)
2218
from areal.experimental.inference_service.data_proxy.config import DataProxyConfig
23-
from areal.experimental.inference_service.data_proxy.inf_bridge import InfBridge
2419
from areal.experimental.inference_service.data_proxy.pause import PauseState
2520
from areal.experimental.inference_service.data_proxy.session import (
2621
ExportTrajectoriesRequest,
@@ -35,6 +30,9 @@
3530
from areal.experimental.inference_service.data_proxy.tokenizer_proxy import (
3631
TokenizerProxy,
3732
)
33+
from areal.experimental.inference_service.inf_bridge import InfBridge
34+
from areal.experimental.inference_service.sglang.bridge import SGLangBridgeBackend
35+
from areal.experimental.inference_service.vllm.bridge import VLLMBridgeBackend
3836
from areal.experimental.openai.client import ArealOpenAI
3937
from areal.experimental.openai.proxy.server import serialize_interactions
4038
from areal.infra.rpc.guard.data_blueprint import (

0 commit comments

Comments
 (0)