Skip to content

Unify RPS and Concurrent Scheduler Paths #233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
GenerationRequest,
GenerativeRequestLoaderDescription,
RequestLoaderDescription,
RequestT,
ResponseT,
)
from guidellm.scheduler import (
GenerativeRequestsWorkerDescription,
RequestT,
ResponseT,
SchedulerRequestResult,
WorkerDescription,
)
Expand Down
4 changes: 2 additions & 2 deletions src/guidellm/benchmark/benchmarker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
GenerationRequest,
GenerativeRequestLoaderDescription,
RequestLoaderDescription,
RequestT,
ResponseT,
)
from guidellm.scheduler import (
GenerativeRequestsWorker,
RequestsWorker,
RequestT,
ResponseT,
Scheduler,
SchedulerRequestResult,
SchedulingStrategy,
Expand Down
6 changes: 6 additions & 0 deletions src/guidellm/request/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
RequestLoaderDescription,
)
from .request import GenerationRequest
from .session import GenerativeRequestSession, RequestSession
from .types import RequestT, ResponseT

__all__ = [
"GenerationRequest",
"GenerativeRequestLoader",
"GenerativeRequestLoaderDescription",
"GenerativeRequestSession",
"RequestLoader",
"RequestLoaderDescription",
"RequestSession",
"RequestT",
"ResponseT",
]
9 changes: 5 additions & 4 deletions src/guidellm/request/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from guidellm.dataset import ColumnInputTypes, load_dataset
from guidellm.objects import StandardBaseModel
from guidellm.request.request import GenerationRequest
from guidellm.request.session import GenerativeRequestSession

__all__ = [
"GenerativeRequestLoader",
Expand All @@ -30,10 +31,10 @@ class RequestLoaderDescription(StandardBaseModel):

class RequestLoader(Iterable):
@abstractmethod
def __iter__(self): ...
def __iter__(self) -> Iterator: ...

@abstractmethod
def __len__(self): ...
def __len__(self) -> int: ...

@property
@abstractmethod
Expand Down Expand Up @@ -105,14 +106,14 @@ def __init__(
self.preserve_iter_state = iter_type == "infinite" # ensure no caching requests
self._preserved_iter = None

def __iter__(self) -> Iterator[GenerationRequest]:
def __iter__(self) -> Iterator[GenerativeRequestSession]:
scope_create_count = 0

while (dataset_iter := self._get_dataset_iter(scope_create_count)) is not None:
scope_create_count += 1

for item in dataset_iter:
yield self._create_request(item)
yield GenerativeRequestSession(self._create_request(item))

self._preserved_iter = None

Expand Down
52 changes: 52 additions & 0 deletions src/guidellm/request/session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from abc import ABC, abstractmethod
from typing import Generic, TypeVar

from guidellm.backend.response import ResponseSummary
from guidellm.request.request import GenerationRequest

__all__ = ["GenerativeRequestSession", "RequestSession"]

# TODO: Replace with specific types that implement needed features
RequestT = TypeVar("RequestT")
ResponseT = TypeVar("ResponseT")


class RequestSession(ABC, Generic[RequestT, ResponseT]):
    """Abstract interface for a session that yields requests and takes responses.

    The class is generic over the request type (returned by
    ``get_next_request``) and the response type (accepted by
    ``push_response``). Every member is abstract; concrete sessions must
    implement all of them.
    """

    @abstractmethod
    def __len__(self) -> int:
        """Return the length of the session.

        NOTE(review): presumably the number of requests the session
        contains -- confirm against the concrete implementations.
        """

    @abstractmethod
    def get_next_request(self) -> RequestT:
        """Return the next request of this session."""

    @abstractmethod
    def get_next_delay(self) -> float:
        """Return the next delay value.

        NOTE(review): the unit and reference point of the delay are not
        defined by this interface -- confirm with the caller.
        """

    @abstractmethod
    def push_response(self, response: ResponseT) -> None:
        """Hand ``response`` back to the session."""

    @property
    @abstractmethod
    def complete(self) -> bool:
        """Whether the session is complete."""


# TODO: Implement multiturn support
class GenerativeRequestSession(RequestSession[GenerationRequest, ResponseSummary]):
    """Session that wraps exactly one ``GenerationRequest``.

    The session reports a length of 1, always hands out the same wrapped
    request with a delay of ``0.0``, and becomes complete as soon as any
    response is pushed to it.
    """

    def __init__(self, request: GenerationRequest) -> None:
        """Store ``request`` and start in the not-complete state."""
        self._complete = False
        self.request = request

    @property
    def complete(self) -> bool:
        """``True`` once ``push_response`` has been called, else ``False``."""
        return self._complete

    def __len__(self) -> int:
        """Return 1: the session holds a single request."""
        return 1

    def get_next_request(self) -> GenerationRequest:
        """Return the wrapped request (the same object on every call)."""
        return self.request

    def get_next_delay(self) -> float:
        """Return ``0.0``; this session never asks for a delay."""
        return 0.0

    def push_response(self, response: ResponseSummary) -> None:  # noqa: ARG002
        """Mark the session complete; the response itself is not inspected."""
        self._complete = True
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from typing import TypeVar

__all__ = ["RequestT", "ResponseT"]
__all__ = [
"RequestT",
"ResponseT",
]


RequestT = TypeVar("RequestT")
Expand Down
5 changes: 0 additions & 5 deletions src/guidellm/scheduler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@
ThroughputStrategy,
strategy_display_str,
)
from .types import RequestT, ResponseT
from .worker import (
GenerativeRequestsWorker,
GenerativeRequestsWorkerDescription,
RequestsWorker,
ResolveStatus,
WorkerDescription,
WorkerProcessRequest,
WorkerProcessResult,
)

Expand All @@ -32,10 +30,8 @@
"ConcurrentStrategy",
"GenerativeRequestsWorker",
"GenerativeRequestsWorkerDescription",
"RequestT",
"RequestsWorker",
"ResolveStatus",
"ResponseT",
"Scheduler",
"SchedulerRequestInfo",
"SchedulerRequestResult",
Expand All @@ -46,7 +42,6 @@
"SynchronousStrategy",
"ThroughputStrategy",
"WorkerDescription",
"WorkerProcessRequest",
"WorkerProcessResult",
"strategy_display_str",
]
33 changes: 32 additions & 1 deletion src/guidellm/scheduler/result.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
from dataclasses import dataclass
from queue import Queue
from typing import (
Generic,
Literal,
Optional,
)

from guidellm.objects import StandardBaseModel
from guidellm.request.session import RequestSession
from guidellm.request.types import RequestT, ResponseT
from guidellm.scheduler.strategy import SchedulingStrategy
from guidellm.scheduler.types import RequestT, ResponseT

__all__ = [
"MPQueues",
"SchedulerRequestInfo",
"SchedulerRequestResult",
"SchedulerResult",
"SchedulerRunInfo",
"WorkerProcessRequestTime",
"WorkerProcessResult",
]


Expand Down Expand Up @@ -135,3 +141,28 @@ class SchedulerRequestResult(
request: RequestT
request_info: SchedulerRequestInfo
response: Optional[ResponseT] = None


# TODO: Move dataclasses somewhere else


@dataclass
class WorkerProcessRequestTime:
    """Three float time values associated with one worker request.

    NOTE(review): the fields look like timestamps (start, timeout deadline,
    time of queueing), but their units and clock source are not visible
    here -- confirm against the code that fills them in.
    """

    # Field order is part of the positional constructor signature.
    start_time: float
    timeout_time: float
    queued_time: float


@dataclass
class WorkerProcessResult(Generic[RequestT, ResponseT]):
    """Record pairing a request with its (optional) response and request info.

    Generic over the request and response types. ``type_`` tags the record
    with one of three literal values.
    """

    # One of the three allowed tag strings; no default, so it is required.
    type_: Literal["request_scheduled", "request_start", "request_complete"]
    # The request this record refers to.
    request: RequestT
    # May be None; presumably unset until the request has completed --
    # TODO confirm against the producer of these records.
    response: Optional[ResponseT]
    # Scheduler-side information attached to the request.
    info: SchedulerRequestInfo


@dataclass
class MPQueues(Generic[RequestT, ResponseT]):
    """Bundle of the three queues typed for sessions, times and results.

    NOTE(review): the name suggests these are shared between processes;
    the annotations only state ``queue.Queue`` -- confirm which concrete
    queue implementation callers pass in.
    """

    # Carries RequestSession objects.
    requests: Queue[RequestSession[RequestT, ResponseT]]
    # Carries WorkerProcessRequestTime records.
    times: Queue[WorkerProcessRequestTime]
    # Carries WorkerProcessResult records.
    responses: Queue[WorkerProcessResult[RequestT, ResponseT]]
Loading
Loading