Skip to content

Commit fe00fc3

Browse files
committed
refactor: Move cluster config, resource config from deployment to revision
1 parent d650f4d commit fe00fc3

File tree

5 files changed

+136
-98
lines changed

5 files changed

+136
-98
lines changed

docs/manager/graphql-reference/supergraph.graphql

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,8 @@ input AddModelRevisionInput
2929
{
3030
name: String = null
3131
deploymentId: ID!
32+
clusterConfig: ClusterConfigInput!
33+
resourceConfig: ResourceConfigInput!
3234
image: ImageInput!
3335
modelRuntimeConfig: ModelRuntimeConfigInput!
3436
modelMountConfig: ModelMountConfigInput!
@@ -1130,8 +1132,6 @@ input CreateModelDeploymentInput
11301132
{
11311133
metadata: ModelDeploymentMetadataInput!
11321134
networkAccess: ModelDeploymentNetworkAccessInput!
1133-
clusterConfig: ClusterConfigInput!
1134-
resourceConfig: ResourceConfigInput!
11351135
deploymentStrategy: DeploymentStrategyInput!
11361136
initialRevision: CreateModelRevisionInput!
11371137
}
@@ -1148,6 +1148,8 @@ input CreateModelRevisionInput
11481148
@join__type(graph: STRAWBERRY)
11491149
{
11501150
name: String = null
1151+
clusterConfig: ClusterConfigInput!
1152+
resourceConfig: ResourceConfigInput!
11511153
image: ImageInput!
11521154
modelRuntimeConfig: ModelRuntimeConfigInput!
11531155
modelMountConfig: ModelMountConfigInput!
@@ -2509,8 +2511,6 @@ type ModelDeployment implements Node
25092511
scalingRule: ScalingRule!
25102512
replicaState: ReplicaState!
25112513
deploymentStrategy: DeploymentStrategy!
2512-
clusterConfig: ClusterConfig!
2513-
resourceConfig: ResourceConfig!
25142514
createdUser: UserNode!
25152515
}
25162516

@@ -2636,6 +2636,8 @@ type ModelRevision implements Node
26362636
"""The Globally Unique ID of this object"""
26372637
id: ID!
26382638
name: String!
2639+
clusterConfig: ClusterConfig!
2640+
resourceConfig: ResourceConfig!
26392641
modelRuntimeConfig: ModelRuntimeConfig!
26402642
modelMountConfig: ModelMountConfig!
26412643
extraMounts: [VirtualFolderNode!]!

docs/manager/graphql-reference/v2-schema.graphql

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ schema @link(url: "https://specs.apollo.dev/federation/v2.7", import: ["@externa
88
input AddModelRevisionInput {
99
name: String = null
1010
deploymentId: ID!
11+
clusterConfig: ClusterConfigInput!
12+
resourceConfig: ResourceConfigInput!
1113
image: ImageInput!
1214
modelRuntimeConfig: ModelRuntimeConfigInput!
1315
modelMountConfig: ModelMountConfigInput!
@@ -159,8 +161,6 @@ enum ClusterMode {
159161
input CreateModelDeploymentInput {
160162
metadata: ModelDeploymentMetadataInput!
161163
networkAccess: ModelDeploymentNetworkAccessInput!
162-
clusterConfig: ClusterConfigInput!
163-
resourceConfig: ResourceConfigInput!
164164
deploymentStrategy: DeploymentStrategyInput!
165165
initialRevision: CreateModelRevisionInput!
166166
}
@@ -173,6 +173,8 @@ type CreateModelDeploymentPayload {
173173
"""Added in 25.13.0"""
174174
input CreateModelRevisionInput {
175175
name: String = null
176+
clusterConfig: ClusterConfigInput!
177+
resourceConfig: ResourceConfigInput!
176178
image: ImageInput!
177179
modelRuntimeConfig: ModelRuntimeConfigInput!
178180
modelMountConfig: ModelMountConfigInput!
@@ -308,8 +310,6 @@ type ModelDeployment implements Node {
308310
scalingRule: ScalingRule!
309311
replicaState: ReplicaState!
310312
deploymentStrategy: DeploymentStrategy!
311-
clusterConfig: ClusterConfig!
312-
resourceConfig: ResourceConfig!
313313
createdUser: UserNode!
314314
}
315315

@@ -409,6 +409,8 @@ type ModelRevision implements Node {
409409
"""The Globally Unique ID of this object"""
410410
id: ID!
411411
name: String!
412+
clusterConfig: ClusterConfig!
413+
resourceConfig: ResourceConfig!
412414
modelRuntimeConfig: ModelRuntimeConfig!
413415
modelMountConfig: ModelMountConfig!
414416
extraMounts: [VirtualFolderNode!]!

src/ai/backend/manager/api/gql/model_deployment/model_deployment.py

Lines changed: 0 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
from ai.backend.manager.api.gql.federated_types import (
1414
AccessToken,
1515
AutoScalingRule,
16-
ResourceGroup,
1716
User,
1817
)
1918
from ai.backend.manager.api.gql.model_deployment.routing import (
@@ -31,12 +30,6 @@
3130
)
3231

3332

34-
@strawberry.enum(description="Added in 25.13.0")
35-
class ClusterMode(StrEnum):
36-
SINGLE_NODE = "SINGLE_NODE"
37-
MULTI_NODE = "MULTI_NODE"
38-
39-
4033
@strawberry.enum(description="Added in 25.13.0")
4134
class DeploymentStatus(StrEnum):
4235
ACTIVE = "ACTIVE"
@@ -107,12 +100,6 @@ def resolve_connection(
107100
)
108101

109102

110-
@strawberry.type(description="Added in 25.13.0")
111-
class ClusterConfig:
112-
mode: ClusterMode
113-
size: int
114-
115-
116103
@strawberry.type(description="Added in 25.13.0")
117104
class ReplicaState:
118105
desired_replica_count: int
@@ -141,18 +128,6 @@ class ModelDeploymentNetworkAccess:
141128
access_tokens: list[AccessToken]
142129

143130

144-
@strawberry.type(description="Added in 25.13.0")
145-
class ResourceConfig:
146-
resource_group: ResourceGroup
147-
resource_slots: JSONString = strawberry.field(
148-
description='Resource Slots are a JSON string that describes the resources allocated for the deployment. Example: "resourceSlots": "{\\"cpu\\": \\"1\\", \\"mem\\": \\"1073741824\\", \\"cuda.device\\": \\"0\\"}"'
149-
)
150-
resource_opts: Optional[JSONString] = strawberry.field(
151-
description='Resource Options are a JSON string that describes additional options for the resources. This is especially used for shared memory configurations. Example: "resourceOpts": "{\\"shmem\\": \\"64m\\"}"',
152-
default=None,
153-
)
154-
155-
156131
# Main ModelDeployment Type
157132
@strawberry.type(description="Added in 25.13.0")
158133
class ModelDeployment(Node):
@@ -168,8 +143,6 @@ class ModelDeployment(Node):
168143

169144
deployment_strategy: DeploymentStrategy
170145

171-
cluster_config: ClusterConfig
172-
resource_config: ResourceConfig
173146
created_user: User
174147

175148

@@ -253,29 +226,6 @@ class ReplicaStatusChangedPayload:
253226

254227

255228
# Input Types
256-
@strawberry.input(description="Added in 25.13.0")
257-
class ClusterConfigInput:
258-
mode: ClusterMode
259-
size: int
260-
261-
262-
@strawberry.input(description="Added in 25.13.0")
263-
class ResourceGroupInput:
264-
name: str
265-
266-
267-
@strawberry.input(description="Added in 25.13.0")
268-
class ResourceConfigInput:
269-
resource_group: ResourceGroupInput
270-
resource_slots: JSONString = strawberry.field(
271-
description='Resources allocated for the deployment. Example: "resourceSlots": "{\\"cpu\\": \\"1\\", \\"mem\\": \\"1073741824\\", \\"cuda.device\\": \\"0\\"}"'
272-
)
273-
resource_opts: Optional[JSONString] = strawberry.field(
274-
description='Additional options for the resources. This is especially used for shared memory configurations. Example: "resourceOpts": "{\\"shmem\\": \\"64m\\"}"',
275-
default=None,
276-
)
277-
278-
279229
@strawberry.input(description="Added in 25.13.0")
280230
class ModelDeploymentMetadataInput:
281231
name: str
@@ -297,8 +247,6 @@ class DeploymentStrategyInput:
297247
class CreateModelDeploymentInput:
298248
metadata: ModelDeploymentMetadataInput
299249
network_access: ModelDeploymentNetworkAccessInput
300-
cluster_config: ClusterConfigInput
301-
resource_config: ResourceConfigInput
302250
deployment_strategy: DeploymentStrategyInput
303251
initial_revision: CreateModelRevisionInput
304252

@@ -402,18 +350,6 @@ def _generate_mock_global_id() -> str:
402350
open_to_public=True,
403351
access_tokens=[],
404352
),
405-
cluster_config=ClusterConfig(mode=ClusterMode.SINGLE_NODE, size=1),
406-
resource_config=ResourceConfig(
407-
resource_group=ResourceGroup(id=ID(_generate_mock_global_id())),
408-
resource_slots=cast(
409-
JSONString,
410-
'{"cpu": 8, "mem": "32G", "cuda.shares": 1, "cuda.device": 1}',
411-
),
412-
resource_opts=cast(
413-
JSONString,
414-
'{"shmem": "2G", "reserved_time": "24h", "scaling_group": "us-east-1"}',
415-
),
416-
),
417353
revision=mock_model_revision_1,
418354
revision_history=ModelRevisionConnection(
419355
edges=[
@@ -468,18 +404,6 @@ def _generate_mock_global_id() -> str:
468404
open_to_public=False,
469405
access_tokens=[],
470406
),
471-
cluster_config=ClusterConfig(mode=ClusterMode.SINGLE_NODE, size=1),
472-
resource_config=ResourceConfig(
473-
resource_group=ResourceGroup(id=ID(_generate_mock_global_id())),
474-
resource_slots=cast(
475-
JSONString,
476-
'{"cpu": 8, "mem": "32G", "cuda.shares": 1, "cuda.device": 1}',
477-
),
478-
resource_opts=cast(
479-
JSONString,
480-
'{"shmem": "2G", "reserved_time": "24h", "scaling_group": "us-east-1"}',
481-
),
482-
),
483407
revision=mock_model_revision_3,
484408
revision_history=ModelRevisionConnection(
485409
edges=[
@@ -527,18 +451,6 @@ def _generate_mock_global_id() -> str:
527451
open_to_public=False,
528452
access_tokens=[],
529453
),
530-
cluster_config=ClusterConfig(mode=ClusterMode.SINGLE_NODE, size=1),
531-
resource_config=ResourceConfig(
532-
resource_group=ResourceGroup(id=ID(_generate_mock_global_id())),
533-
resource_slots=cast(
534-
JSONString,
535-
'{"cpu": 8, "mem": "32G", "cuda.shares": 1, "cuda.device": 1}',
536-
),
537-
resource_opts=cast(
538-
JSONString,
539-
'{"shmem": "2G", "reserved_time": "24h", "scaling_group": "us-east-1"}',
540-
),
541-
),
542454
revision=None,
543455
revision_history=ModelRevisionConnection(
544456
edges=[],

0 commit comments

Comments
 (0)