Skip to content

Commit f6945ef

Browse files
committed
Setup limits for GPU
1 parent 59651bd commit f6945ef

File tree

126 files changed

+6241
-1884
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

126 files changed

+6241
-1884
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ jobs:
137137
smoke/test_vm_deployment_planner
138138
smoke/test_vm_strict_host_tags
139139
smoke/test_vm_schedule
140+
smoke/test_deploy_vgpu_enabled_vm
140141
smoke/test_vm_life_cycle
141142
smoke/test_vm_lifecycle_unmanage_import
142143
smoke/test_vm_snapshot_kvm

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,15 @@ public class AgentProperties{
213213
*/
214214
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.script", "libvirt-vm-xml-transformer.groovy");
215215

216+
/**
217+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM XML transformer shell script.<br>
218+
* The shell script is used to execute the Libvirt VM XML transformer script.<br>
219+
* For more information see the agent.properties file.<br>
220+
* Data type: String.<br>
221+
* Default value: <code>libvirt-vm-xml-transformer.sh</code>
222+
*/
223+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_XML_TRANSFORMER_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_xml_transformer.shell_script", "libvirt-vm-xml-transformer.sh");
224+
216225
/**
217226
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_xml_transformer.script properties to define the Libvirt VM XML transformer method.<br>
218227
* Libvirt XML transformer hook does XML-to-XML transformation.<br>
@@ -233,6 +242,15 @@ public class AgentProperties{
233242
*/
234243
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_START_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.script", "libvirt-vm-state-change.groovy");
235244

245+
/**
246+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM on start shell script.<br>
247+
* The shell script is used to execute the Libvirt VM on start script.<br>
248+
* For more information see the agent.properties file.<br>
249+
* Data type: String.<br>
250+
* Default value: <code>libvirt-vm-state-change.sh</code>
251+
*/
252+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_START_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_start.shell_script", "libvirt-vm-state-change.sh");
253+
236254
/**
237255
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_start.script properties to define the Libvirt VM on start method.<br>
238256
* The hook is called right after Libvirt successfully launched the VM.<br>
@@ -252,6 +270,15 @@ public class AgentProperties{
252270
*/
253271
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.script", "libvirt-vm-state-change.groovy");
254272

273+
/**
274+
* This property is used with the agent.hooks.basedir property to define the Libvirt VM on stop shell script.<br>
275+
* The shell script is used to execute the Libvirt VM on stop script.<br>
276+
* For more information see the agent.properties file.<br>
277+
* Data type: String.<br>
278+
* Default value: <code>libvirt-vm-state-change.sh</code>
279+
*/
280+
public static final Property<String> AGENT_HOOKS_LIBVIRT_VM_ON_STOP_SHELL_SCRIPT = new Property<>("agent.hooks.libvirt_vm_on_stop.shell_script", "libvirt-vm-state-change.sh");
281+
255282
/**
256283
* This property is used with the agent.hooks.basedir and agent.hooks.libvirt_vm_on_stop.script properties to define the Libvirt VM on stop method.<br>
257284
* The hook is called right after libvirt successfully stopped the VM.<br>

api/src/main/java/com/cloud/agent/api/VgpuTypesInfo.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public class VgpuTypesInfo {
4040
private Long maxVgpuPerGpu;
4141
private Long remainingCapacity;
4242
private Long maxCapacity;
43+
private boolean display = false;
4344

4445
public String getModelName() {
4546
return modelName;
@@ -53,18 +54,34 @@ public Long getVideoRam() {
5354
return videoRam;
5455
}
5556

57+
public void setVideoRam(Long videoRam) {
58+
this.videoRam = videoRam;
59+
}
60+
5661
public Long getMaxHeads() {
5762
return maxHeads;
5863
}
5964

65+
public void setMaxHeads(Long maxHeads) {
66+
this.maxHeads = maxHeads;
67+
}
68+
6069
public Long getMaxResolutionX() {
6170
return maxResolutionX;
6271
}
6372

73+
public void setMaxResolutionX(Long maxResolutionX) {
74+
this.maxResolutionX = maxResolutionX;
75+
}
76+
6477
public Long getMaxResolutionY() {
6578
return maxResolutionY;
6679
}
6780

81+
public void setMaxResolutionY(Long maxResolutionY) {
82+
this.maxResolutionY = maxResolutionY;
83+
}
84+
6885
public Long getMaxVpuPerGpu() {
6986
return maxVgpuPerGpu;
7087
}
@@ -177,6 +194,14 @@ public void setVmName(String vmName) {
177194
this.vmName = vmName;
178195
}
179196

197+
public boolean isDisplay() {
198+
return display;
199+
}
200+
201+
public void setDisplay(boolean display) {
202+
this.display = display;
203+
}
204+
180205
public VgpuTypesInfo(GpuDevice.DeviceType deviceType, String groupName, String modelName, String busAddress,
181206
String vendorId, String vendorName, String deviceId, String deviceName, String numaNode, String pciRoot
182207
) {

api/src/main/java/com/cloud/capacity/Capacity.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public interface Capacity extends InternalIdentity, Identity {
3333
public static final short CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP = 8;
3434
public static final short CAPACITY_TYPE_LOCAL_STORAGE = 9;
3535
public static final short CAPACITY_TYPE_VIRTUAL_NETWORK_IPV6_SUBNET = 10;
36-
public static final short CAPACITY_TYPE_GPU = 19;
36+
public static final short CAPACITY_TYPE_GPU = 11;
3737

3838
public static final short CAPACITY_TYPE_CPU_CORE = 90;
3939

api/src/main/java/com/cloud/user/ResourceLimitService.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ public interface ResourceLimitService {
5050
"The default maximum number of projects that can be created for an account",false);
5151
static final ConfigKey<Long> DefaultMaxDomainProjects = new ConfigKey<>("Domain Defaults",Long.class,"max.domain.projects","50",
5252
"The default maximum number of projects that can be created for a domain",false);
53+
static final ConfigKey<Long> DefaultMaxAccountGpus = new ConfigKey<>("Account Defaults",Long.class,"max.account.gpus","20",
54+
"The default maximum number of GPU devices that can be used for an account", false);
55+
static final ConfigKey<Long> DefaultMaxDomainGpus = new ConfigKey<>("Domain Defaults",Long.class,"max.domain.gpus","20",
56+
"The default maximum number of GPU devices that can be used for a domain", false);
57+
static final ConfigKey<Long> DefaultMaxProjectGpus = new ConfigKey<>("Project Defaults",Long.class,"max.project.gpus","20",
58+
"The default maximum number of GPU devices that can be used for a project", false);
5359

5460
static final List<ResourceType> HostTagsSupportingTypes = List.of(ResourceType.user_vm, ResourceType.cpu, ResourceType.memory, ResourceType.gpu);
5561
static final List<ResourceType> StorageTagsSupportingTypes = List.of(ResourceType.volume, ResourceType.primary_storage);
@@ -284,4 +290,8 @@ void checkVmResourceLimitsForTemplateChange(Account owner, Boolean display, Serv
284290
void incrementVmMemoryResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long memory);
285291
void decrementVmMemoryResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long memory);
286292

293+
void checkVmGpuResourceLimit(Account owner, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu) throws ResourceAllocationException;
294+
void incrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu);
295+
void decrementVmGpuResourceCount(long accountId, Boolean display, ServiceOffering serviceOffering, VirtualMachineTemplate template, Long gpu);
296+
287297
}

api/src/main/java/org/apache/cloudstack/api/ApiConstants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,9 @@ public class ApiConstants {
390390
public static final String GPU_CARD_ID = "gpucardid";
391391
public static final String GPU_CARD_NAME = "gpucardname";
392392
public static final String GPU_COUNT = "gpucount";
393+
public static final String GPU_DISPLAY = "gpudisplay";
393394
public static final String GPU_DEVICE_TYPE = "gpudevicetype";
395+
public static final String GPU_ENABLED = "gpuenabled";
394396
public static final String MAX_VGPU_PER_PHYSICAL_GPU = "maxvgpuperphysicalgpu";
395397
public static final String GUEST_OS_LIST = "guestoslist";
396398
public static final String GUEST_OS_COUNT = "guestoscount";

api/src/main/java/org/apache/cloudstack/api/BaseCmd.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.cloudstack.alert.AlertService;
4040
import org.apache.cloudstack.annotation.AnnotationService;
4141
import org.apache.cloudstack.context.CallContext;
42+
import org.apache.cloudstack.gpu.GpuService;
4243
import org.apache.cloudstack.network.RoutedIpv4Manager;
4344
import org.apache.cloudstack.network.lb.ApplicationLoadBalancerService;
4445
import org.apache.cloudstack.network.lb.InternalLoadBalancerVMService;
@@ -130,6 +131,8 @@ public static enum CommandType {
130131
@Inject
131132
public UserVmService _userVmService;
132133
@Inject
134+
public GpuService gpuService;
135+
@Inject
133136
public ManagementService _mgr;
134137
@Inject
135138
public StorageService _storageService;

api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuCardCmd.java

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,18 +30,13 @@
3030
import org.apache.cloudstack.api.ServerApiException;
3131
import org.apache.cloudstack.api.response.GpuCardResponse;
3232
import org.apache.cloudstack.gpu.GpuCard;
33-
import org.apache.cloudstack.gpu.GpuService;
3433

35-
import javax.inject.Inject;
3634

3735
@APICommand(name = "createGpuCard", description = "Creates a GPU card definition in the system",
38-
responseObject = GpuCardResponse.class,
39-
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, since = "4.21.0")
36+
responseObject = GpuCardResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
37+
since = "4.21.0")
4038
public class CreateGpuCardCmd extends BaseCmd {
4139

42-
@Inject
43-
private GpuService gpuService;
44-
4540
/// //////////////////////////////////////////////////
4641
/// ///////////// API parameters /////////////////////
4742
/// //////////////////////////////////////////////////
@@ -54,8 +49,8 @@ public class CreateGpuCardCmd extends BaseCmd {
5449
description = "the device name of the GPU card")
5550
private String deviceName;
5651

57-
@Parameter(name = ApiConstants.NAME, type = CommandType.STRING, required = true, description
58-
= "the display name of the GPU card")
52+
@Parameter(name = ApiConstants.NAME, type = CommandType.STRING, required = true,
53+
description = "the display name of the GPU card")
5954
private String name;
6055

6156
@Parameter(name = ApiConstants.VENDOR_NAME, type = CommandType.STRING, required = true,
@@ -66,6 +61,11 @@ public class CreateGpuCardCmd extends BaseCmd {
6661
description = "the vendor ID of the GPU card")
6762
private String vendorId;
6863

64+
// Optional parameters for the passthrough vGPU profile display properties
65+
@Parameter(name = ApiConstants.VIDEORAM, type = CommandType.LONG,
66+
description = "the video RAM size in MB for the passthrough vGPU profile")
67+
private Long videoRam;
68+
6969
/// //////////////////////////////////////////////////
7070
/// //////////////// Accessors ///////////////////////
7171
/// //////////////////////////////////////////////////
@@ -90,23 +90,24 @@ public String getVendorId() {
9090
return vendorId;
9191
}
9292

93+
public Long getVideoRam() {
94+
return videoRam;
95+
}
96+
9397
@Override
94-
public void execute() throws ResourceUnavailableException, InsufficientCapacityException,
95-
ServerApiException, ConcurrentOperationException,
96-
ResourceAllocationException, NetworkRuleConflictException {
98+
public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException,
99+
ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
97100
try {
98101
GpuCard gpuCard = gpuService.createGpuCard(this);
99102
if (gpuCard != null) {
100103
GpuCardResponse response = new GpuCardResponse(gpuCard);
101104
response.setResponseName(getCommandName());
102105
setResponseObject(response);
103106
} else {
104-
throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR,
105-
"Failed to create GPU card");
107+
throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to create GPU card");
106108
}
107109
} catch (Exception e) {
108-
throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR,
109-
"Failed to create GPU card: " + e.getMessage());
110+
throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to create GPU card: " + e.getMessage());
110111
}
111112
}
112113

api/src/main/java/org/apache/cloudstack/api/command/admin/gpu/CreateGpuDeviceCmd.java

Lines changed: 19 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package org.apache.cloudstack.api.command.admin.gpu;
1818

1919
import com.cloud.user.Account;
20+
import org.apache.cloudstack.acl.RoleType;
2021
import org.apache.cloudstack.api.APICommand;
2122
import org.apache.cloudstack.api.ApiConstants;
2223
import org.apache.cloudstack.api.ApiErrorCode;
@@ -28,73 +29,44 @@
2829
import org.apache.cloudstack.api.response.HostResponse;
2930
import org.apache.cloudstack.api.response.VgpuProfileResponse;
3031
import org.apache.cloudstack.gpu.GpuDevice;
31-
import org.apache.cloudstack.gpu.GpuService;
3232
import org.apache.commons.lang3.EnumUtils;
3333
import org.apache.commons.lang3.StringUtils;
3434

35-
import javax.inject.Inject;
3635

37-
@APICommand(name = "createGpuDevice",
38-
description = "Creates a GPU device manually on a host",
39-
responseObject = GpuDeviceResponse.class,
40-
since = "4.21.0",
41-
requestHasSensitiveInfo = false,
42-
responseHasSensitiveInfo = false)
36+
@APICommand(name = "createGpuDevice", description = "Creates a GPU device manually on a host",
37+
responseObject = GpuDeviceResponse.class, since = "4.21.0", requestHasSensitiveInfo = false,
38+
responseHasSensitiveInfo = false, authorized = {RoleType.Admin})
4339
public class CreateGpuDeviceCmd extends BaseCmd {
4440

45-
@Inject
46-
private GpuService gpuService;
47-
48-
@Parameter(name = ApiConstants.HOST_ID,
49-
type = CommandType.UUID,
50-
entityType = HostResponse.class,
51-
required = true,
52-
description = "ID of the host where the GPU device is located")
41+
@Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, required = true,
42+
description = "ID of the host where the GPU device is located")
5343
private Long hostId;
5444

55-
@Parameter(name = ApiConstants.BUS_ADDRESS,
56-
type = CommandType.STRING,
57-
required = true,
58-
description = "PCI bus address of the GPU device (e.g., 0000:01:00.0) or UUID for MDEV devices.")
45+
@Parameter(name = ApiConstants.BUS_ADDRESS, type = CommandType.STRING, required = true,
46+
description = "PCI bus address of the GPU device (e.g., 0000:01:00.0) or UUID for MDEV devices.")
5947
private String busAddress;
6048

61-
@Parameter(name = ApiConstants.GPU_CARD_ID,
62-
type = CommandType.UUID,
63-
entityType = GpuCardResponse.class,
64-
required = true,
65-
description = "ID of the GPU card type")
49+
@Parameter(name = ApiConstants.GPU_CARD_ID, type = CommandType.UUID, entityType = GpuCardResponse.class,
50+
required = true, description = "ID of the GPU card type")
6651
private Long gpuCardId;
6752

68-
@Parameter(name = ApiConstants.VGPU_PROFILE_ID,
69-
type = CommandType.UUID,
70-
entityType = VgpuProfileResponse.class,
71-
required = true,
72-
description = "ID of the vGPU profile")
53+
@Parameter(name = ApiConstants.VGPU_PROFILE_ID, type = CommandType.UUID, entityType = VgpuProfileResponse.class,
54+
required = true, description = "ID of the vGPU profile")
7355
private Long vgpuProfileId;
7456

75-
@Parameter(name = ApiConstants.TYPE,
76-
type = CommandType.STRING,
77-
description = "Type of GPU device (PCI, MDEV, VGPUOnly). Defaults to PCI.")
57+
@Parameter(name = ApiConstants.TYPE, type = CommandType.STRING,
58+
description = "Type of GPU device (PCI, MDEV, VGPUOnly). Defaults to PCI.")
7859
private String type;
7960

80-
@Parameter(name = ApiConstants.PARENT_GPU_DEVICE_ID,
81-
type = CommandType.UUID,
82-
entityType = GpuDeviceResponse.class,
83-
description = "ID of the parent GPU device (for virtual GPU devices)")
61+
@Parameter(name = ApiConstants.PARENT_GPU_DEVICE_ID, type = CommandType.UUID, entityType = GpuDeviceResponse.class,
62+
description = "ID of the parent GPU device (for virtual GPU devices)")
8463
private Long parentGpuDeviceId;
8564

86-
@Parameter(name = ApiConstants.NUMA_NODE,
87-
type = CommandType.STRING,
88-
entityType = GpuDeviceResponse.class,
89-
description = "NUMA node of the GPU device (e.g., 0, 1, etc.). This is optional and can be used to specify the NUMA node for the GPU device which is used during allocation. Defaults to -1")
65+
@Parameter(name = ApiConstants.NUMA_NODE, type = CommandType.STRING,
66+
description = "NUMA node of the GPU device (e.g., 0, 1, etc.). This is optional and can be used to "
67+
+ "specify the NUMA node for the GPU device which is used during allocation. Defaults to -1")
9068
private String numaNode;
9169

92-
@Parameter(name = ApiConstants.PCI_ROOT,
93-
type = CommandType.STRING,
94-
entityType = GpuDeviceResponse.class,
95-
description = "PCI root of the GPU device.")
96-
private String pciRoot;
97-
9870
public Long getHostId() {
9971
return hostId;
10072
}
@@ -133,10 +105,6 @@ public String getNumaNode() {
133105
return numaNode;
134106
}
135107

136-
public String getPciRoot() {
137-
return pciRoot;
138-
}
139-
140108
@Override
141109
public void execute() {
142110
try {

0 commit comments

Comments
 (0)