mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-6357: Not able to select GPU card in case of GPU-passthrough.
This commit is contained in:
parent
09f83e48eb
commit
2ae9da8d47
@ -23,10 +23,6 @@ public class GPU {
|
||||
pciDevice,
|
||||
vgpuType
|
||||
}
|
||||
public enum Type {
|
||||
GPU_Passthrough,
|
||||
VGPU
|
||||
}
|
||||
|
||||
public enum vGPUType {
|
||||
GRID_K100("GRID K100"),
|
||||
|
||||
@ -151,26 +151,29 @@ public interface ResourceManager extends ResourceService {
|
||||
/**
|
||||
* Check if host has GPU devices available
|
||||
* @param hostId the host to be checked
|
||||
* @param groupName the GPU card (group) name
|
||||
* @param vgpuType the VGPU type
|
||||
* @return true when the host has the capacity with given VGPU type
|
||||
*/
|
||||
boolean isGPUDeviceAvailable(long hostId, String vgpuType);
|
||||
boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType);
|
||||
|
||||
/**
|
||||
* Get available GPU device
|
||||
* @param hostId the host to be checked
|
||||
* @param groupName the GPU card (group) name
|
||||
* @param vgpuType the VGPU type
|
||||
* @return GPUDeviceTO describing the available GPU device
|
||||
*/
|
||||
GPUDeviceTO getGPUDevice(long hostId, String vgpuType);
|
||||
GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType);
|
||||
|
||||
/**
|
||||
* Return list of available GPU devices
|
||||
* @param hostId the host to be checked
|
||||
* @param groupName the GPU card (group) name
|
||||
* @param vgpuType the VGPU type
|
||||
* @return List of HostGpuGroupsVO.
|
||||
*/
|
||||
List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType);
|
||||
List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType);
|
||||
|
||||
/**
|
||||
* Update GPU device details (post VM deployment)
|
||||
|
||||
@ -278,11 +278,13 @@ public class FirstFitAllocator extends AdapterBase implements HostAllocator {
|
||||
}
|
||||
|
||||
// Check if GPU device is required by offering and host has the availability
|
||||
if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null
|
||||
&& !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
|
||||
if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null) {
|
||||
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.pciDevice.toString());
|
||||
if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
|
||||
s_logger.info("Host name: " + host.getName() + ", hostId: "+ host.getId() +" does not have required GPU devices available");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
int cpu_requested = offering.getCpu() * offering.getSpeed();
|
||||
long ram_requested = offering.getRamSize() * 1024L * 1024L;
|
||||
|
||||
@ -2086,46 +2086,32 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
||||
List<ServiceOfferingDetailsVO> detailsVO = null;
|
||||
if (details != null) {
|
||||
// Check if the user has passed the gpu-type before passing the VGPU type
|
||||
if (!details.containsKey(GPU.Keys.pciDevice.toString()) && details.containsKey(GPU.Keys.vgpuType.toString())) {
|
||||
throw new InvalidParameterValueException("Please specify the gpu type");
|
||||
if (!details.containsKey(GPU.Keys.pciDevice.toString()) || !details.containsKey(GPU.Keys.vgpuType.toString())) {
|
||||
throw new InvalidParameterValueException("Please specify the pciDevice and vgpuType correctly.");
|
||||
}
|
||||
detailsVO = new ArrayList<ServiceOfferingDetailsVO>();
|
||||
for (Entry<String, String> detailEntry : details.entrySet()) {
|
||||
String value = null;
|
||||
if (detailEntry.getKey().equals(GPU.Keys.pciDevice.toString())) {
|
||||
for (GPU.Type type : GPU.Type.values()) {
|
||||
if (detailEntry.getValue().equals(type.toString())) {
|
||||
value = detailEntry.getValue();
|
||||
}
|
||||
}
|
||||
if (value == null) {
|
||||
throw new InvalidParameterValueException("Please specify valid gpu type");
|
||||
if (detailEntry.getValue() == null) {
|
||||
throw new InvalidParameterValueException("Please specify a GPU Card.");
|
||||
}
|
||||
}
|
||||
if (detailEntry.getKey().equals(GPU.Keys.vgpuType.toString())) {
|
||||
if (details.get(GPU.Keys.pciDevice.toString()).equals(GPU.Type.GPU_Passthrough.toString())) {
|
||||
throw new InvalidParameterValueException("vgpuTypes are supported only with vGPU pciDevice");
|
||||
}
|
||||
if (detailEntry.getValue() == null) {
|
||||
throw new InvalidParameterValueException("With vGPU as pciDevice, vGPUType value cannot be null");
|
||||
throw new InvalidParameterValueException("vGPUType value cannot be null");
|
||||
}
|
||||
for (GPU.vGPUType entry : GPU.vGPUType.values()) {
|
||||
if (detailEntry.getValue().equals(entry.getType())) {
|
||||
value = entry.getType();
|
||||
}
|
||||
}
|
||||
if (value == null || detailEntry.getValue().equals(GPU.vGPUType.passthrough.getType())) {
|
||||
if (value == null) {
|
||||
throw new InvalidParameterValueException("Please specify valid vGPU type");
|
||||
}
|
||||
}
|
||||
detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(), detailEntry.getKey(), detailEntry.getValue(), true));
|
||||
}
|
||||
// If pciDevice type is passed, put the default VGPU type as 'passthrough'
|
||||
if (details.containsKey(GPU.Keys.pciDevice.toString())
|
||||
&& !details.containsKey(GPU.Keys.vgpuType.toString())) {
|
||||
detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(),
|
||||
GPU.Keys.vgpuType.toString(), GPU.vGPUType.passthrough.getType(), true));
|
||||
}
|
||||
}
|
||||
|
||||
if ((offering = _serviceOfferingDao.persist(offering)) != null) {
|
||||
|
||||
@ -363,9 +363,11 @@ public class DeploymentPlanningManagerImpl extends ManagerBase implements Deploy
|
||||
} else if (_capacityMgr.checkIfHostReachMaxGuestLimit(host)) {
|
||||
s_logger.debug("The last Host, hostId: " + host.getId() +
|
||||
" already has max Running VMs(count includes system VMs), skipping this and trying other available hosts");
|
||||
} else if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null
|
||||
&& !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
|
||||
} else if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
|
||||
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
|
||||
if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
|
||||
s_logger.debug("The last host of this VM does not have required GPU devices available");
|
||||
}
|
||||
} else {
|
||||
if (host.getStatus() == Status.Up && host.getResourceState() == ResourceState.Enabled) {
|
||||
boolean hostTagsMatch = true;
|
||||
|
||||
@ -144,7 +144,8 @@ public abstract class HypervisorGuruBase extends AdapterBase implements Hypervis
|
||||
// Set GPU details
|
||||
ServiceOfferingDetailsVO offeringDetail = null;
|
||||
if ((offeringDetail = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
|
||||
to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), offeringDetail.getValue()));
|
||||
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
|
||||
to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), groupName.getValue(), offeringDetail.getValue()));
|
||||
}
|
||||
|
||||
// Workaround to make sure the TO has the UUID we need for Nicira integration
|
||||
|
||||
@ -97,7 +97,6 @@ import com.cloud.exception.DiscoveryException;
|
||||
import com.cloud.exception.InvalidParameterValueException;
|
||||
import com.cloud.exception.PermissionDeniedException;
|
||||
import com.cloud.exception.ResourceInUseException;
|
||||
import com.cloud.gpu.GPU.vGPUType;
|
||||
import com.cloud.gpu.HostGpuGroupsVO;
|
||||
import com.cloud.gpu.VGPUTypesVO;
|
||||
import com.cloud.gpu.dao.HostGpuGroupsDao;
|
||||
@ -1349,6 +1348,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
|
||||
|
||||
_gpuAvailability = _hostGpuGroupsDao.createSearchBuilder();
|
||||
_gpuAvailability.and("hostId", _gpuAvailability.entity().getHostId(), Op.EQ);
|
||||
_gpuAvailability.and("groupName", _gpuAvailability.entity().getGroupName(), Op.EQ);
|
||||
SearchBuilder<VGPUTypesVO> join1 = _vgpuTypesDao.createSearchBuilder();
|
||||
join1.and("vgpuType", join1.entity().getVgpuType(), Op.EQ);
|
||||
join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GT);
|
||||
@ -2508,21 +2508,19 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
|
||||
if (vgpuType == null) {
|
||||
vgpuType = vGPUType.passthrough.getType();
|
||||
}
|
||||
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
|
||||
Filter searchFilter = new Filter(VGPUTypesVO.class, "remainingCapacity", false, null, null);
|
||||
SearchCriteria<HostGpuGroupsVO> sc = _gpuAvailability.create();
|
||||
sc.setParameters("hostId", hostId);
|
||||
sc.setParameters("groupName", groupName);
|
||||
sc.setJoinParameters("groupId", "vgpuType", vgpuType);
|
||||
sc.setJoinParameters("groupId", "remainingCapacity", 0);
|
||||
return _hostGpuGroupsDao.customSearch(sc, searchFilter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
|
||||
if(!listAvailableGPUDevice(hostId, vgpuType).isEmpty()) {
|
||||
public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
|
||||
if(!listAvailableGPUDevice(hostId, groupName, vgpuType).isEmpty()) {
|
||||
return true;
|
||||
} else {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
@ -2533,8 +2531,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
|
||||
}
|
||||
|
||||
@Override
|
||||
public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
|
||||
HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, vgpuType).get(0);
|
||||
public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
|
||||
HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, groupName, vgpuType).get(0);
|
||||
return new GPUDeviceTO(gpuDevice.getGroupName(), vgpuType, null);
|
||||
}
|
||||
|
||||
|
||||
@ -558,19 +558,19 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
|
||||
public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
|
||||
public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
|
||||
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -370,25 +370,16 @@
|
||||
description: ''
|
||||
});
|
||||
items.push({
|
||||
id: 'GPU_Passthrough',
|
||||
description: 'GPU-Passthrough'
|
||||
id: 'Group of NVIDIA Corporation GK107GL [GRID K1] GPUs',
|
||||
description: 'NVIDIA GRID K1'
|
||||
});
|
||||
items.push({
|
||||
id: 'VGPU',
|
||||
description: 'VGPU'
|
||||
id: 'Group of NVIDIA Corporation GK104GL [GRID K2] GPUs',
|
||||
description: 'NVIDIA GRID K2'
|
||||
});
|
||||
args.response.success({
|
||||
data: items
|
||||
});
|
||||
args.$select.change(function() {
|
||||
var $form = $(this).closest('form');
|
||||
var $fields = $form.find('.field');
|
||||
if (($(this).val() == "") || $(this).val() == "GPU-Passthrough") {
|
||||
$form.find('[rel=vgpuType]').hide();
|
||||
} else if ($(this).val() == "VGPU") {
|
||||
$form.find('[rel=vgpuType]').css('display', 'block');
|
||||
}
|
||||
});
|
||||
}
|
||||
},
|
||||
|
||||
@ -400,6 +391,10 @@
|
||||
id: '',
|
||||
description: ''
|
||||
});
|
||||
items.push({
|
||||
id: 'passthrough',
|
||||
description: 'passthrough'
|
||||
});
|
||||
items.push({
|
||||
id: 'GRID K100',
|
||||
description: 'GRID K100'
|
||||
@ -499,7 +494,7 @@
|
||||
array1.push("&serviceofferingdetails[1].value" + "=" + args.data.pciDevice);
|
||||
}
|
||||
|
||||
if (args.data.pciDevice == "VGPU") {
|
||||
if (args.data.vgpuType != "") {
|
||||
array1.push("&serviceofferingdetails[2].key" + "=" + "vgpuType");
|
||||
array1.push("&serviceofferingdetails[2].value" + "=" + args.data.vgpuType);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user