CLOUDSTACK-6357: Not able to select GPU card in case of GPU-passthrough.

2025-12-16 10:32:34 +01:00 · 2014-04-08 22:03:37 +05:30 · 2014-04-08 22:03:37 +05:30 · 2ae9da8d47
commit 2ae9da8d47
parent 09f83e48eb
9 changed files with 47 additions and 64 deletions
--- a/api/src/com/cloud/gpu/GPU.java
+++ b/api/src/com/cloud/gpu/GPU.java
@ -23,10 +23,6 @@ public class GPU {
        pciDevice,
        vgpuType
    }
    public enum Type {
        GPU_Passthrough,
        VGPU
    }
    public enum vGPUType {
        GRID_K100("GRID K100"),
--- a/engine/components-api/src/com/cloud/resource/ResourceManager.java
+++ b/engine/components-api/src/com/cloud/resource/ResourceManager.java
@ -151,26 +151,29 @@ public interface ResourceManager extends ResourceService {
    /**
     * Check if host has GPU devices available
     * @param hostId the host to be checked
     * @param groupName the gpuCard name
     * @param vgpuType the VGPU type
     * @return true when the host has the capacity with given VGPU type
     */
-    boolean isGPUDeviceAvailable(long hostId, String vgpuType);
+    boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType);
    /**
     * Get available GPU device
     * @param hostId the host to be checked
     * @param groupName the gpuCard name
     * @param vgpuType the VGPU type
     * @return GPUDeviceTO
     */
-    GPUDeviceTO getGPUDevice(long hostId, String vgpuType);
+    GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType);
    /**
     * Return list of available GPU devices
     * @param hostId the host to be checked
     * @param groupName the gpuCard name
     * @param vgpuType the VGPU type
     * @return List of HostGpuGroupsVO.
     */
-    List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType);
+    List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType);
    /**
     * Update GPU device details (post VM deployment)
--- a/server/src/com/cloud/agent/manager/allocator/impl/FirstFitAllocator.java
+++ b/server/src/com/cloud/agent/manager/allocator/impl/FirstFitAllocator.java
@ -278,11 +278,13 @@ public class FirstFitAllocator extends AdapterBase implements HostAllocator {
            }
            // Check if GPU device is required by offering and host has the availability
-            if ((offeringDetails   = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null
+            if ((offeringDetails   = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null) {
-                    && !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
+                ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.pciDevice.toString());
                if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
                    s_logger.info("Host name: " + host.getName() + ", hostId: "+ host.getId() +" does not have required GPU devices available");
                    continue;
                }
            }
            int cpu_requested = offering.getCpu() * offering.getSpeed();
            long ram_requested = offering.getRamSize() * 1024L * 1024L;
--- a/server/src/com/cloud/configuration/ConfigurationManagerImpl.java
+++ b/server/src/com/cloud/configuration/ConfigurationManagerImpl.java
@ -2086,46 +2086,32 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
        List<ServiceOfferingDetailsVO> detailsVO = null;
        if (details != null) {
            // Check if the user has passed the gpu-type before passing the VGPU type
-            if (!details.containsKey(GPU.Keys.pciDevice.toString()) && details.containsKey(GPU.Keys.vgpuType.toString())) {
+            if (!details.containsKey(GPU.Keys.pciDevice.toString()) || !details.containsKey(GPU.Keys.vgpuType.toString())) {
-                throw new InvalidParameterValueException("Please specify the gpu type");
+                throw new InvalidParameterValueException("Please specify the pciDevice and vgpuType correctly.");
            }
            detailsVO = new ArrayList<ServiceOfferingDetailsVO>();
            for (Entry<String, String> detailEntry : details.entrySet()) {
                String value = null;
                if (detailEntry.getKey().equals(GPU.Keys.pciDevice.toString())) {
-                    for (GPU.Type type : GPU.Type.values()) {
+                    if (detailEntry.getValue() == null) {
-                        if (detailEntry.getValue().equals(type.toString())) {
+                        throw new InvalidParameterValueException("Please specify a GPU Card.");
                            value = detailEntry.getValue();
                        }
                    }
                    if (value == null) {
                        throw new InvalidParameterValueException("Please specify valid gpu type");
                    }
                }
                if (detailEntry.getKey().equals(GPU.Keys.vgpuType.toString())) {
                    if (details.get(GPU.Keys.pciDevice.toString()).equals(GPU.Type.GPU_Passthrough.toString())) {
                        throw new InvalidParameterValueException("vgpuTypes are supported only with vGPU pciDevice");
                    }
                    if (detailEntry.getValue() == null) {
-                        throw new InvalidParameterValueException("With vGPU as pciDevice, vGPUType value cannot be null");
+                        throw new InvalidParameterValueException("vGPUType value cannot be null");
                    }
                    for (GPU.vGPUType entry : GPU.vGPUType.values()) {
                        if (detailEntry.getValue().equals(entry.getType())) {
                            value = entry.getType();
                        }
                    }
-                    if (value == null || detailEntry.getValue().equals(GPU.vGPUType.passthrough.getType())) {
+                    if (value == null) {
                        throw new InvalidParameterValueException("Please specify valid vGPU type");
                    }
                }
                detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(), detailEntry.getKey(), detailEntry.getValue(), true));
            }
            // If pciDevice type is passed, put the default VGPU type as 'passthrough'
            if (details.containsKey(GPU.Keys.pciDevice.toString())
                    && !details.containsKey(GPU.Keys.vgpuType.toString())) {
                detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(),
                        GPU.Keys.vgpuType.toString(), GPU.vGPUType.passthrough.getType(), true));
            }
        }
        if ((offering = _serviceOfferingDao.persist(offering)) != null) {
--- a/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java
+++ b/server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java
@ -363,9 +363,11 @@ public class DeploymentPlanningManagerImpl extends ManagerBase implements Deploy
            } else if (_capacityMgr.checkIfHostReachMaxGuestLimit(host)) {
                s_logger.debug("The last Host, hostId: " + host.getId() +
                    " already has max Running VMs(count includes system VMs), skipping this and trying other available hosts");
-            } else if ((offeringDetails  = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null
+            } else if ((offeringDetails  = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
-                    && !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
+                ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
                if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
                    s_logger.debug("The last host of this VM does not have required GPU devices available");
                }
            } else {
                if (host.getStatus() == Status.Up && host.getResourceState() == ResourceState.Enabled) {
                    boolean hostTagsMatch = true;
--- a/server/src/com/cloud/hypervisor/HypervisorGuruBase.java
+++ b/server/src/com/cloud/hypervisor/HypervisorGuruBase.java
@ -144,7 +144,8 @@ public abstract class HypervisorGuruBase extends AdapterBase implements Hypervis
        // Set GPU details
        ServiceOfferingDetailsVO offeringDetail = null;
        if ((offeringDetail  = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
-            to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), offeringDetail.getValue()));
+            ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
            to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), groupName.getValue(), offeringDetail.getValue()));
        }
        // Workaround to make sure the TO has the UUID we need for Nicira integration
--- a/server/src/com/cloud/resource/ResourceManagerImpl.java
+++ b/server/src/com/cloud/resource/ResourceManagerImpl.java
@ -97,7 +97,6 @@ import com.cloud.exception.DiscoveryException;
 import com.cloud.exception.InvalidParameterValueException;
 import com.cloud.exception.PermissionDeniedException;
 import com.cloud.exception.ResourceInUseException;
 import com.cloud.gpu.GPU.vGPUType;
 import com.cloud.gpu.HostGpuGroupsVO;
 import com.cloud.gpu.VGPUTypesVO;
 import com.cloud.gpu.dao.HostGpuGroupsDao;
@ -1349,6 +1348,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
        _gpuAvailability = _hostGpuGroupsDao.createSearchBuilder();
        _gpuAvailability.and("hostId", _gpuAvailability.entity().getHostId(), Op.EQ);
        _gpuAvailability.and("groupName", _gpuAvailability.entity().getGroupName(), Op.EQ);
        SearchBuilder<VGPUTypesVO> join1 = _vgpuTypesDao.createSearchBuilder();
        join1.and("vgpuType", join1.entity().getVgpuType(), Op.EQ);
        join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GT);
@ -2508,21 +2508,19 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
    }
    @Override
-    public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
+    public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
        if (vgpuType == null) {
            vgpuType = vGPUType.passthrough.getType();
        }
        Filter searchFilter = new Filter(VGPUTypesVO.class, "remainingCapacity", false, null, null);
        SearchCriteria<HostGpuGroupsVO> sc = _gpuAvailability.create();
        sc.setParameters("hostId", hostId);
        sc.setParameters("groupName", groupName);
        sc.setJoinParameters("groupId", "vgpuType", vgpuType);
        sc.setJoinParameters("groupId", "remainingCapacity", 0);
        return _hostGpuGroupsDao.customSearch(sc, searchFilter);
    }
    @Override
-    public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
+    public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
-        if(!listAvailableGPUDevice(hostId, vgpuType).isEmpty()) {
+        if(!listAvailableGPUDevice(hostId, groupName, vgpuType).isEmpty()) {
            return true;
        } else {
            if (s_logger.isDebugEnabled()) {
@ -2533,8 +2531,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
    }
    @Override
-    public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
+    public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
-        HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, vgpuType).get(0);
+        HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, groupName, vgpuType).get(0);
        return new GPUDeviceTO(gpuDevice.getGroupName(), vgpuType, null);
    }
--- a/server/test/com/cloud/resource/MockResourceManagerImpl.java
+++ b/server/test/com/cloud/resource/MockResourceManagerImpl.java
@ -558,19 +558,19 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
    }
    @Override
-    public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
+    public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
        // TODO Auto-generated method stub
        return false;
    }
    @Override
-    public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
+    public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
        // TODO Auto-generated method stub
        return null;
    }
    @Override
-    public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
+    public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
        // TODO Auto-generated method stub
        return null;
    }
--- a/ui/scripts/configuration.js
+++ b/ui/scripts/configuration.js
@ -370,25 +370,16 @@
                                                description: ''
                                            });
                                            items.push({
-                                                id: 'GPU_Passthrough',
+                                                id: 'Group of NVIDIA Corporation GK107GL [GRID K1] GPUs',
-                                                description: 'GPU-Passthrough'
+                                                description: 'NVIDIA GRID K1'
                                            });
                                            items.push({
-                                                id: 'VGPU',
+                                                id: 'Group of NVIDIA Corporation GK104GL [GRID K2] GPUs',
-                                                description: 'VGPU'
+                                                description: 'NVIDIA GRID K2'
                                            });
                                            args.response.success({
                                                data: items
                                            });
                                            args.$select.change(function() {
                                                var $form = $(this).closest('form');
                                                var $fields = $form.find('.field');
                                                if (($(this).val() == "") || $(this).val() == "GPU-Passthrough") {
                                                  $form.find('[rel=vgpuType]').hide();
                                                } else if ($(this).val() == "VGPU") {
                                                  $form.find('[rel=vgpuType]').css('display', 'block');
                                                }
                                            });
                                        }
                                    },
@ -400,6 +391,10 @@
                                                id: '',
                                                description: ''
                                            });
                                            items.push({
                                                id: 'passthrough',
                                                description: 'passthrough'
                                            });
                                            items.push({
                                                id: 'GRID K100',
                                                description: 'GRID K100'
@ -499,7 +494,7 @@
                                    array1.push("&serviceofferingdetails[1].value" + "=" + args.data.pciDevice);
                                }
-                                if (args.data.pciDevice == "VGPU") {
+                                if (args.data.vgpuType != "") {
                                    array1.push("&serviceofferingdetails[2].key" + "=" + "vgpuType");
                                    array1.push("&serviceofferingdetails[2].value" + "=" + args.data.vgpuType);
                                }