From bcd738caa60bb804a8494366360827b246e52fc7 Mon Sep 17 00:00:00 2001 From: Vishesh Date: Thu, 31 Jul 2025 18:29:35 +0530 Subject: [PATCH] Fix GPU discovery script to make it run with mdev for SR-IOV enabled devices (#11340) --- .../kvm/resource/LibvirtGpuDef.java | 2 +- .../kvm/resource/LibvirtGpuDefTest.java | 3 +- scripts/vm/hypervisor/kvm/gpudiscovery.sh | 111 +++++++++++------- .../apache/cloudstack/gpu/GpuServiceImpl.java | 15 ++- ui/src/components/view/GPUSummaryTab.vue | 15 +-- 5 files changed, 83 insertions(+), 63 deletions(-) diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java index 80a34b33b59..06457b1d071 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDef.java @@ -49,7 +49,7 @@ public class LibvirtGpuDef { String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID String displayAttribute = vgpuType.isDisplay() ? "on" : "off"; - gpuBuilder.append("\n"); + gpuBuilder.append("\n"); gpuBuilder.append(" \n"); gpuBuilder.append("
\n"); gpuBuilder.append(" \n"); diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java index 5dbea4fabf9..0060e1d7ed4 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/resource/LibvirtGpuDefTest.java @@ -64,10 +64,9 @@ public class LibvirtGpuDefTest extends TestCase { String gpuXml = gpuDef.toString(); - assertTrue(gpuXml.contains("")); + assertTrue(gpuXml.contains("")); assertTrue(gpuXml.contains("
")); assertTrue(gpuXml.contains("")); - assertFalse(gpuXml.contains("vfio")); // MDEV should not contain vfio driver element } @Test diff --git a/scripts/vm/hypervisor/kvm/gpudiscovery.sh b/scripts/vm/hypervisor/kvm/gpudiscovery.sh index 67627deef57..662abaed2cb 100755 --- a/scripts/vm/hypervisor/kvm/gpudiscovery.sh +++ b/scripts/vm/hypervisor/kvm/gpudiscovery.sh @@ -473,7 +473,7 @@ for VM in "${VMS[@]}"; do # -- MDEV hostdevs: use xmlstarlet to extract UUIDs -- while IFS= read -r UUID; do [[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM" - done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true) + done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']/source/address" -v "@uuid" -n 2>/dev/null || true) done # Helper: convert a VM name to JSON value (quoted string or null) @@ -516,6 +516,55 @@ parse_and_add_gpu_properties() { fi } +# Finds and formats mdev instances for a given PCI device (PF or VF). +# Appends JSON strings for each found mdev instance to the global 'vlist' array. +# Arguments: +# $1: mdev_base_path (e.g., /sys/bus/pci/devices/.../mdev_supported_types) +# $2: bdf (e.g., 01:00.0) +process_mdev_instances() { + local mdev_base_path="$1" + local bdf="$2" + + if [[ ! -d "$mdev_base_path" ]]; then + return + fi + + for PROF_DIR in "$mdev_base_path"/*; do + [[ -d "$PROF_DIR" ]] || continue + + local PROFILE_NAME + if [[ -f "$PROF_DIR/name" ]]; then + PROFILE_NAME=$(<"$PROF_DIR/name") + else + PROFILE_NAME=$(basename "$PROF_DIR") + fi + + parse_and_add_gpu_properties "$PROF_DIR/description" + + local DEVICE_DIR="$PROF_DIR/devices" + if [[ -d "$DEVICE_DIR" ]]; then + for UDIR in "$DEVICE_DIR"/*; do + [[ -d "$UDIR" ]] || continue + local MDEV_UUID + MDEV_UUID=$(basename "$UDIR") + + local DOMAIN="0x0000" + local BUS="0x${bdf:0:2}" + local SLOT="0x${bdf:3:2}" + local FUNC="0x${bdf:6:1}" + + local raw + raw="${mdev_to_vm[$MDEV_UUID]:-}" + local USED_JSON + USED_JSON=$(to_json_vm "$raw") + + vlist+=( + "{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") + done + fi + done +} + # === GPU Discovery === mapfile -t LINES < <(lspci -nnm) @@ -588,51 +637,9 @@ for LINE in "${LINES[@]}"; do # === vGPU (MDEV) instances === VGPU_ARRAY="[]" declare -a vlist=() + # Process mdev on the Physical Function MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types" - if [[ -d "$MDEV_BASE" ]]; then - for PROF_DIR in "$MDEV_BASE"/*; do - [[ -d "$PROF_DIR" ]] || continue - - # Read the human-readable profile name from the 'name' file - if [[ -f "$PROF_DIR/name" ]]; then - PROFILE_NAME=$(<"$PROF_DIR/name") - else - PROFILE_NAME=$(basename "$PROF_DIR") - fi - - # Fetch max_instance from the description file, if present - parse_and_add_gpu_properties "$PROF_DIR/description" - - # Under each profile, existing UUIDs appear in: - # /sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types//devices/* - DEVICE_DIR="$PROF_DIR/devices" - if [[ -d "$DEVICE_DIR" ]]; then - for UDIR in "$DEVICE_DIR"/*; do - [[ -d $UDIR ]] || continue - MDEV_UUID=$(basename "$UDIR") - - # libvirt_address uses PF BDF - DOMAIN="0x0000" - BUS="0x${PCI_ADDR:0:2}" - SLOT="0x${PCI_ADDR:3:2}" - FUNC="0x${PCI_ADDR:6:1}" - - # Determine which VM uses this UUID - raw="${mdev_to_vm[$MDEV_UUID]:-}" - USED_JSON=$(to_json_vm "$raw") - - vlist+=( - "{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") - done - fi - done - if [ ${#vlist[@]} -gt 0 ]; then - VGPU_ARRAY="[$( - IFS=, - echo "${vlist[*]}" - )]" - fi - fi + process_mdev_instances "$MDEV_BASE" "$PCI_ADDR" # === VF instances (SR-IOV / MIG) === VF_ARRAY="[]" @@ -644,6 +651,12 @@ for LINE in "${LINES[@]}"; do VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2" VF_BDF="${VF_ADDR:5}" # "65:00.2" + # For NVIDIA SR-IOV, check for vGPU (mdev) on the VF itself + if [[ "$VENDOR_ID" == "10de" ]]; then + VF_MDEV_BASE="$VF_PATH/mdev_supported_types" + process_mdev_instances "$VF_MDEV_BASE" "$VF_BDF" + fi + DOMAIN="0x0000" BUS="0x${VF_BDF:0:2}" SLOT="0x${VF_BDF:3:2}" @@ -674,6 +687,14 @@ for LINE in "${LINES[@]}"; do fi fi + # Consolidate all vGPU instances (from PF and VFs) + if [ ${#vlist[@]} -gt 0 ]; then + VGPU_ARRAY="[$( + IFS=, + echo "${vlist[*]}" + )]" + fi + # === full_passthrough block === # If vgpu_instances and vf_instances are empty, we can assume full passthrough FP_ENABLED=0 diff --git a/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java b/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java index 5890b176c8e..a553abbacd5 100644 --- a/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java +++ b/server/src/main/java/org/apache/cloudstack/gpu/GpuServiceImpl.java @@ -795,7 +795,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(), gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName()); vgpuInfo.setDisplay(serviceOffering.getGpuDisplay()); - if (gpuDevice.getParentGpuDeviceId() != null) { GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId()); if (parentGpuDevice != null) { @@ -891,14 +890,20 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable } else { // Update the device's info GpuDeviceVO parentGpuDevice = null; - if (existingDevice.getParentGpuDeviceId() == null - && deviceInfo.getParentBusAddress() != null) { + if (deviceInfo.getParentBusAddress() != null) { parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(), deviceInfo.getParentBusAddress()); if (parentGpuDevice != null) { existingDevice.setParentGpuDeviceId(parentGpuDevice.getId()); + parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly); + gpuDeviceDao.persist(parentGpuDevice); } } + if (deviceInfo.isPassthroughEnabled()) { + existingDevice.setType(deviceInfo.getDeviceType()); + } else { + existingDevice.setType(GpuDevice.DeviceType.VGPUOnly); + } if (existingDevice.getPciRoot() == null) { existingDevice.setPciRoot(deviceInfo.getPciRoot()); } @@ -913,7 +918,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) { logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host); device.setState(GpuDevice.State.Error); - device.setManagedState(GpuDevice.ManagedState.Unmanaged); gpuDeviceDao.update(device.getId(), device); checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); } @@ -1024,11 +1028,14 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable deviceInfo.getParentBusAddress()); if (parentGpuDevice != null) { parentGpuDeviceId = parentGpuDevice.getId(); + parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly); + gpuDeviceDao.persist(parentGpuDevice); } } GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(), host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot()); gpuDevice.setHostId(host.getId()); + gpuDevice.setType(deviceInfo.getDeviceType()); gpuDevice.setBusAddress(deviceInfo.getBusAddress()); gpuDevice.setCardId(card.getId()); setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice); diff --git a/ui/src/components/view/GPUSummaryTab.vue b/ui/src/components/view/GPUSummaryTab.vue index 8b649e05662..52553f143d5 100644 --- a/ui/src/components/view/GPUSummaryTab.vue +++ b/ui/src/components/view/GPUSummaryTab.vue @@ -167,15 +167,7 @@ export default { Object.values(cardGroups).forEach(cardGroup => { const profileCount = Object.keys(cardGroup.profiles).length - // Filter devices for card summary calculation - // Exclude passthrough profile devices from aggregates if there are multiple profiles - let cardDevicesForSummary = cardGroup.devices - if (profileCount > 1) { - cardDevicesForSummary = cardGroup.devices.filter(device => !device.vgpuprofilename || device.vgpuprofilename.toLowerCase() !== 'passthrough' - ) - } - - const cardSummary = this.calculateSummary(cardDevicesForSummary) + const cardSummary = this.calculateSummary(cardGroup.devices) const cardKey = `card-${cardGroup.gpucardname}` const cardNode = { @@ -192,7 +184,6 @@ export default { expandedKeys.push(cardKey) cardNode.children = Object.values(cardGroup.profiles) - .filter(profile => profile.vgpuprofilename.toLowerCase() !== 'passthrough') .map(profile => { const profileSummary = this.calculateSummary(profile.devices) return { @@ -204,7 +195,6 @@ export default { } }) } - summaryTree.push(cardNode) }) @@ -222,6 +212,9 @@ export default { } devices.forEach(device => { + if (device.gpudevicetype === 'VGPUOnly') { + return + } summary.total++ if (device.virtualmachineid) {