Fix GPU discovery script to make it run with mdev for SR-IOV enabled devices (#11340)

This commit is contained in:
Vishesh 2025-07-31 18:29:35 +05:30 committed by GitHub
parent e805e45342
commit bcd738caa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 83 additions and 63 deletions

View File

@ -49,7 +49,7 @@ public class LibvirtGpuDef {
String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID
String displayAttribute = vgpuType.isDisplay() ? "on" : "off"; String displayAttribute = vgpuType.isDisplay() ? "on" : "off";
gpuBuilder.append("<hostdev mode='subsystem' type='mdev' managed='no' display='").append(displayAttribute).append("'>\n"); gpuBuilder.append("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='").append(displayAttribute).append("'>\n");
gpuBuilder.append(" <source>\n"); gpuBuilder.append(" <source>\n");
gpuBuilder.append(" <address uuid='").append(mdevUuid).append("'/>\n"); gpuBuilder.append(" <address uuid='").append(mdevUuid).append("'/>\n");
gpuBuilder.append(" </source>\n"); gpuBuilder.append(" </source>\n");

View File

@ -64,10 +64,9 @@ public class LibvirtGpuDefTest extends TestCase {
String gpuXml = gpuDef.toString(); String gpuXml = gpuDef.toString();
assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' managed='no' display='off'>")); assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='off'>"));
assertTrue(gpuXml.contains("<address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>")); assertTrue(gpuXml.contains("<address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>"));
assertTrue(gpuXml.contains("</hostdev>")); assertTrue(gpuXml.contains("</hostdev>"));
assertFalse(gpuXml.contains("vfio")); // MDEV should not contain vfio driver element
} }
@Test @Test

View File

@ -473,7 +473,7 @@ for VM in "${VMS[@]}"; do
# -- MDEV hostdevs: use xmlstarlet to extract UUIDs -- # -- MDEV hostdevs: use xmlstarlet to extract UUIDs --
while IFS= read -r UUID; do while IFS= read -r UUID; do
[[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM" [[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM"
done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true) done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']/source/address" -v "@uuid" -n 2>/dev/null || true)
done done
# Helper: convert a VM name to JSON value (quoted string or null) # Helper: convert a VM name to JSON value (quoted string or null)
@ -516,6 +516,55 @@ parse_and_add_gpu_properties() {
fi fi
} }
# Finds and formats mdev instances for a given PCI device (PF or VF).
# Appends one JSON object string per mdev instance found to the global
# 'vlist' array.
#
# Arguments:
#   $1: mdev_base_path (e.g., /sys/bus/pci/devices/.../mdev_supported_types)
#   $2: bdf (e.g., 01:00.0); used to build the libvirt_address of every
#       instance found under mdev_base_path
#
# Globals read:
#   mdev_to_vm  - associative array mapping an mdev UUID to the VM using it
#   MAX_INSTANCES, VIDEO_RAM, MAX_HEADS, MAX_RESOLUTION_X, MAX_RESOLUTION_Y
#               - presumably refreshed per profile by
#                 parse_and_add_gpu_properties (defined elsewhere; confirm)
# Globals written:
#   vlist       - one JSON string appended per mdev instance
#
# Returns 0 whether or not any instance was found; a missing
# mdev_supported_types directory is not an error.
process_mdev_instances() {
    local mdev_base_path="$1"
    local bdf="$2"

    # 'for' loop variables are global in bash unless declared local; keep them
    # (and every other working variable) scoped to this function so repeated
    # calls for the PF and each VF cannot clobber the caller's variables.
    local PROF_DIR UDIR
    local PROFILE_NAME DEVICE_DIR MDEV_UUID raw USED_JSON

    if [[ ! -d "$mdev_base_path" ]]; then
        return 0
    fi

    # The libvirt address depends only on the BDF ("bb:ss.f"), so split it
    # once instead of once per mdev instance. The PCI domain is fixed to 0.
    local DOMAIN="0x0000"
    local BUS="0x${bdf:0:2}"
    local SLOT="0x${bdf:3:2}"
    local FUNC="0x${bdf:6:1}"

    for PROF_DIR in "$mdev_base_path"/*; do
        # Also skips the literal, unexpanded glob when the directory is empty.
        [[ -d "$PROF_DIR" ]] || continue

        # Prefer the human-readable profile name from the 'name' file and fall
        # back to the profile directory name.
        if [[ -f "$PROF_DIR/name" ]]; then
            PROFILE_NAME=$(<"$PROF_DIR/name")
        else
            PROFILE_NAME=$(basename "$PROF_DIR")
        fi

        parse_and_add_gpu_properties "$PROF_DIR/description"

        # Existing instances of this profile appear as <profile>/devices/<uuid>.
        DEVICE_DIR="$PROF_DIR/devices"
        [[ -d "$DEVICE_DIR" ]] || continue

        for UDIR in "$DEVICE_DIR"/*; do
            [[ -d "$UDIR" ]] || continue

            MDEV_UUID=$(basename "$UDIR")

            # Empty when no VM references this UUID; to_json_vm turns the VM
            # name into a JSON value (quoted string or null).
            raw="${mdev_to_vm[$MDEV_UUID]:-}"
            USED_JSON=$(to_json_vm "$raw")

            vlist+=(
                "{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
        done
    done
}
# === GPU Discovery === # === GPU Discovery ===
mapfile -t LINES < <(lspci -nnm) mapfile -t LINES < <(lspci -nnm)
@ -588,51 +637,9 @@ for LINE in "${LINES[@]}"; do
# === vGPU (MDEV) instances === # === vGPU (MDEV) instances ===
VGPU_ARRAY="[]" VGPU_ARRAY="[]"
declare -a vlist=() declare -a vlist=()
# Process mdev on the Physical Function
MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types" MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types"
if [[ -d "$MDEV_BASE" ]]; then process_mdev_instances "$MDEV_BASE" "$PCI_ADDR"
for PROF_DIR in "$MDEV_BASE"/*; do
[[ -d "$PROF_DIR" ]] || continue
# Read the human-readable profile name from the 'name' file
if [[ -f "$PROF_DIR/name" ]]; then
PROFILE_NAME=$(<"$PROF_DIR/name")
else
PROFILE_NAME=$(basename "$PROF_DIR")
fi
# Fetch max_instance from the description file, if present
parse_and_add_gpu_properties "$PROF_DIR/description"
# Under each profile, existing UUIDs appear in:
# /sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types/<PROFILE>/devices/*
DEVICE_DIR="$PROF_DIR/devices"
if [[ -d "$DEVICE_DIR" ]]; then
for UDIR in "$DEVICE_DIR"/*; do
[[ -d $UDIR ]] || continue
MDEV_UUID=$(basename "$UDIR")
# libvirt_address uses PF BDF
DOMAIN="0x0000"
BUS="0x${PCI_ADDR:0:2}"
SLOT="0x${PCI_ADDR:3:2}"
FUNC="0x${PCI_ADDR:6:1}"
# Determine which VM uses this UUID
raw="${mdev_to_vm[$MDEV_UUID]:-}"
USED_JSON=$(to_json_vm "$raw")
vlist+=(
"{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
done
fi
done
if [ ${#vlist[@]} -gt 0 ]; then
VGPU_ARRAY="[$(
IFS=,
echo "${vlist[*]}"
)]"
fi
fi
# === VF instances (SR-IOV / MIG) === # === VF instances (SR-IOV / MIG) ===
VF_ARRAY="[]" VF_ARRAY="[]"
@ -644,6 +651,12 @@ for LINE in "${LINES[@]}"; do
VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2" VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2"
VF_BDF="${VF_ADDR:5}" # "65:00.2" VF_BDF="${VF_ADDR:5}" # "65:00.2"
# For NVIDIA SR-IOV, check for vGPU (mdev) on the VF itself
if [[ "$VENDOR_ID" == "10de" ]]; then
VF_MDEV_BASE="$VF_PATH/mdev_supported_types"
process_mdev_instances "$VF_MDEV_BASE" "$VF_BDF"
fi
DOMAIN="0x0000" DOMAIN="0x0000"
BUS="0x${VF_BDF:0:2}" BUS="0x${VF_BDF:0:2}"
SLOT="0x${VF_BDF:3:2}" SLOT="0x${VF_BDF:3:2}"
@ -674,6 +687,14 @@ for LINE in "${LINES[@]}"; do
fi fi
fi fi
# Consolidate all vGPU instances (from PF and VFs)
if [ ${#vlist[@]} -gt 0 ]; then
VGPU_ARRAY="[$(
IFS=,
echo "${vlist[*]}"
)]"
fi
# === full_passthrough block === # === full_passthrough block ===
# If vgpu_instances and vf_instances are empty, we can assume full passthrough # If vgpu_instances and vf_instances are empty, we can assume full passthrough
FP_ENABLED=0 FP_ENABLED=0

View File

@ -795,7 +795,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(), vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(),
gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName()); gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName());
vgpuInfo.setDisplay(serviceOffering.getGpuDisplay()); vgpuInfo.setDisplay(serviceOffering.getGpuDisplay());
if (gpuDevice.getParentGpuDeviceId() != null) { if (gpuDevice.getParentGpuDeviceId() != null) {
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId()); GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId());
if (parentGpuDevice != null) { if (parentGpuDevice != null) {
@ -891,14 +890,20 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
} else { } else {
// Update the device's info // Update the device's info
GpuDeviceVO parentGpuDevice = null; GpuDeviceVO parentGpuDevice = null;
if (existingDevice.getParentGpuDeviceId() == null if (deviceInfo.getParentBusAddress() != null) {
&& deviceInfo.getParentBusAddress() != null) {
parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(), parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
deviceInfo.getParentBusAddress()); deviceInfo.getParentBusAddress());
if (parentGpuDevice != null) { if (parentGpuDevice != null) {
existingDevice.setParentGpuDeviceId(parentGpuDevice.getId()); existingDevice.setParentGpuDeviceId(parentGpuDevice.getId());
parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly);
gpuDeviceDao.persist(parentGpuDevice);
} }
} }
if (deviceInfo.isPassthroughEnabled()) {
existingDevice.setType(deviceInfo.getDeviceType());
} else {
existingDevice.setType(GpuDevice.DeviceType.VGPUOnly);
}
if (existingDevice.getPciRoot() == null) { if (existingDevice.getPciRoot() == null) {
existingDevice.setPciRoot(deviceInfo.getPciRoot()); existingDevice.setPciRoot(deviceInfo.getPciRoot());
} }
@ -913,7 +918,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) { for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) {
logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host); logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host);
device.setState(GpuDevice.State.Error); device.setState(GpuDevice.State.Error);
device.setManagedState(GpuDevice.ManagedState.Unmanaged);
gpuDeviceDao.update(device.getId(), device); gpuDeviceDao.update(device.getId(), device);
checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId());
} }
@ -1024,11 +1028,14 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
deviceInfo.getParentBusAddress()); deviceInfo.getParentBusAddress());
if (parentGpuDevice != null) { if (parentGpuDevice != null) {
parentGpuDeviceId = parentGpuDevice.getId(); parentGpuDeviceId = parentGpuDevice.getId();
parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly);
gpuDeviceDao.persist(parentGpuDevice);
} }
} }
GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(), GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(),
host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot()); host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot());
gpuDevice.setHostId(host.getId()); gpuDevice.setHostId(host.getId());
gpuDevice.setType(deviceInfo.getDeviceType());
gpuDevice.setBusAddress(deviceInfo.getBusAddress()); gpuDevice.setBusAddress(deviceInfo.getBusAddress());
gpuDevice.setCardId(card.getId()); gpuDevice.setCardId(card.getId());
setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice); setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice);

View File

@ -167,15 +167,7 @@ export default {
Object.values(cardGroups).forEach(cardGroup => { Object.values(cardGroups).forEach(cardGroup => {
const profileCount = Object.keys(cardGroup.profiles).length const profileCount = Object.keys(cardGroup.profiles).length
// Filter devices for card summary calculation const cardSummary = this.calculateSummary(cardGroup.devices)
// Exclude passthrough profile devices from aggregates if there are multiple profiles
let cardDevicesForSummary = cardGroup.devices
if (profileCount > 1) {
cardDevicesForSummary = cardGroup.devices.filter(device => !device.vgpuprofilename || device.vgpuprofilename.toLowerCase() !== 'passthrough'
)
}
const cardSummary = this.calculateSummary(cardDevicesForSummary)
const cardKey = `card-${cardGroup.gpucardname}` const cardKey = `card-${cardGroup.gpucardname}`
const cardNode = { const cardNode = {
@ -192,7 +184,6 @@ export default {
expandedKeys.push(cardKey) expandedKeys.push(cardKey)
cardNode.children = Object.values(cardGroup.profiles) cardNode.children = Object.values(cardGroup.profiles)
.filter(profile => profile.vgpuprofilename.toLowerCase() !== 'passthrough')
.map(profile => { .map(profile => {
const profileSummary = this.calculateSummary(profile.devices) const profileSummary = this.calculateSummary(profile.devices)
return { return {
@ -204,7 +195,6 @@ export default {
} }
}) })
} }
summaryTree.push(cardNode) summaryTree.push(cardNode)
}) })
@ -222,6 +212,9 @@ export default {
} }
devices.forEach(device => { devices.forEach(device => {
if (device.gpudevicetype === 'VGPUOnly') {
return
}
summary.total++ summary.total++
if (device.virtualmachineid) { if (device.virtualmachineid) {