mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Fix GPU discovery script to make it run with mdev for SR-IOV enabled devices (#11340)
This commit is contained in:
parent
e805e45342
commit
bcd738caa6
@ -49,7 +49,7 @@ public class LibvirtGpuDef {
|
||||
String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID
|
||||
String displayAttribute = vgpuType.isDisplay() ? "on" : "off";
|
||||
|
||||
gpuBuilder.append("<hostdev mode='subsystem' type='mdev' managed='no' display='").append(displayAttribute).append("'>\n");
|
||||
gpuBuilder.append("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='").append(displayAttribute).append("'>\n");
|
||||
gpuBuilder.append(" <source>\n");
|
||||
gpuBuilder.append(" <address uuid='").append(mdevUuid).append("'/>\n");
|
||||
gpuBuilder.append(" </source>\n");
|
||||
|
||||
@ -64,10 +64,9 @@ public class LibvirtGpuDefTest extends TestCase {
|
||||
|
||||
String gpuXml = gpuDef.toString();
|
||||
|
||||
assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' managed='no' display='off'>"));
|
||||
assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='off'>"));
|
||||
assertTrue(gpuXml.contains("<address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>"));
|
||||
assertTrue(gpuXml.contains("</hostdev>"));
|
||||
assertFalse(gpuXml.contains("vfio")); // MDEV should not contain vfio driver element
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@ -473,7 +473,7 @@ for VM in "${VMS[@]}"; do
|
||||
# -- MDEV hostdevs: use xmlstarlet to extract UUIDs --
|
||||
while IFS= read -r UUID; do
|
||||
[[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM"
|
||||
done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true)
|
||||
done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']/source/address" -v "@uuid" -n 2>/dev/null || true)
|
||||
done
|
||||
|
||||
# Helper: convert a VM name to JSON value (quoted string or null)
|
||||
@ -516,6 +516,55 @@ parse_and_add_gpu_properties() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Finds and formats mdev instances for a given PCI device (PF or VF).
|
||||
# Appends JSON strings for each found mdev instance to the global 'vlist' array.
|
||||
# Arguments:
|
||||
# $1: mdev_base_path (e.g., /sys/bus/pci/devices/.../mdev_supported_types)
|
||||
# $2: bdf (e.g., 01:00.0)
|
||||
process_mdev_instances() {
|
||||
local mdev_base_path="$1"
|
||||
local bdf="$2"
|
||||
|
||||
if [[ ! -d "$mdev_base_path" ]]; then
|
||||
return
|
||||
fi
|
||||
|
||||
for PROF_DIR in "$mdev_base_path"/*; do
|
||||
[[ -d "$PROF_DIR" ]] || continue
|
||||
|
||||
local PROFILE_NAME
|
||||
if [[ -f "$PROF_DIR/name" ]]; then
|
||||
PROFILE_NAME=$(<"$PROF_DIR/name")
|
||||
else
|
||||
PROFILE_NAME=$(basename "$PROF_DIR")
|
||||
fi
|
||||
|
||||
parse_and_add_gpu_properties "$PROF_DIR/description"
|
||||
|
||||
local DEVICE_DIR="$PROF_DIR/devices"
|
||||
if [[ -d "$DEVICE_DIR" ]]; then
|
||||
for UDIR in "$DEVICE_DIR"/*; do
|
||||
[[ -d "$UDIR" ]] || continue
|
||||
local MDEV_UUID
|
||||
MDEV_UUID=$(basename "$UDIR")
|
||||
|
||||
local DOMAIN="0x0000"
|
||||
local BUS="0x${bdf:0:2}"
|
||||
local SLOT="0x${bdf:3:2}"
|
||||
local FUNC="0x${bdf:6:1}"
|
||||
|
||||
local raw
|
||||
raw="${mdev_to_vm[$MDEV_UUID]:-}"
|
||||
local USED_JSON
|
||||
USED_JSON=$(to_json_vm "$raw")
|
||||
|
||||
vlist+=(
|
||||
"{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
|
||||
done
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# === GPU Discovery ===
|
||||
|
||||
mapfile -t LINES < <(lspci -nnm)
|
||||
@ -588,51 +637,9 @@ for LINE in "${LINES[@]}"; do
|
||||
# === vGPU (MDEV) instances ===
|
||||
VGPU_ARRAY="[]"
|
||||
declare -a vlist=()
|
||||
# Process mdev on the Physical Function
|
||||
MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types"
|
||||
if [[ -d "$MDEV_BASE" ]]; then
|
||||
for PROF_DIR in "$MDEV_BASE"/*; do
|
||||
[[ -d "$PROF_DIR" ]] || continue
|
||||
|
||||
# Read the human-readable profile name from the 'name' file
|
||||
if [[ -f "$PROF_DIR/name" ]]; then
|
||||
PROFILE_NAME=$(<"$PROF_DIR/name")
|
||||
else
|
||||
PROFILE_NAME=$(basename "$PROF_DIR")
|
||||
fi
|
||||
|
||||
# Fetch max_instance from the description file, if present
|
||||
parse_and_add_gpu_properties "$PROF_DIR/description"
|
||||
|
||||
# Under each profile, existing UUIDs appear in:
|
||||
# /sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types/<PROFILE>/devices/*
|
||||
DEVICE_DIR="$PROF_DIR/devices"
|
||||
if [[ -d "$DEVICE_DIR" ]]; then
|
||||
for UDIR in "$DEVICE_DIR"/*; do
|
||||
[[ -d $UDIR ]] || continue
|
||||
MDEV_UUID=$(basename "$UDIR")
|
||||
|
||||
# libvirt_address uses PF BDF
|
||||
DOMAIN="0x0000"
|
||||
BUS="0x${PCI_ADDR:0:2}"
|
||||
SLOT="0x${PCI_ADDR:3:2}"
|
||||
FUNC="0x${PCI_ADDR:6:1}"
|
||||
|
||||
# Determine which VM uses this UUID
|
||||
raw="${mdev_to_vm[$MDEV_UUID]:-}"
|
||||
USED_JSON=$(to_json_vm "$raw")
|
||||
|
||||
vlist+=(
|
||||
"{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
|
||||
done
|
||||
fi
|
||||
done
|
||||
if [ ${#vlist[@]} -gt 0 ]; then
|
||||
VGPU_ARRAY="[$(
|
||||
IFS=,
|
||||
echo "${vlist[*]}"
|
||||
)]"
|
||||
fi
|
||||
fi
|
||||
process_mdev_instances "$MDEV_BASE" "$PCI_ADDR"
|
||||
|
||||
# === VF instances (SR-IOV / MIG) ===
|
||||
VF_ARRAY="[]"
|
||||
@ -644,6 +651,12 @@ for LINE in "${LINES[@]}"; do
|
||||
VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2"
|
||||
VF_BDF="${VF_ADDR:5}" # "65:00.2"
|
||||
|
||||
# For NVIDIA SR-IOV, check for vGPU (mdev) on the VF itself
|
||||
if [[ "$VENDOR_ID" == "10de" ]]; then
|
||||
VF_MDEV_BASE="$VF_PATH/mdev_supported_types"
|
||||
process_mdev_instances "$VF_MDEV_BASE" "$VF_BDF"
|
||||
fi
|
||||
|
||||
DOMAIN="0x0000"
|
||||
BUS="0x${VF_BDF:0:2}"
|
||||
SLOT="0x${VF_BDF:3:2}"
|
||||
@ -674,6 +687,14 @@ for LINE in "${LINES[@]}"; do
|
||||
fi
|
||||
fi
|
||||
|
||||
# Consolidate all vGPU instances (from PF and VFs)
|
||||
if [ ${#vlist[@]} -gt 0 ]; then
|
||||
VGPU_ARRAY="[$(
|
||||
IFS=,
|
||||
echo "${vlist[*]}"
|
||||
)]"
|
||||
fi
|
||||
|
||||
# === full_passthrough block ===
|
||||
# If vgpu_instances and vf_instances are empty, we can assume full passthrough
|
||||
FP_ENABLED=0
|
||||
|
||||
@ -795,7 +795,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
|
||||
vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(),
|
||||
gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName());
|
||||
vgpuInfo.setDisplay(serviceOffering.getGpuDisplay());
|
||||
|
||||
if (gpuDevice.getParentGpuDeviceId() != null) {
|
||||
GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId());
|
||||
if (parentGpuDevice != null) {
|
||||
@ -891,14 +890,20 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
|
||||
} else {
|
||||
// Update the device's info
|
||||
GpuDeviceVO parentGpuDevice = null;
|
||||
if (existingDevice.getParentGpuDeviceId() == null
|
||||
&& deviceInfo.getParentBusAddress() != null) {
|
||||
if (deviceInfo.getParentBusAddress() != null) {
|
||||
parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(),
|
||||
deviceInfo.getParentBusAddress());
|
||||
if (parentGpuDevice != null) {
|
||||
existingDevice.setParentGpuDeviceId(parentGpuDevice.getId());
|
||||
parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly);
|
||||
gpuDeviceDao.persist(parentGpuDevice);
|
||||
}
|
||||
}
|
||||
if (deviceInfo.isPassthroughEnabled()) {
|
||||
existingDevice.setType(deviceInfo.getDeviceType());
|
||||
} else {
|
||||
existingDevice.setType(GpuDevice.DeviceType.VGPUOnly);
|
||||
}
|
||||
if (existingDevice.getPciRoot() == null) {
|
||||
existingDevice.setPciRoot(deviceInfo.getPciRoot());
|
||||
}
|
||||
@ -913,7 +918,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
|
||||
for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) {
|
||||
logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host);
|
||||
device.setState(GpuDevice.State.Error);
|
||||
device.setManagedState(GpuDevice.ManagedState.Unmanaged);
|
||||
gpuDeviceDao.update(device.getId(), device);
|
||||
checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId());
|
||||
}
|
||||
@ -1024,11 +1028,14 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable
|
||||
deviceInfo.getParentBusAddress());
|
||||
if (parentGpuDevice != null) {
|
||||
parentGpuDeviceId = parentGpuDevice.getId();
|
||||
parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly);
|
||||
gpuDeviceDao.persist(parentGpuDevice);
|
||||
}
|
||||
}
|
||||
GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(),
|
||||
host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot());
|
||||
gpuDevice.setHostId(host.getId());
|
||||
gpuDevice.setType(deviceInfo.getDeviceType());
|
||||
gpuDevice.setBusAddress(deviceInfo.getBusAddress());
|
||||
gpuDevice.setCardId(card.getId());
|
||||
setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice);
|
||||
|
||||
@ -167,15 +167,7 @@ export default {
|
||||
Object.values(cardGroups).forEach(cardGroup => {
|
||||
const profileCount = Object.keys(cardGroup.profiles).length
|
||||
|
||||
// Filter devices for card summary calculation
|
||||
// Exclude passthrough profile devices from aggregates if there are multiple profiles
|
||||
let cardDevicesForSummary = cardGroup.devices
|
||||
if (profileCount > 1) {
|
||||
cardDevicesForSummary = cardGroup.devices.filter(device => !device.vgpuprofilename || device.vgpuprofilename.toLowerCase() !== 'passthrough'
|
||||
)
|
||||
}
|
||||
|
||||
const cardSummary = this.calculateSummary(cardDevicesForSummary)
|
||||
const cardSummary = this.calculateSummary(cardGroup.devices)
|
||||
const cardKey = `card-${cardGroup.gpucardname}`
|
||||
|
||||
const cardNode = {
|
||||
@ -192,7 +184,6 @@ export default {
|
||||
expandedKeys.push(cardKey)
|
||||
|
||||
cardNode.children = Object.values(cardGroup.profiles)
|
||||
.filter(profile => profile.vgpuprofilename.toLowerCase() !== 'passthrough')
|
||||
.map(profile => {
|
||||
const profileSummary = this.calculateSummary(profile.devices)
|
||||
return {
|
||||
@ -204,7 +195,6 @@ export default {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
summaryTree.push(cardNode)
|
||||
})
|
||||
|
||||
@ -222,6 +212,9 @@ export default {
|
||||
}
|
||||
|
||||
devices.forEach(device => {
|
||||
if (device.gpudevicetype === 'VGPUOnly') {
|
||||
return
|
||||
}
|
||||
summary.total++
|
||||
|
||||
if (device.virtualmachineid) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user