mirror of
				https://github.com/apache/cloudstack.git
				synced 2025-10-26 08:42:29 +01:00 
			
		
		
		
	Fix GPU discovery script to make it run with mdev for SR-IOV enabled devices (#11340)
This commit is contained in:
		
							parent
							
								
									e805e45342
								
							
						
					
					
						commit
						bcd738caa6
					
				| @ -49,7 +49,7 @@ public class LibvirtGpuDef { | ||||
|         String mdevUuid = vgpuType.getBusAddress(); // For MDEV devices, busAddress contains the UUID | ||||
|         String displayAttribute = vgpuType.isDisplay() ? "on" : "off"; | ||||
| 
 | ||||
|         gpuBuilder.append("<hostdev mode='subsystem' type='mdev' managed='no' display='").append(displayAttribute).append("'>\n"); | ||||
|         gpuBuilder.append("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='").append(displayAttribute).append("'>\n"); | ||||
|         gpuBuilder.append("  <source>\n"); | ||||
|         gpuBuilder.append("    <address uuid='").append(mdevUuid).append("'/>\n"); | ||||
|         gpuBuilder.append("  </source>\n"); | ||||
|  | ||||
| @ -64,10 +64,9 @@ public class LibvirtGpuDefTest extends TestCase { | ||||
| 
 | ||||
|         String gpuXml = gpuDef.toString(); | ||||
| 
 | ||||
|         assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' managed='no' display='off'>")); | ||||
|         assertTrue(gpuXml.contains("<hostdev mode='subsystem' type='mdev' model='vfio-pci' display='off'>")); | ||||
|         assertTrue(gpuXml.contains("<address uuid='4b20d080-1b54-4048-85b3-a6a62d165c01'/>")); | ||||
|         assertTrue(gpuXml.contains("</hostdev>")); | ||||
|         assertFalse(gpuXml.contains("vfio")); // MDEV should not contain vfio driver element | ||||
|     } | ||||
| 
 | ||||
|     @Test | ||||
|  | ||||
| @ -473,7 +473,7 @@ for VM in "${VMS[@]}"; do | ||||
| 	# -- MDEV hostdevs: use xmlstarlet to extract UUIDs -- | ||||
| 	while IFS= read -r UUID; do | ||||
| 		[[ -n "$UUID" ]] && mdev_to_vm["$UUID"]="$VM" | ||||
| 	done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']" -v "@uuid" -n 2>/dev/null || true) | ||||
| 	done < <(echo "$xml" | xmlstarlet sel -T -t -m "//hostdev[@type='mdev']/source/address" -v "@uuid" -n 2>/dev/null || true) | ||||
| done | ||||
| 
 | ||||
| # Helper: convert a VM name to JSON value (quoted string or null) | ||||
| @ -516,6 +516,55 @@ parse_and_add_gpu_properties() { | ||||
|     fi | ||||
| } | ||||
| 
 | ||||
# Finds and formats mdev (vGPU) instances for a given PCI device (PF or VF).
# Appends one JSON object string per discovered mdev instance to the global
# 'vlist' array.
#
# Globals read:
#   mdev_to_vm  - associative array mapping an mdev UUID to the VM using it
#   MAX_INSTANCES, VIDEO_RAM, MAX_HEADS, MAX_RESOLUTION_X, MAX_RESOLUTION_Y
#               - presumably refreshed per profile by
#                 parse_and_add_gpu_properties; verify against that helper
# Globals written:
#   vlist
# Arguments:
#   $1: mdev_base_path (e.g., /sys/bus/pci/devices/.../mdev_supported_types)
#   $2: bdf (e.g., 01:00.0)
process_mdev_instances() {
	local mdev_base_path="$1"
	local bdf="$2"

	# Nothing to do when the device exposes no mdev types.
	[[ -d "$mdev_base_path" ]] || return 0

	# Keep loop and scratch variables local so they cannot clobber same-named
	# variables in the caller's scope.
	local PROF_DIR UDIR DEVICE_DIR
	local PROFILE_NAME MDEV_UUID raw USED_JSON

	# The libvirt address depends only on the BDF, so derive it once up front.
	local DOMAIN="0x0000"
	local BUS="0x${bdf:0:2}"
	local SLOT="0x${bdf:3:2}"
	local FUNC="0x${bdf:6:1}"

	for PROF_DIR in "$mdev_base_path"/*; do
		[[ -d "$PROF_DIR" ]] || continue

		# Prefer the 'name' file; fall back to the profile directory name.
		if [[ -f "$PROF_DIR/name" ]]; then
			PROFILE_NAME=$(<"$PROF_DIR/name")
		else
			PROFILE_NAME=$(basename "$PROF_DIR")
		fi

		parse_and_add_gpu_properties "$PROF_DIR/description"

		# Existing mdev instances appear as UUID-named entries under <profile>/devices.
		DEVICE_DIR="$PROF_DIR/devices"
		[[ -d "$DEVICE_DIR" ]] || continue

		for UDIR in "$DEVICE_DIR"/*; do
			[[ -d "$UDIR" ]] || continue
			MDEV_UUID=$(basename "$UDIR")

			# Look up which VM (if any) uses this UUID; empty when unassigned.
			raw="${mdev_to_vm[$MDEV_UUID]:-}"
			USED_JSON=$(to_json_vm "$raw")

			vlist+=(
				"{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}")
		done
	done
}
| 
 | ||||
| # === GPU Discovery === | ||||
| 
 | ||||
| mapfile -t LINES < <(lspci -nnm) | ||||
| @ -588,51 +637,9 @@ for LINE in "${LINES[@]}"; do | ||||
| 	# === vGPU (MDEV) instances === | ||||
| 	VGPU_ARRAY="[]" | ||||
| 	declare -a vlist=() | ||||
| 	# Process mdev on the Physical Function | ||||
| 	MDEV_BASE="/sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types" | ||||
| 	if [[ -d "$MDEV_BASE" ]]; then | ||||
| 		for PROF_DIR in "$MDEV_BASE"/*; do | ||||
| 			[[ -d "$PROF_DIR" ]] || continue | ||||
| 
 | ||||
| 			# Read the human-readable profile name from the 'name' file | ||||
| 			if [[ -f "$PROF_DIR/name" ]]; then | ||||
| 				PROFILE_NAME=$(<"$PROF_DIR/name") | ||||
| 			else | ||||
| 				PROFILE_NAME=$(basename "$PROF_DIR") | ||||
| 			fi | ||||
| 
 | ||||
| 			# Fetch max_instance from the description file, if present | ||||
| 			parse_and_add_gpu_properties "$PROF_DIR/description" | ||||
| 
 | ||||
| 			# Under each profile, existing UUIDs appear in: | ||||
| 			#    /sys/bus/pci/devices/0000:$PCI_ADDR/mdev_supported_types/<PROFILE>/devices/* | ||||
| 			DEVICE_DIR="$PROF_DIR/devices" | ||||
| 			if [[ -d "$DEVICE_DIR" ]]; then | ||||
| 				for UDIR in "$DEVICE_DIR"/*; do | ||||
| 					[[ -d $UDIR ]] || continue | ||||
| 					MDEV_UUID=$(basename "$UDIR") | ||||
| 
 | ||||
| 					# libvirt_address uses PF BDF | ||||
| 					DOMAIN="0x0000" | ||||
| 					BUS="0x${PCI_ADDR:0:2}" | ||||
| 					SLOT="0x${PCI_ADDR:3:2}" | ||||
| 					FUNC="0x${PCI_ADDR:6:1}" | ||||
| 
 | ||||
| 					# Determine which VM uses this UUID | ||||
| 					raw="${mdev_to_vm[$MDEV_UUID]:-}" | ||||
| 					USED_JSON=$(to_json_vm "$raw") | ||||
| 
 | ||||
| 					vlist+=( | ||||
| 						"{\"mdev_uuid\":\"$MDEV_UUID\",\"profile_name\":$(json_escape "$PROFILE_NAME"),\"max_instances\":$MAX_INSTANCES,\"video_ram\":$VIDEO_RAM,\"max_heads\":$MAX_HEADS,\"max_resolution_x\":$MAX_RESOLUTION_X,\"max_resolution_y\":$MAX_RESOLUTION_Y,\"libvirt_address\":{\"domain\":\"$DOMAIN\",\"bus\":\"$BUS\",\"slot\":\"$SLOT\",\"function\":\"$FUNC\"},\"used_by_vm\":$USED_JSON}") | ||||
| 				done | ||||
| 			fi | ||||
| 		done | ||||
| 		if [ ${#vlist[@]} -gt 0 ]; then | ||||
| 			VGPU_ARRAY="[$( | ||||
| 				IFS=, | ||||
| 				echo "${vlist[*]}" | ||||
| 			)]" | ||||
| 		fi | ||||
| 	fi | ||||
| 	process_mdev_instances "$MDEV_BASE" "$PCI_ADDR" | ||||
| 
 | ||||
| 	# === VF instances (SR-IOV / MIG) === | ||||
| 	VF_ARRAY="[]" | ||||
| @ -644,6 +651,12 @@ for LINE in "${LINES[@]}"; do | ||||
| 			VF_ADDR=${VF_PATH##*/} # e.g. "0000:65:00.2" | ||||
| 			VF_BDF="${VF_ADDR:5}"  # "65:00.2" | ||||
| 
 | ||||
| 			# For NVIDIA SR-IOV, check for vGPU (mdev) on the VF itself | ||||
| 			if [[ "$VENDOR_ID" == "10de" ]]; then | ||||
| 				VF_MDEV_BASE="$VF_PATH/mdev_supported_types" | ||||
| 				process_mdev_instances "$VF_MDEV_BASE" "$VF_BDF" | ||||
| 			fi | ||||
| 
 | ||||
| 			DOMAIN="0x0000" | ||||
| 			BUS="0x${VF_BDF:0:2}" | ||||
| 			SLOT="0x${VF_BDF:3:2}" | ||||
| @ -674,6 +687,14 @@ for LINE in "${LINES[@]}"; do | ||||
| 		fi | ||||
| 	fi | ||||
| 
 | ||||
| 	# Consolidate all vGPU instances (from PF and VFs) | ||||
| 	if [ ${#vlist[@]} -gt 0 ]; then | ||||
| 		VGPU_ARRAY="[$( | ||||
| 			IFS=, | ||||
| 			echo "${vlist[*]}" | ||||
| 		)]" | ||||
| 	fi | ||||
| 
 | ||||
| 	# === full_passthrough block === | ||||
| 	# If vgpu_instances and vf_instances are empty, we can assume full passthrough | ||||
| 	FP_ENABLED=0 | ||||
|  | ||||
| @ -795,7 +795,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable | ||||
|                             vgpuProfile.getName(), gpuDevice.getBusAddress(), gpuCard.getVendorId(), | ||||
|                             gpuCard.getVendorName(), gpuCard.getDeviceId(), gpuCard.getDeviceName()); | ||||
|                     vgpuInfo.setDisplay(serviceOffering.getGpuDisplay()); | ||||
| 
 | ||||
|                     if (gpuDevice.getParentGpuDeviceId() != null) { | ||||
|                         GpuDeviceVO parentGpuDevice = gpuDeviceDao.findById(gpuDevice.getParentGpuDeviceId()); | ||||
|                         if (parentGpuDevice != null) { | ||||
| @ -891,14 +890,20 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable | ||||
|                         } else { | ||||
|                             // Update the device's info | ||||
|                             GpuDeviceVO parentGpuDevice = null; | ||||
|                             if (existingDevice.getParentGpuDeviceId() == null | ||||
|                                 && deviceInfo.getParentBusAddress() != null) { | ||||
|                             if (deviceInfo.getParentBusAddress() != null) { | ||||
|                                 parentGpuDevice = gpuDeviceDao.findByHostIdAndBusAddress(host.getId(), | ||||
|                                         deviceInfo.getParentBusAddress()); | ||||
|                                 if (parentGpuDevice != null) { | ||||
|                                     existingDevice.setParentGpuDeviceId(parentGpuDevice.getId()); | ||||
|                                     parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly); | ||||
|                                     gpuDeviceDao.persist(parentGpuDevice); | ||||
|                                 } | ||||
|                             } | ||||
|                             if (deviceInfo.isPassthroughEnabled()) { | ||||
|                                 existingDevice.setType(deviceInfo.getDeviceType()); | ||||
|                             } else { | ||||
|                                 existingDevice.setType(GpuDevice.DeviceType.VGPUOnly); | ||||
|                             } | ||||
|                             if (existingDevice.getPciRoot() == null) { | ||||
|                                 existingDevice.setPciRoot(deviceInfo.getPciRoot()); | ||||
|                             } | ||||
| @ -913,7 +918,6 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable | ||||
|                     for (final GpuDeviceVO device : gpuDevicesToDisableMap.values()) { | ||||
|                         logger.info("Disabling GPU device {} on host {} due to missing address in the new devices on the host.", device, host); | ||||
|                         device.setState(GpuDevice.State.Error); | ||||
|                         device.setManagedState(GpuDevice.ManagedState.Unmanaged); | ||||
|                         gpuDeviceDao.update(device.getId(), device); | ||||
|                         checkAndUpdateParentGpuDeviceState(device.getParentGpuDeviceId()); | ||||
|                     } | ||||
| @ -1024,11 +1028,14 @@ public class GpuServiceImpl extends ManagerBase implements GpuService, Pluggable | ||||
|                     deviceInfo.getParentBusAddress()); | ||||
|             if (parentGpuDevice != null) { | ||||
|                 parentGpuDeviceId = parentGpuDevice.getId(); | ||||
|                 parentGpuDevice.setType(GpuDevice.DeviceType.VGPUOnly); | ||||
|                 gpuDeviceDao.persist(parentGpuDevice); | ||||
|             } | ||||
|         } | ||||
|         GpuDeviceVO gpuDevice = new GpuDeviceVO(card.getId(), vgpuProfile.getId(), deviceInfo.getBusAddress(), | ||||
|                 host.getId(), parentGpuDeviceId, deviceInfo.getNumaNode(), deviceInfo.getPciRoot()); | ||||
|         gpuDevice.setHostId(host.getId()); | ||||
|         gpuDevice.setType(deviceInfo.getDeviceType()); | ||||
|         gpuDevice.setBusAddress(deviceInfo.getBusAddress()); | ||||
|         gpuDevice.setCardId(card.getId()); | ||||
|         setStateAndVmName(deviceInfo, gpuDevice, parentGpuDevice); | ||||
|  | ||||
| @ -167,15 +167,7 @@ export default { | ||||
|       Object.values(cardGroups).forEach(cardGroup => { | ||||
|         const profileCount = Object.keys(cardGroup.profiles).length | ||||
| 
 | ||||
|         // Filter devices for card summary calculation | ||||
|         // Exclude passthrough profile devices from aggregates if there are multiple profiles | ||||
|         let cardDevicesForSummary = cardGroup.devices | ||||
|         if (profileCount > 1) { | ||||
|           cardDevicesForSummary = cardGroup.devices.filter(device => !device.vgpuprofilename || device.vgpuprofilename.toLowerCase() !== 'passthrough' | ||||
|           ) | ||||
|         } | ||||
| 
 | ||||
|         const cardSummary = this.calculateSummary(cardDevicesForSummary) | ||||
|         const cardSummary = this.calculateSummary(cardGroup.devices) | ||||
|         const cardKey = `card-${cardGroup.gpucardname}` | ||||
| 
 | ||||
|         const cardNode = { | ||||
| @ -192,7 +184,6 @@ export default { | ||||
|           expandedKeys.push(cardKey) | ||||
| 
 | ||||
|           cardNode.children = Object.values(cardGroup.profiles) | ||||
|             .filter(profile => profile.vgpuprofilename.toLowerCase() !== 'passthrough') | ||||
|             .map(profile => { | ||||
|               const profileSummary = this.calculateSummary(profile.devices) | ||||
|               return { | ||||
| @ -204,7 +195,6 @@ export default { | ||||
|               } | ||||
|             }) | ||||
|         } | ||||
| 
 | ||||
|         summaryTree.push(cardNode) | ||||
|       }) | ||||
| 
 | ||||
| @ -222,6 +212,9 @@ export default { | ||||
|       } | ||||
| 
 | ||||
|       devices.forEach(device => { | ||||
|         if (device.gpudevicetype === 'VGPUOnly') { | ||||
|           return | ||||
|         } | ||||
|         summary.total++ | ||||
| 
 | ||||
|         if (device.virtualmachineid) { | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user