mirror of
https://github.com/apache/cloudstack.git
synced 2025-11-02 11:52:28 +01:00
CLOUDSTACK-6649: CS is not giving the system-wide capacity for GPU resource.
This commit is contained in:
parent
a605ca09cd
commit
35cd61c463
@ -18,6 +18,7 @@ package com.cloud.agent.api;
|
||||
public class VgpuTypesInfo {
|
||||
|
||||
private String modelName;
|
||||
private String groupName;
|
||||
private Long maxHeads;
|
||||
private Long videoRam;
|
||||
private Long maxResolutionX;
|
||||
@ -30,6 +31,10 @@ public class VgpuTypesInfo {
|
||||
return modelName;
|
||||
}
|
||||
|
||||
public String getGroupName() {
|
||||
return groupName;
|
||||
}
|
||||
|
||||
public Long getVideoRam() {
|
||||
return videoRam;
|
||||
}
|
||||
@ -66,8 +71,9 @@ public class VgpuTypesInfo {
|
||||
this.maxCapacity = maxCapacity;
|
||||
}
|
||||
|
||||
public VgpuTypesInfo(String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
|
||||
public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
|
||||
Long remainingCapacity, Long maxCapacity) {
|
||||
this.groupName = groupName;
|
||||
this.modelName = modelName;
|
||||
this.videoRam = videoRam;
|
||||
this.maxHeads = maxHeads;
|
||||
|
||||
@ -30,6 +30,7 @@ public interface Capacity extends InternalIdentity, Identity {
|
||||
public static final short CAPACITY_TYPE_VLAN = 7;
|
||||
public static final short CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP = 8;
|
||||
public static final short CAPACITY_TYPE_LOCAL_STORAGE = 9;
|
||||
public static final short CAPACITY_TYPE_GPU = 19;
|
||||
|
||||
public Long getHostOrPoolId();
|
||||
|
||||
|
||||
@ -328,6 +328,7 @@ public class HostResponse extends BaseResponse {
|
||||
public void setGpuGroups(List<GpuResponse> gpuGroup) {
|
||||
this.gpuGroup = gpuGroup;
|
||||
}
|
||||
|
||||
public void setDiskSizeTotal(Long diskSizeTotal) {
|
||||
this.diskSizeTotal = diskSizeTotal;
|
||||
}
|
||||
|
||||
@ -24,7 +24,7 @@ import com.cloud.utils.db.GenericDao;
|
||||
public interface HostGpuGroupsDao extends GenericDao<HostGpuGroupsVO, Long> {
|
||||
|
||||
/**
|
||||
* Find host device by hostId and PCI ID
|
||||
* Find host device by hostId and groupName
|
||||
* @param hostId the host
|
||||
* @param groupName GPU group
|
||||
* @return HostGpuGroupsVO
|
||||
|
||||
@ -25,25 +25,34 @@ import com.cloud.utils.db.GenericDao;
|
||||
|
||||
public interface VGPUTypesDao extends GenericDao<VGPUTypesVO, Long> {
|
||||
|
||||
/**
|
||||
* Find VGPU types by group Id
|
||||
* @param groupId of the GPU group
|
||||
* @return list of VGPUTypesVO
|
||||
*/
|
||||
List<VGPUTypesVO> listByGroupId(long groupId);
|
||||
/**
|
||||
* List zonewide/podwide/clusterwide GPU card capacities.
|
||||
* @param zoneId
|
||||
* @param podId
|
||||
* @param clusterId
|
||||
* @return Custom Query result
|
||||
*/
|
||||
List<VgpuTypesInfo> listGPUCapacities(Long zoneId, Long podId, Long clusterId);
|
||||
|
||||
/**
|
||||
* Find VGPU type by group Id and VGPU type
|
||||
* @param groupId of the GPU group
|
||||
* @param vgpuType name of VGPU type
|
||||
* @return VGPUTypesVO
|
||||
*/
|
||||
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
|
||||
/**
|
||||
* Find VGPU types by group Id
|
||||
* @param groupId of the GPU group
|
||||
* @return list of VGPUTypesVO
|
||||
*/
|
||||
List<VGPUTypesVO> listByGroupId(long groupId);
|
||||
|
||||
/**
|
||||
* Save the list of enabled VGPU types
|
||||
* @param hostId the host
|
||||
* @param groupDetails with enabled VGPU types
|
||||
*/
|
||||
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
|
||||
/**
|
||||
* Find VGPU type by group Id and VGPU type
|
||||
* @param groupId of the GPU group
|
||||
* @param vgpuType name of VGPU type
|
||||
* @return VGPUTypesVO
|
||||
*/
|
||||
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
|
||||
|
||||
/**
|
||||
* Save the list of enabled VGPU types
|
||||
* @param hostId the host
|
||||
* @param groupDetails with enabled VGPU types
|
||||
*/
|
||||
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
|
||||
}
|
||||
|
||||
@ -16,6 +16,10 @@
|
||||
//under the License.
|
||||
package com.cloud.gpu.dao;
|
||||
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
@ -33,6 +37,8 @@ import com.cloud.gpu.VGPUTypesVO;
|
||||
import com.cloud.utils.db.GenericDaoBase;
|
||||
import com.cloud.utils.db.SearchBuilder;
|
||||
import com.cloud.utils.db.SearchCriteria;
|
||||
import com.cloud.utils.db.TransactionLegacy;
|
||||
import com.cloud.utils.exception.CloudRuntimeException;
|
||||
|
||||
@Component
|
||||
@Local(value = VGPUTypesDao.class)
|
||||
@ -41,11 +47,14 @@ public class VGPUTypesDaoImpl extends GenericDaoBase<VGPUTypesVO, Long> implemen
|
||||
|
||||
private final SearchBuilder<VGPUTypesVO> _searchByGroupId;
|
||||
private final SearchBuilder<VGPUTypesVO> _searchByGroupIdVGPUType;
|
||||
// private final SearchBuilder<VGPUTypesVO> _searchByHostId;
|
||||
// private final SearchBuilder<VGPUTypesVO> _searchForStaleEntries;
|
||||
|
||||
@Inject protected HostGpuGroupsDao _hostGpuGroupsDao;
|
||||
|
||||
private static final String LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES =
|
||||
"SELECT host_gpu_groups.group_name, vgpu_type, max_vgpu_per_pgpu, SUM(remaining_capacity) AS remaining_capacity, SUM(max_capacity) AS total_capacity FROM" +
|
||||
" `cloud`.`vgpu_types` INNER JOIN `cloud`.`host_gpu_groups` ON vgpu_types.gpu_group_id = host_gpu_groups.id INNER JOIN `cloud`.`host`" +
|
||||
" ON host_gpu_groups.host_id = host.id WHERE host.type = 'Routing' AND host.data_center_id = ?";
|
||||
|
||||
public VGPUTypesDaoImpl() {
|
||||
|
||||
_searchByGroupId = createSearchBuilder();
|
||||
@ -58,6 +67,47 @@ public class VGPUTypesDaoImpl extends GenericDaoBase<VGPUTypesVO, Long> implemen
|
||||
_searchByGroupIdVGPUType.done();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VgpuTypesInfo> listGPUCapacities(Long dcId, Long podId, Long clusterId) {
|
||||
StringBuilder finalQuery = new StringBuilder();
|
||||
TransactionLegacy txn = TransactionLegacy.currentTxn();
|
||||
PreparedStatement pstmt = null;
|
||||
List<Long> resourceIdList = new ArrayList<Long>();
|
||||
ArrayList<VgpuTypesInfo> result = new ArrayList<VgpuTypesInfo>();
|
||||
|
||||
resourceIdList.add(dcId);
|
||||
finalQuery.append(LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES);
|
||||
|
||||
if (podId != null) {
|
||||
finalQuery.append(" AND host.pod_id = ?");
|
||||
resourceIdList.add(podId);
|
||||
}
|
||||
|
||||
if (clusterId != null) {
|
||||
finalQuery.append(" AND host.cluster_id = ?");
|
||||
resourceIdList.add(clusterId);
|
||||
}
|
||||
finalQuery.append(" GROUP BY host_gpu_groups.group_name, vgpu_type");
|
||||
|
||||
try {
|
||||
pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
|
||||
for (int i = 0; i < resourceIdList.size(); i++) {
|
||||
pstmt.setLong(1 + i, resourceIdList.get(i));
|
||||
}
|
||||
ResultSet rs = pstmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
|
||||
VgpuTypesInfo gpuCapacity = new VgpuTypesInfo(rs.getString(1), rs.getString(2), null, null, null, null, rs.getLong(3), rs.getLong(4), rs.getLong(5));
|
||||
result.add(gpuCapacity);
|
||||
}
|
||||
return result;
|
||||
} catch (SQLException e) {
|
||||
throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
|
||||
} catch (Throwable e) {
|
||||
throw new CloudRuntimeException("Caught: " + finalQuery, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VGPUTypesVO> listByGroupId(long groupId) {
|
||||
SearchCriteria<VGPUTypesVO> sc = _searchByGroupId.create();
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
package com.cloud.vm.dao;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@ -133,4 +134,6 @@ public interface VMInstanceDao extends GenericDao<VMInstanceVO, Long>, StateDao<
|
||||
void resetVmPowerStateTracking(long instanceId);
|
||||
|
||||
void resetHostPowerStateTracking(long hostId);
|
||||
|
||||
/**
 * Counts running VMs per GPU device / vGPU type combination within a zone, optionally
 * narrowed to a pod and/or cluster.
 *
 * @param dcId zone id the hosts must belong to
 * @param podId pod id filter, not applied when null
 * @param clusterId cluster id filter, not applied when null
 * @return map whose key is the service offering's 'pciDevice' detail value directly
 *         concatenated with its 'vgpuType' detail value, and whose value is the VM count
 */
HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId);
|
||||
}
|
||||
|
||||
@ -115,6 +115,13 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
|
||||
|
||||
private static final String ORDER_HOSTS_NUMBER_OF_VMS_FOR_ACCOUNT_PART2 = " GROUP BY host.id ORDER BY 2 ASC ";
|
||||
|
||||
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1 =
|
||||
"SELECT pci, type, SUM(vmcount) FROM (SELECT MAX(IF(offering.name = 'pciDevice',value,'')) AS pci, MAX(IF(offering.name = 'vgpuType', value,'')) " +
|
||||
"AS type, COUNT(DISTINCT vm.id) AS vmcount FROM service_offering_details offering INNER JOIN vm_instance vm ON offering.service_offering_id = vm.service_offering_id " +
|
||||
"INNER JOIN `cloud`.`host` ON vm.host_id = host.id WHERE vm.state = 'Running' AND host.data_center_id = ? ";
|
||||
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES2 =
|
||||
"GROUP BY offering.service_offering_id) results GROUP BY pci, type";
|
||||
|
||||
@Inject
|
||||
protected HostDao _hostDao;
|
||||
|
||||
@ -640,6 +647,45 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId) {
|
||||
StringBuilder finalQuery = new StringBuilder();
|
||||
TransactionLegacy txn = TransactionLegacy.currentTxn();
|
||||
PreparedStatement pstmt = null;
|
||||
List<Long> resourceIdList = new ArrayList<Long>();
|
||||
HashMap<String, Long> result = new HashMap<String, Long>();
|
||||
|
||||
resourceIdList.add(dcId);
|
||||
finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES1);
|
||||
|
||||
if (podId != null) {
|
||||
finalQuery.append(" AND host.pod_id = ?");
|
||||
resourceIdList.add(podId);
|
||||
}
|
||||
|
||||
if (clusterId != null) {
|
||||
finalQuery.append(" AND host.cluster_id = ?");
|
||||
resourceIdList.add(clusterId);
|
||||
}
|
||||
finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES2);
|
||||
|
||||
try {
|
||||
pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
|
||||
for (int i = 0; i < resourceIdList.size(); i++) {
|
||||
pstmt.setLong(1 + i, resourceIdList.get(i));
|
||||
}
|
||||
ResultSet rs = pstmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
result.put(rs.getString(1).concat(rs.getString(2)), rs.getLong(3));
|
||||
}
|
||||
return result;
|
||||
} catch (SQLException e) {
|
||||
throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
|
||||
} catch (Throwable e) {
|
||||
throw new CloudRuntimeException("Caught: " + finalQuery, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long countRunningByAccount(long accountId) {
|
||||
SearchCriteria<Long> sc = CountRunningByAccount.create();
|
||||
|
||||
@ -121,7 +121,7 @@ public class XenServer620SP1Resource extends XenServer620Resource {
|
||||
entry.setMaxVmCapacity(maxCapacity);
|
||||
gpuCapacity.put(record.modelName, entry);
|
||||
} else {
|
||||
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(record.modelName, record.framebufferSize, record.maxHeads,
|
||||
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(null, record.modelName, record.framebufferSize, record.maxHeads,
|
||||
record.maxResolutionX, record.maxResolutionY, maxCapacity, remainingCapacity, maxCapacity);
|
||||
gpuCapacity.put(record.modelName, vgpuTypeRecord);
|
||||
}
|
||||
|
||||
@ -18,6 +18,7 @@ package com.cloud.api;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Map;
|
||||
@ -64,6 +65,7 @@ import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
|
||||
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
|
||||
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
|
||||
|
||||
import com.cloud.agent.api.VgpuTypesInfo;
|
||||
import com.cloud.api.query.dao.AccountJoinDao;
|
||||
import com.cloud.api.query.dao.AffinityGroupJoinDao;
|
||||
import com.cloud.api.query.dao.AsyncJobJoinDao;
|
||||
@ -1101,6 +1103,14 @@ public class ApiDBUtils {
|
||||
return s_hostGpuGroupsDao.listByHostId(hostId);
|
||||
}
|
||||
|
||||
public static List<VgpuTypesInfo> getGpuCapacites(Long zoneId, Long podId, Long clusterId) {
|
||||
return s_vgpuTypesDao.listGPUCapacities(zoneId, podId, clusterId);
|
||||
}
|
||||
|
||||
public static HashMap<String, Long> getVgpuVmsCount(Long zoneId, Long podId, Long clusterId) {
|
||||
return s_vmDao.countVgpuVMs(zoneId, podId, clusterId);
|
||||
}
|
||||
|
||||
public static List<VGPUTypesVO> getVgpus(long groupId) {
|
||||
return s_vgpuTypesDao.listByGroupId(groupId);
|
||||
}
|
||||
|
||||
@ -150,6 +150,7 @@ import org.apache.cloudstack.usage.Usage;
|
||||
import org.apache.cloudstack.usage.UsageService;
|
||||
import org.apache.cloudstack.usage.UsageTypes;
|
||||
|
||||
import com.cloud.agent.api.VgpuTypesInfo;
|
||||
import com.cloud.api.query.ViewResponseHelper;
|
||||
import com.cloud.api.query.vo.AccountJoinVO;
|
||||
import com.cloud.api.query.vo.AsyncJobJoinVO;
|
||||
@ -193,6 +194,7 @@ import com.cloud.domain.Domain;
|
||||
import com.cloud.event.Event;
|
||||
import com.cloud.exception.InvalidParameterValueException;
|
||||
import com.cloud.exception.PermissionDeniedException;
|
||||
import com.cloud.gpu.GPU;
|
||||
import com.cloud.host.Host;
|
||||
import com.cloud.host.HostVO;
|
||||
import com.cloud.hypervisor.HypervisorCapabilities;
|
||||
@ -1467,6 +1469,44 @@ public class ApiResponseHelper implements ResponseGenerator {
|
||||
capacityResponses.add(capacityResponse);
|
||||
}
|
||||
|
||||
// Append one GPU capacity entry, scoped by the zone/pod/cluster of the first capacity row.
// The size guard keeps result.get(0) from throwing when there are no capacity rows at all.
List<VgpuTypesInfo> gpuCapacities;
if (result.size() > 0
        && (gpuCapacities = ApiDBUtils.getGpuCapacites(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId())) != null) {
    HashMap<String, Long> vgpuVMs = ApiDBUtils.getVgpuVmsCount(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId());

    float capacityUsed = 0;
    long capacityMax = 0;
    for (VgpuTypesInfo capacity : gpuCapacities) {
        // VM counts are keyed by group name + vGPU type with no delimiter.
        String vgpuKey = capacity.getGroupName().concat(capacity.getModelName());
        Long vmCount = vgpuVMs.get(vgpuKey);
        Long maxVgpuPerGpu = capacity.getMaxVpuPerGpu();
        // Each VM uses 1/maxVgpuPerGpu of a card; skip a missing or zero divisor so the
        // running total can never become Infinity.
        if (vmCount != null && maxVgpuPerGpu != null && maxVgpuPerGpu > 0) {
            capacityUsed += (float)vmCount / maxVgpuPerGpu;
        }
        // Only the passthrough rows contribute to the total.
        if (capacity.getModelName().equals(GPU.vGPUType.passthrough.toString())) {
            capacityMax += capacity.getMaxCapacity();
        }
    }

    DataCenter zone = ApiDBUtils.findZoneById(result.get(0).getDataCenterId());
    CapacityResponse capacityResponse = new CapacityResponse();
    if (zone != null) {
        capacityResponse.setZoneId(zone.getUuid());
        capacityResponse.setZoneName(zone.getName());
    }
    if (result.get(0).getPodId() != null) {
        HostPodVO pod = ApiDBUtils.findPodById(result.get(0).getPodId());
        // Same defensive check as for the zone lookup.
        if (pod != null) {
            capacityResponse.setPodId(pod.getUuid());
            capacityResponse.setPodName(pod.getName());
        }
    }
    if (result.get(0).getClusterId() != null) {
        ClusterVO cluster = ApiDBUtils.findClusterById(result.get(0).getClusterId());
        if (cluster != null) {
            capacityResponse.setClusterId(cluster.getUuid());
            capacityResponse.setClusterName(cluster.getName());
        }
    }

    // With no passthrough capacity the ratio would be NaN or Infinity; report 0 percent instead.
    float percentUsed = capacityMax > 0 ? capacityUsed / capacityMax * 100f : 0f;

    capacityResponse.setCapacityType(Capacity.CAPACITY_TYPE_GPU);
    capacityResponse.setCapacityUsed((long)Math.ceil(capacityUsed));
    capacityResponse.setCapacityTotal(capacityMax);
    capacityResponse.setPercentUsed(format.format(percentUsed));
    capacityResponse.setObjectName("capacity");
    capacityResponses.add(capacityResponse);
}
|
||||
return capacityResponses;
|
||||
}
|
||||
|
||||
|
||||
@ -1104,6 +1104,8 @@ cloudStack.converters = {
|
||||
return "VLAN";
|
||||
case 18:
|
||||
return "Secondary Storage VM";
|
||||
case 19:
|
||||
return "GPU";
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
@ -383,6 +383,9 @@
|
||||
},
|
||||
7: {
|
||||
name: _l('label.vlan')
|
||||
},
|
||||
19: {
|
||||
name: _l('GPU')
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user