CLOUDSTACK-6649: CS is not giving the system-wide capacity for GPU resource.

This commit is contained in:
Sanjay Tripathi 2014-05-13 15:00:11 +05:30
parent a605ca09cd
commit 35cd61c463
13 changed files with 195 additions and 24 deletions

View File

@ -18,6 +18,7 @@ package com.cloud.agent.api;
public class VgpuTypesInfo {
private String modelName;
private String groupName;
private Long maxHeads;
private Long videoRam;
private Long maxResolutionX;
@ -30,6 +31,10 @@ public class VgpuTypesInfo {
return modelName;
}
/**
 * Returns the GPU group name stored in this record.
 *
 * @return the current value of the {@code groupName} field
 */
public String getGroupName() {
    return this.groupName;
}
/**
 * Returns the video RAM value stored in this record.
 *
 * @return the current value of the {@code videoRam} field
 */
public Long getVideoRam() {
    return this.videoRam;
}
@ -66,8 +71,9 @@ public class VgpuTypesInfo {
this.maxCapacity = maxCapacity;
}
public VgpuTypesInfo(String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
Long remainingCapacity, Long maxCapacity) {
this.groupName = groupName;
this.modelName = modelName;
this.videoRam = videoRam;
this.maxHeads = maxHeads;

View File

@ -30,6 +30,7 @@ public interface Capacity extends InternalIdentity, Identity {
public static final short CAPACITY_TYPE_VLAN = 7;
public static final short CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP = 8;
public static final short CAPACITY_TYPE_LOCAL_STORAGE = 9;
public static final short CAPACITY_TYPE_GPU = 19;
public Long getHostOrPoolId();

View File

@ -328,6 +328,7 @@ public class HostResponse extends BaseResponse {
/**
 * Replaces the GPU group list carried by this response.
 *
 * @param gpuGroup the list to store in the {@code gpuGroup} field
 */
public void setGpuGroups(List<GpuResponse> gpuGroup) {
this.gpuGroup = gpuGroup;
}
/**
 * Sets the total disk size carried by this response.
 *
 * @param diskSizeTotal the value to store in the {@code diskSizeTotal} field
 */
public void setDiskSizeTotal(Long diskSizeTotal) {
this.diskSizeTotal = diskSizeTotal;
}

View File

@ -24,7 +24,7 @@ import com.cloud.utils.db.GenericDao;
public interface HostGpuGroupsDao extends GenericDao<HostGpuGroupsVO, Long> {
/**
* Find host device by hostId and PCI ID
* Find host device by hostId and groupName
* @param hostId the host
* @param groupName GPU group
* @return HostGpuGroupsVO

View File

@ -25,25 +25,34 @@ import com.cloud.utils.db.GenericDao;
public interface VGPUTypesDao extends GenericDao<VGPUTypesVO, Long> {
/**
* Find VGPU types by group Id
* @param groupId of the GPU group
* @return list of VGPUTypesVO
*/
List<VGPUTypesVO> listByGroupId(long groupId);
/**
* List zonewide/podwide/clusterwide GPU card capacities.
* @param zoneId
* @param podId
* @param clusterId
* @return Custom Query result
*/
List<VgpuTypesInfo> listGPUCapacities(Long zoneId, Long podId, Long clusterId);
/**
* Find VGPU type by group Id and VGPU type
* @param groupId of the GPU group
* @param vgpuType name of VGPU type
* @return VGPUTypesVO
*/
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
/**
* Find VGPU types by group Id
* @param groupId of the GPU group
* @return list of VGPUTypesVO
*/
List<VGPUTypesVO> listByGroupId(long groupId);
/**
* Save the list of enabled VGPU types
* @param hostId the host
* @param groupDetails with enabled VGPU types
*/
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
/**
* Find VGPU type by group Id and VGPU type
* @param groupId of the GPU group
* @param vgpuType name of VGPU type
* @return VGPUTypesVO
*/
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
/**
* Save the list of enabled VGPU types
* @param hostId the host
* @param groupDetails with enabled VGPU types
*/
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
}

View File

@ -16,6 +16,10 @@
//under the License.
package com.cloud.gpu.dao;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -33,6 +37,8 @@ import com.cloud.gpu.VGPUTypesVO;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.TransactionLegacy;
import com.cloud.utils.exception.CloudRuntimeException;
@Component
@Local(value = VGPUTypesDao.class)
@ -41,11 +47,14 @@ public class VGPUTypesDaoImpl extends GenericDaoBase<VGPUTypesVO, Long> implemen
private final SearchBuilder<VGPUTypesVO> _searchByGroupId;
private final SearchBuilder<VGPUTypesVO> _searchByGroupIdVGPUType;
// private final SearchBuilder<VGPUTypesVO> _searchByHostId;
// private final SearchBuilder<VGPUTypesVO> _searchForStaleEntries;
@Inject protected HostGpuGroupsDao _hostGpuGroupsDao;
private static final String LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES =
"SELECT host_gpu_groups.group_name, vgpu_type, max_vgpu_per_pgpu, SUM(remaining_capacity) AS remaining_capacity, SUM(max_capacity) AS total_capacity FROM" +
" `cloud`.`vgpu_types` INNER JOIN `cloud`.`host_gpu_groups` ON vgpu_types.gpu_group_id = host_gpu_groups.id INNER JOIN `cloud`.`host`" +
" ON host_gpu_groups.host_id = host.id WHERE host.type = 'Routing' AND host.data_center_id = ?";
public VGPUTypesDaoImpl() {
_searchByGroupId = createSearchBuilder();
@ -58,6 +67,47 @@ public class VGPUTypesDaoImpl extends GenericDaoBase<VGPUTypesVO, Long> implemen
_searchByGroupIdVGPUType.done();
}
/**
 * Lists GPU capacities aggregated per GPU group and vGPU type for routing hosts
 * in a zone, optionally narrowed to a pod and/or a cluster.
 *
 * <p>The zone filter is always applied; the pod and cluster filters are only added
 * to the SQL when the corresponding argument is non-null.
 *
 * @param dcId      id bound to {@code host.data_center_id}
 * @param podId     id bound to {@code host.pod_id}, or {@code null} to skip the filter
 * @param clusterId id bound to {@code host.cluster_id}, or {@code null} to skip the filter
 * @return one {@link VgpuTypesInfo} per result row, carrying group name, vGPU type,
 *         max vGPUs per pGPU, summed remaining capacity and summed max capacity
 * @throws CloudRuntimeException if building or running the query fails
 */
@Override
public List<VgpuTypesInfo> listGPUCapacities(Long dcId, Long podId, Long clusterId) {
    TransactionLegacy txn = TransactionLegacy.currentTxn();

    // Bind values are collected in the same order as their placeholders appear in the SQL.
    StringBuilder finalQuery = new StringBuilder(LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES);
    List<Long> bindValues = new ArrayList<Long>();
    bindValues.add(dcId);
    if (podId != null) {
        finalQuery.append(" AND host.pod_id = ?");
        bindValues.add(podId);
    }
    if (clusterId != null) {
        finalQuery.append(" AND host.cluster_id = ?");
        bindValues.add(clusterId);
    }
    finalQuery.append(" GROUP BY host_gpu_groups.group_name, vgpu_type");

    ArrayList<VgpuTypesInfo> capacities = new ArrayList<VgpuTypesInfo>();
    try {
        PreparedStatement pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
        int position = 1;
        for (Long value : bindValues) {
            pstmt.setLong(position++, value);
        }

        ResultSet rs = pstmt.executeQuery();
        while (rs.next()) {
            // Column order follows the SELECT list of the base query.
            String groupName = rs.getString(1);
            String vgpuType = rs.getString(2);
            long maxVgpuPerPgpu = rs.getLong(3);
            long remainingCapacity = rs.getLong(4);
            long totalCapacity = rs.getLong(5);
            capacities.add(new VgpuTypesInfo(groupName, vgpuType, null, null, null, null, maxVgpuPerPgpu, remainingCapacity, totalCapacity));
        }
        return capacities;
    } catch (SQLException e) {
        throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
    } catch (Throwable e) {
        throw new CloudRuntimeException("Caught: " + finalQuery, e);
    }
}
@Override
public List<VGPUTypesVO> listByGroupId(long groupId) {
SearchCriteria<VGPUTypesVO> sc = _searchByGroupId.create();

View File

@ -17,6 +17,7 @@
package com.cloud.vm.dao;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -133,4 +134,6 @@ public interface VMInstanceDao extends GenericDao<VMInstanceVO, Long>, StateDao<
void resetVmPowerStateTracking(long instanceId);
void resetHostPowerStateTracking(long hostId);
HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId);
}

View File

@ -115,6 +115,13 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
private static final String ORDER_HOSTS_NUMBER_OF_VMS_FOR_ACCOUNT_PART2 = " GROUP BY host.id ORDER BY 2 ASC ";
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1 =
"SELECT pci, type, SUM(vmcount) FROM (SELECT MAX(IF(offering.name = 'pciDevice',value,'')) AS pci, MAX(IF(offering.name = 'vgpuType', value,'')) " +
"AS type, COUNT(DISTINCT vm.id) AS vmcount FROM service_offering_details offering INNER JOIN vm_instance vm ON offering.service_offering_id = vm.service_offering_id " +
"INNER JOIN `cloud`.`host` ON vm.host_id = host.id WHERE vm.state = 'Running' AND host.data_center_id = ? ";
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES2 =
"GROUP BY offering.service_offering_id) results GROUP BY pci, type";
@Inject
protected HostDao _hostDao;
@ -640,6 +647,45 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
}
}
/**
 * Counts running VMs per (pciDevice, vgpuType) service-offering detail pair for
 * hosts in a zone, optionally narrowed to a pod and/or a cluster.
 *
 * <p>The zone filter is always applied; the pod and cluster filters are only added
 * to the SQL when the corresponding argument is non-null.
 *
 * @param dcId      id bound to {@code host.data_center_id}
 * @param podId     id bound to {@code host.pod_id}, or {@code null} to skip the filter
 * @param clusterId id bound to {@code host.cluster_id}, or {@code null} to skip the filter
 * @return map whose key is the {@code pci} column value directly concatenated with the
 *         {@code type} column value (no separator) and whose value is the summed VM count
 * @throws CloudRuntimeException if building or running the query fails
 */
@Override
public HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId) {
    StringBuilder finalQuery = new StringBuilder();
    TransactionLegacy txn = TransactionLegacy.currentTxn();
    PreparedStatement pstmt = null;
    List<Long> resourceIdList = new ArrayList<Long>();
    HashMap<String, Long> result = new HashMap<String, Long>();

    resourceIdList.add(dcId);
    finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES1);

    if (podId != null) {
        finalQuery.append(" AND host.pod_id = ?");
        resourceIdList.add(podId);
    }

    if (clusterId != null) {
        finalQuery.append(" AND host.cluster_id = ?");
        resourceIdList.add(clusterId);
    }

    // The closing fragment starts with "GROUP BY" and the optional filters end with "?".
    // Without an explicit separator the statement would read "... = ?GROUP BY ...", which
    // breaks once the placeholder is replaced by a number (e.g. "5GROUP").
    finalQuery.append(' ').append(COUNT_VMS_BASED_ON_VGPU_TYPES2);

    try {
        pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
        for (int i = 0; i < resourceIdList.size(); i++) {
            // JDBC parameter indexes are 1-based.
            pstmt.setLong(1 + i, resourceIdList.get(i));
        }
        ResultSet rs = pstmt.executeQuery();
        while (rs.next()) {
            result.put(rs.getString(1).concat(rs.getString(2)), rs.getLong(3));
        }
        return result;
    } catch (SQLException e) {
        throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
    } catch (Throwable e) {
        throw new CloudRuntimeException("Caught: " + finalQuery, e);
    }
}
@Override
public Long countRunningByAccount(long accountId) {
SearchCriteria<Long> sc = CountRunningByAccount.create();

View File

@ -121,7 +121,7 @@ public class XenServer620SP1Resource extends XenServer620Resource {
entry.setMaxVmCapacity(maxCapacity);
gpuCapacity.put(record.modelName, entry);
} else {
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(record.modelName, record.framebufferSize, record.maxHeads,
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(null, record.modelName, record.framebufferSize, record.maxHeads,
record.maxResolutionX, record.maxResolutionY, maxCapacity, remainingCapacity, maxCapacity);
gpuCapacity.put(record.modelName, vgpuTypeRecord);
}

View File

@ -18,6 +18,7 @@ package com.cloud.api;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
@ -64,6 +65,7 @@ import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import com.cloud.agent.api.VgpuTypesInfo;
import com.cloud.api.query.dao.AccountJoinDao;
import com.cloud.api.query.dao.AffinityGroupJoinDao;
import com.cloud.api.query.dao.AsyncJobJoinDao;
@ -1101,6 +1103,14 @@ public class ApiDBUtils {
return s_hostGpuGroupsDao.listByHostId(hostId);
}
/**
 * Fetches aggregated GPU capacities for the given scope by delegating to
 * {@code s_vgpuTypesDao.listGPUCapacities}.
 *
 * @param zoneId    zone id passed through to the DAO
 * @param podId     pod id passed through to the DAO
 * @param clusterId cluster id passed through to the DAO
 * @return whatever the DAO returns for these arguments
 */
public static List<VgpuTypesInfo> getGpuCapacites(Long zoneId, Long podId, Long clusterId) {
    final List<VgpuTypesInfo> capacities = s_vgpuTypesDao.listGPUCapacities(zoneId, podId, clusterId);
    return capacities;
}
/**
 * Fetches per-vGPU-type VM counts for the given scope by delegating to
 * {@code s_vmDao.countVgpuVMs}.
 *
 * @param zoneId    zone id passed through to the DAO
 * @param podId     pod id passed through to the DAO
 * @param clusterId cluster id passed through to the DAO
 * @return whatever the DAO returns for these arguments
 */
public static HashMap<String, Long> getVgpuVmsCount(Long zoneId, Long podId, Long clusterId) {
    final HashMap<String, Long> vmCounts = s_vmDao.countVgpuVMs(zoneId, podId, clusterId);
    return vmCounts;
}
/**
 * Lists the vGPU types of a GPU group by delegating to
 * {@code s_vgpuTypesDao.listByGroupId}.
 *
 * @param groupId GPU group id passed through to the DAO
 * @return whatever the DAO returns for this group id
 */
public static List<VGPUTypesVO> getVgpus(long groupId) {
    final List<VGPUTypesVO> vgpuTypes = s_vgpuTypesDao.listByGroupId(groupId);
    return vgpuTypes;
}

View File

@ -150,6 +150,7 @@ import org.apache.cloudstack.usage.Usage;
import org.apache.cloudstack.usage.UsageService;
import org.apache.cloudstack.usage.UsageTypes;
import com.cloud.agent.api.VgpuTypesInfo;
import com.cloud.api.query.ViewResponseHelper;
import com.cloud.api.query.vo.AccountJoinVO;
import com.cloud.api.query.vo.AsyncJobJoinVO;
@ -193,6 +194,7 @@ import com.cloud.domain.Domain;
import com.cloud.event.Event;
import com.cloud.exception.InvalidParameterValueException;
import com.cloud.exception.PermissionDeniedException;
import com.cloud.gpu.GPU;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.hypervisor.HypervisorCapabilities;
@ -1467,6 +1469,44 @@ public class ApiResponseHelper implements ResponseGenerator {
capacityResponses.add(capacityResponse);
}
List<VgpuTypesInfo> gpuCapacities;
if ((gpuCapacities = ApiDBUtils.getGpuCapacites(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId())) != null) {
HashMap<String, Long> vgpuVMs = ApiDBUtils.getVgpuVmsCount(result.get(0).getDataCenterId(), result.get(0).getPodId(), result.get(0).getClusterId());
float capacityUsed = 0;
long capacityMax = 0;
for (VgpuTypesInfo capacity : gpuCapacities) {
if (vgpuVMs.containsKey(capacity.getGroupName().concat(capacity.getModelName()))) {
capacityUsed += (float)vgpuVMs.get(capacity.getGroupName().concat(capacity.getModelName())) / capacity.getMaxVpuPerGpu();
}
if (capacity.getModelName().equals(GPU.vGPUType.passthrough.toString())) {
capacityMax += capacity.getMaxCapacity();
}
}
DataCenter zone = ApiDBUtils.findZoneById(result.get(0).getDataCenterId());
CapacityResponse capacityResponse = new CapacityResponse();
if (zone != null) {
capacityResponse.setZoneId(zone.getUuid());
capacityResponse.setZoneName(zone.getName());
}
if (result.get(0).getPodId() != null) {
HostPodVO pod = ApiDBUtils.findPodById(result.get(0).getPodId());
capacityResponse.setPodId(pod.getUuid());
capacityResponse.setPodName(pod.getName());
}
if (result.get(0).getClusterId() != null) {
ClusterVO cluster = ApiDBUtils.findClusterById(result.get(0).getClusterId());
capacityResponse.setClusterId(cluster.getUuid());
capacityResponse.setClusterName(cluster.getName());
}
capacityResponse.setCapacityType(Capacity.CAPACITY_TYPE_GPU);
capacityResponse.setCapacityUsed((long)Math.ceil(capacityUsed));
capacityResponse.setCapacityTotal(capacityMax);
capacityResponse.setPercentUsed(format.format(capacityUsed / capacityMax * 100f));
capacityResponse.setObjectName("capacity");
capacityResponses.add(capacityResponse);
}
return capacityResponses;
}

View File

@ -1104,6 +1104,8 @@ cloudStack.converters = {
return "VLAN";
case 18:
return "Secondary Storage VM";
case 19:
return "GPU";
}
},

View File

@ -383,6 +383,9 @@
},
7: {
name: _l('label.vlan')
},
19: {
name: _l('GPU')
}
};