server: double-check host capacity when starting/migrating a VM (#3728)

When starting a VM or migrating a VM (away from a host in host maintenance), CloudStack checks the capacity of all hosts and chooses one. If there are hundreds of hosts on the platform, this can take several seconds. By the time CloudStack has chosen a host and starts/migrates the VM to it, the resource consumption of that host might have changed. This normally happens when multiple VMs are started or migrated at the same time.
It would be better to double-check the host capacity at the point where the VM is actually started on the host.

This PR includes the fix for CPU core capacity when starting/migrating a VM.
This commit is contained in:
Wei Zhou 2020-01-28 06:25:11 +01:00 committed by Rohit Yadav
parent 71e53ab01d
commit 136505b22c
2 changed files with 65 additions and 3 deletions

View File

@ -2353,6 +2353,17 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
_networkMgr.rollbackNicForMigration(vmSrc, profile); _networkMgr.rollbackNicForMigration(vmSrc, profile);
s_logger.info("Migration cancelled because " + e1.getMessage()); s_logger.info("Migration cancelled because " + e1.getMessage());
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage()); throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
} catch (final CloudRuntimeException e2) {
_networkMgr.rollbackNicForMigration(vmSrc, profile);
s_logger.info("Migration cancelled because " + e2.getMessage());
work.setStep(Step.Done);
_workDao.update(work.getId(), work);
try {
stateTransitTo(vm, Event.OperationFailed, srcHostId);
} catch (final NoTransitionException e3) {
s_logger.warn(e3.getMessage());
}
throw new CloudRuntimeException("Migration cancelled because " + e2.getMessage());
} }
boolean migrated = false; boolean migrated = false;

View File

@ -172,6 +172,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
final ServiceOfferingVO svo = _offeringsDao.findById(vm.getId(), vm.getServiceOfferingId()); final ServiceOfferingVO svo = _offeringsDao.findById(vm.getId(), vm.getServiceOfferingId());
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE);
Long clusterId = null; Long clusterId = null;
if (hostId != null) { if (hostId != null) {
HostVO host = _hostDao.findById(hostId); HostVO host = _hostDao.findById(hostId);
@ -182,7 +183,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
clusterId = host.getClusterId(); clusterId = host.getClusterId();
} }
if (capacityCpu == null || capacityMemory == null || svo == null) { if (capacityCpu == null || capacityMemory == null || svo == null || capacityCpuCore == null) {
return false; return false;
} }
@ -190,20 +191,26 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
final Long clusterIdFinal = clusterId; final Long clusterIdFinal = clusterId;
final long capacityCpuId = capacityCpu.getId(); final long capacityCpuId = capacityCpu.getId();
final long capacityMemoryId = capacityMemory.getId(); final long capacityMemoryId = capacityMemory.getId();
final long capacityCpuCoreId = capacityCpuCore.getId();
Transaction.execute(new TransactionCallbackNoReturn() { Transaction.execute(new TransactionCallbackNoReturn() {
@Override @Override
public void doInTransactionWithoutResult(TransactionStatus status) { public void doInTransactionWithoutResult(TransactionStatus status) {
CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true);
CapacityVO capacityMemory = _capacityDao.lockRow(capacityMemoryId, true); CapacityVO capacityMemory = _capacityDao.lockRow(capacityMemoryId, true);
CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true);
long usedCpu = capacityCpu.getUsedCapacity(); long usedCpu = capacityCpu.getUsedCapacity();
long usedMem = capacityMemory.getUsedCapacity(); long usedMem = capacityMemory.getUsedCapacity();
long usedCpuCore = capacityCpuCore.getUsedCapacity();
long reservedCpu = capacityCpu.getReservedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity();
long reservedMem = capacityMemory.getReservedCapacity(); long reservedMem = capacityMemory.getReservedCapacity();
long reservedCpuCore = capacityCpuCore.getReservedCapacity();
long actualTotalCpu = capacityCpu.getTotalCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity();
float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "cpuOvercommitRatio").getValue()); float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "cpuOvercommitRatio").getValue());
float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "memoryOvercommitRatio").getValue()); float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "memoryOvercommitRatio").getValue());
int vmCPU = svo.getCpu() * svo.getSpeed(); int vmCPU = svo.getCpu() * svo.getSpeed();
int vmCPUCore = svo.getCpu();
long vmMem = svo.getRamSize() * 1024L * 1024L; long vmMem = svo.getRamSize() * 1024L * 1024L;
long actualTotalMem = capacityMemory.getTotalCapacity(); long actualTotalMem = capacityMemory.getTotalCapacity();
long totalMem = (long)(actualTotalMem * memoryOvercommitRatio); long totalMem = (long)(actualTotalMem * memoryOvercommitRatio);
@ -221,6 +228,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
if (usedMem >= vmMem) { if (usedMem >= vmMem) {
capacityMemory.setUsedCapacity(usedMem - vmMem); capacityMemory.setUsedCapacity(usedMem - vmMem);
} }
if (usedCpuCore >= vmCPUCore) {
capacityCpuCore.setUsedCapacity(usedCpuCore - vmCPUCore);
}
if (moveToReservered) { if (moveToReservered) {
if (reservedCpu + vmCPU <= totalCpu) { if (reservedCpu + vmCPU <= totalCpu) {
@ -229,6 +239,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
if (reservedMem + vmMem <= totalMem) { if (reservedMem + vmMem <= totalMem) {
capacityMemory.setReservedCapacity(reservedMem + vmMem); capacityMemory.setReservedCapacity(reservedMem + vmMem);
} }
capacityCpuCore.setReservedCapacity(reservedCpuCore + vmCPUCore);
} }
} else { } else {
if (reservedCpu >= vmCPU) { if (reservedCpu >= vmCPU) {
@ -237,6 +248,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
if (reservedMem >= vmMem) { if (reservedMem >= vmMem) {
capacityMemory.setReservedCapacity(reservedMem - vmMem); capacityMemory.setReservedCapacity(reservedMem - vmMem);
} }
if (reservedCpuCore >= vmCPUCore) {
capacityCpuCore.setReservedCapacity(reservedCpuCore - vmCPUCore);
}
} }
s_logger.debug("release cpu from host: " + hostId + ", old used: " + usedCpu + ",reserved: " + reservedCpu + ", actual total: " + actualTotalCpu + s_logger.debug("release cpu from host: " + hostId + ", old used: " + usedCpu + ",reserved: " + reservedCpu + ", actual total: " + actualTotalCpu +
@ -249,6 +263,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
_capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityCpu.getId(), capacityCpu);
_capacityDao.update(capacityMemory.getId(), capacityMemory); _capacityDao.update(capacityMemory.getId(), capacityMemory);
_capacityDao.update(capacityCpuCore.getId(), capacityCpuCore);
} }
}); });
@ -263,8 +278,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
@Override @Override
public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) { public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) {
final long vmId = vm.getId();
final long hostId = vm.getHostId(); final long hostId = vm.getHostId();
HostVO host = _hostDao.findById(hostId); final HostVO host = _hostDao.findById(hostId);
final long clusterId = host.getClusterId(); final long clusterId = host.getClusterId();
final float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue()); final float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue());
final float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue()); final float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue());
@ -273,28 +289,35 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU);
CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY);
CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE);
if (capacityCpu == null || capacityMem == null || svo == null) { if (capacityCpu == null || capacityMem == null || svo == null || capacityCpuCore == null) {
return; return;
} }
final int cpu = svo.getCpu() * svo.getSpeed(); final int cpu = svo.getCpu() * svo.getSpeed();
final int cpucore = svo.getCpu();
final int cpuspeed = svo.getSpeed();
final long ram = svo.getRamSize() * 1024L * 1024L; final long ram = svo.getRamSize() * 1024L * 1024L;
try { try {
final long capacityCpuId = capacityCpu.getId(); final long capacityCpuId = capacityCpu.getId();
final long capacityMemId = capacityMem.getId(); final long capacityMemId = capacityMem.getId();
final long capacityCpuCoreId = capacityCpuCore.getId();
Transaction.execute(new TransactionCallbackNoReturn() { Transaction.execute(new TransactionCallbackNoReturn() {
@Override @Override
public void doInTransactionWithoutResult(TransactionStatus status) { public void doInTransactionWithoutResult(TransactionStatus status) {
CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true);
CapacityVO capacityMem = _capacityDao.lockRow(capacityMemId, true); CapacityVO capacityMem = _capacityDao.lockRow(capacityMemId, true);
CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true);
long usedCpu = capacityCpu.getUsedCapacity(); long usedCpu = capacityCpu.getUsedCapacity();
long usedMem = capacityMem.getUsedCapacity(); long usedMem = capacityMem.getUsedCapacity();
long usedCpuCore = capacityCpuCore.getUsedCapacity();
long reservedCpu = capacityCpu.getReservedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity();
long reservedMem = capacityMem.getReservedCapacity(); long reservedMem = capacityMem.getReservedCapacity();
long reservedCpuCore = capacityCpuCore.getReservedCapacity();
long actualTotalCpu = capacityCpu.getTotalCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity();
long actualTotalMem = capacityMem.getTotalCapacity(); long actualTotalMem = capacityMem.getTotalCapacity();
long totalCpu = (long)(actualTotalCpu * cpuOvercommitRatio); long totalCpu = (long)(actualTotalCpu * cpuOvercommitRatio);
@ -313,6 +336,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
} }
capacityCpu.setUsedCapacity(usedCpu + cpu); capacityCpu.setUsedCapacity(usedCpu + cpu);
capacityMem.setUsedCapacity(usedMem + ram); capacityMem.setUsedCapacity(usedMem + ram);
capacityCpuCore.setUsedCapacity(usedCpuCore + cpucore);
if (fromLastHost) { if (fromLastHost) {
/* alloc from reserved */ /* alloc from reserved */
@ -324,6 +348,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
if (reservedCpu >= cpu && reservedMem >= ram) { if (reservedCpu >= cpu && reservedMem >= ram) {
capacityCpu.setReservedCapacity(reservedCpu - cpu); capacityCpu.setReservedCapacity(reservedCpu - cpu);
capacityMem.setReservedCapacity(reservedMem - ram); capacityMem.setReservedCapacity(reservedMem - ram);
capacityCpuCore.setReservedCapacity(reservedCpuCore - cpucore);
} }
} else { } else {
/* alloc from free resource */ /* alloc from free resource */
@ -343,12 +368,38 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager,
totalMem + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity() + "; requested mem: " + ram + totalMem + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity() + "; requested mem: " + ram +
",alloc_from_last:" + fromLastHost); ",alloc_from_last:" + fromLastHost);
long cluster_id = host.getClusterId();
ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio");
ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio");
Float cpuOvercommitRatio = Float.parseFloat(cluster_detail_cpu.getValue());
Float memoryOvercommitRatio = Float.parseFloat(cluster_detail_ram.getValue());
boolean hostHasCpuCapability, hostHasCapacity = false;
hostHasCpuCapability = checkIfHostHasCpuCapability(host.getId(), cpucore, cpuspeed);
if (hostHasCpuCapability) {
// first check from reserved capacity
hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, true, cpuOvercommitRatio, memoryOvercommitRatio, true);
// if not reserved, check the free capacity
if (!hostHasCapacity)
hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, false, cpuOvercommitRatio, memoryOvercommitRatio, true);
}
if (!hostHasCapacity || !hostHasCpuCapability) {
throw new CloudRuntimeException("Host does not have enough capacity for vm " + vmId);
}
_capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityCpu.getId(), capacityCpu);
_capacityDao.update(capacityMem.getId(), capacityMem); _capacityDao.update(capacityMem.getId(), capacityMem);
_capacityDao.update(capacityCpuCore.getId(), capacityCpuCore);
} }
}); });
} catch (Exception e) { } catch (Exception e) {
s_logger.error("Exception allocating VM capacity", e); s_logger.error("Exception allocating VM capacity", e);
if (e instanceof CloudRuntimeException) {
throw e;
}
return; return;
} }
} }