From 136505b22cd5cefa9c04faeb2dc305f7d653cf69 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Tue, 28 Jan 2020 06:25:11 +0100 Subject: [PATCH] server: double check host capacity when start/migrate a vm (#3728) When starting a VM or migrating a VM (away from a host in host maintenance), CloudStack checks the capacity of all hosts and chooses one. If there are hundreds of hosts on the platform, this can take several seconds. By the time CloudStack has chosen a host and starts/migrates the VM to it, the resource consumption of that host might have changed. This normally happens when we start/migrate multiple VMs. It is therefore better to double-check the host capacity when starting a VM on a host. This PR includes the fix for cpucore capacity when starting/migrating a VM. --- .../cloud/vm/VirtualMachineManagerImpl.java | 11 ++++ .../cloud/capacity/CapacityManagerImpl.java | 57 ++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 712b534e505..c2af76a0144 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -2353,6 +2353,17 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _networkMgr.rollbackNicForMigration(vmSrc, profile); s_logger.info("Migration cancelled because " + e1.getMessage()); throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage()); + } catch (final CloudRuntimeException e2) { + _networkMgr.rollbackNicForMigration(vmSrc, profile); + s_logger.info("Migration cancelled because " + e2.getMessage()); + work.setStep(Step.Done); + _workDao.update(work.getId(), work); + try { + stateTransitTo(vm, Event.OperationFailed, srcHostId); + } catch (final NoTransitionException e3) { + s_logger.warn(e3.getMessage()); + } + throw
new CloudRuntimeException("Migration cancelled because " + e2.getMessage()); } boolean migrated = false; diff --git a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java index b0121ca612d..b3f3a625f67 100644 --- a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java +++ b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java @@ -172,6 +172,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, final ServiceOfferingVO svo = _offeringsDao.findById(vm.getId(), vm.getServiceOfferingId()); CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); + CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE); Long clusterId = null; if (hostId != null) { HostVO host = _hostDao.findById(hostId); @@ -182,7 +183,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, clusterId = host.getClusterId(); } - if (capacityCpu == null || capacityMemory == null || svo == null) { + if (capacityCpu == null || capacityMemory == null || svo == null || capacityCpuCore == null) { return false; } @@ -190,20 +191,26 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, final Long clusterIdFinal = clusterId; final long capacityCpuId = capacityCpu.getId(); final long capacityMemoryId = capacityMemory.getId(); + final long capacityCpuCoreId = capacityCpuCore.getId(); + Transaction.execute(new TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(TransactionStatus status) { CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityMemory = _capacityDao.lockRow(capacityMemoryId, true); + CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true); long usedCpu = 
capacityCpu.getUsedCapacity(); long usedMem = capacityMemory.getUsedCapacity(); + long usedCpuCore = capacityCpuCore.getUsedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity(); long reservedMem = capacityMemory.getReservedCapacity(); + long reservedCpuCore = capacityCpuCore.getReservedCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity(); float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "cpuOvercommitRatio").getValue()); float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "memoryOvercommitRatio").getValue()); int vmCPU = svo.getCpu() * svo.getSpeed(); + int vmCPUCore = svo.getCpu(); long vmMem = svo.getRamSize() * 1024L * 1024L; long actualTotalMem = capacityMemory.getTotalCapacity(); long totalMem = (long)(actualTotalMem * memoryOvercommitRatio); @@ -221,6 +228,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (usedMem >= vmMem) { capacityMemory.setUsedCapacity(usedMem - vmMem); } + if (usedCpuCore >= vmCPUCore) { + capacityCpuCore.setUsedCapacity(usedCpuCore - vmCPUCore); + } if (moveToReservered) { if (reservedCpu + vmCPU <= totalCpu) { @@ -229,6 +239,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedMem + vmMem <= totalMem) { capacityMemory.setReservedCapacity(reservedMem + vmMem); } + capacityCpuCore.setReservedCapacity(reservedCpuCore + vmCPUCore); } } else { if (reservedCpu >= vmCPU) { @@ -237,6 +248,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedMem >= vmMem) { capacityMemory.setReservedCapacity(reservedMem - vmMem); } + if (reservedCpuCore >= vmCPUCore) { + capacityCpuCore.setReservedCapacity(reservedCpuCore - vmCPUCore); + } } s_logger.debug("release cpu from host: " + hostId + ", old used: " + usedCpu + ",reserved: " + reservedCpu + ", actual total: " + actualTotalCpu + @@ -249,6 +263,7 @@ public class 
CapacityManagerImpl extends ManagerBase implements CapacityManager, _capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityMemory.getId(), capacityMemory); + _capacityDao.update(capacityCpuCore.getId(), capacityCpuCore); } }); @@ -263,8 +278,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, @Override public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) { + final long vmId = vm.getId(); final long hostId = vm.getHostId(); - HostVO host = _hostDao.findById(hostId); + final HostVO host = _hostDao.findById(hostId); final long clusterId = host.getClusterId(); final float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue()); final float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue()); @@ -273,28 +289,35 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); + CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE); - if (capacityCpu == null || capacityMem == null || svo == null) { + if (capacityCpu == null || capacityMem == null || svo == null || capacityCpuCore == null) { return; } final int cpu = svo.getCpu() * svo.getSpeed(); + final int cpucore = svo.getCpu(); + final int cpuspeed = svo.getSpeed(); final long ram = svo.getRamSize() * 1024L * 1024L; try { final long capacityCpuId = capacityCpu.getId(); final long capacityMemId = capacityMem.getId(); + final long capacityCpuCoreId = capacityCpuCore.getId(); Transaction.execute(new TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(TransactionStatus status) { CapacityVO capacityCpu = 
_capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityMem = _capacityDao.lockRow(capacityMemId, true); + CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true); long usedCpu = capacityCpu.getUsedCapacity(); long usedMem = capacityMem.getUsedCapacity(); + long usedCpuCore = capacityCpuCore.getUsedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity(); long reservedMem = capacityMem.getReservedCapacity(); + long reservedCpuCore = capacityCpuCore.getReservedCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity(); long actualTotalMem = capacityMem.getTotalCapacity(); long totalCpu = (long)(actualTotalCpu * cpuOvercommitRatio); @@ -313,6 +336,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, } capacityCpu.setUsedCapacity(usedCpu + cpu); capacityMem.setUsedCapacity(usedMem + ram); + capacityCpuCore.setUsedCapacity(usedCpuCore + cpucore); if (fromLastHost) { /* alloc from reserved */ @@ -324,6 +348,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedCpu >= cpu && reservedMem >= ram) { capacityCpu.setReservedCapacity(reservedCpu - cpu); capacityMem.setReservedCapacity(reservedMem - ram); + capacityCpuCore.setReservedCapacity(reservedCpuCore - cpucore); } } else { /* alloc from free resource */ @@ -343,12 +368,38 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, totalMem + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity() + "; requested mem: " + ram + ",alloc_from_last:" + fromLastHost); + long cluster_id = host.getClusterId(); + ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio"); + ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio"); + Float cpuOvercommitRatio = Float.parseFloat(cluster_detail_cpu.getValue()); + Float memoryOvercommitRatio = 
Float.parseFloat(cluster_detail_ram.getValue()); + + boolean hostHasCpuCapability, hostHasCapacity = false; + hostHasCpuCapability = checkIfHostHasCpuCapability(host.getId(), cpucore, cpuspeed); + + if (hostHasCpuCapability) { + // first check from reserved capacity + hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, true, cpuOvercommitRatio, memoryOvercommitRatio, true); + + // if not reserved, check the free capacity + if (!hostHasCapacity) + hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, false, cpuOvercommitRatio, memoryOvercommitRatio, true); + } + + if (!hostHasCapacity || !hostHasCpuCapability) { + throw new CloudRuntimeException("Host does not have enough capacity for vm " + vmId); + } + _capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityMem.getId(), capacityMem); + _capacityDao.update(capacityCpuCore.getId(), capacityCpuCore); } }); } catch (Exception e) { s_logger.error("Exception allocating VM capacity", e); + if (e instanceof CloudRuntimeException) { + throw e; + } return; } }