From ff1c6e78f457f346bf888491e34c5216d0f7d9d6 Mon Sep 17 00:00:00 2001
From: Wei Zhou
Date: Tue, 28 Jan 2020 06:05:59 +0100
Subject: [PATCH 1/5] router: Set up metadata/password/dhcp server on gateway IP instead of guest IP in RVR (#3477)

When we create a VM in a network with redundant VRs, the lease file in the VM (for example /var/lib/dhcp/dhclient.eth0.leases) shows that the dhcp-server-identifier is the guest IP (not the VIP/gateway) of the master VR. That is the IP address the VM fetches its password and metadata from. If we stop the master VR (so the backup becomes master) or restart the network with cleanup (the VRs are recreated), the guest IP of the master VR changes, so VMs are no longer able to get metadata/ssh-keys using the IPs in the DHCP lease file.

Setting up the metadata/password/dhcp server on the gateway IP instead of the guest IP in redundant VRs fixes these issues.

Fixes #3409
---
 systemvm/debian/etc/apache2/vhost.template | 10 +++---
 systemvm/debian/opt/cloud/bin/configure.py | 21 +++++++++---
 systemvm/debian/opt/cloud/bin/cs/CsAddress.py | 4 +++
 systemvm/debian/opt/cloud/bin/cs/CsApp.py | 15 ++++++---
 systemvm/debian/opt/cloud/bin/cs/CsDhcp.py | 32 +++++++++++++------
 systemvm/debian/opt/cloud/bin/cs/CsFile.py | 14 ++++++++
 systemvm/debian/opt/cloud/bin/cs/CsHelper.py | 2 +-
 .../debian/opt/cloud/bin/cs/CsRedundant.py | 14 +++-----
 .../debian/opt/cloud/bin/passwd_server_ip.py | 11 +++++--
 .../debian/opt/cloud/bin/setup/secstorage.sh | 8 ++---
 10 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/systemvm/debian/etc/apache2/vhost.template b/systemvm/debian/etc/apache2/vhost.template index 688239cd8c0..0226bb44eee 100644 --- a/systemvm/debian/etc/apache2/vhost.template +++ b/systemvm/debian/etc/apache2/vhost.template @@ -1,4 +1,4 @@ - + ServerAdmin webmaster@localhost DocumentRoot /var/www/html @@ -42,7 +42,7 @@ - + ServerAdmin webmaster@localhost DocumentRoot /var/www/html @@ -227,14 +227,14 @@ # Debian etch). See /usr/share/doc/apache2.2-common/NEWS.Debian.gz and # README.Debian.gz -Listen 10.1.1.1:80 +Listen 10.1.1.1:8180 # Server Name Indication for SSL named virtual hosts is currently not # supported by MSIE on Windows XP. 
- Listen 10.1.1.1:443 + Listen 10.1.1.1:8443 - Listen 10.1.1.1:443 + Listen 10.1.1.1:8443 diff --git a/systemvm/debian/opt/cloud/bin/configure.py b/systemvm/debian/opt/cloud/bin/configure.py index 1e0c057a871..cd0ef350ecb 100755 --- a/systemvm/debian/opt/cloud/bin/configure.py +++ b/systemvm/debian/opt/cloud/bin/configure.py @@ -58,11 +58,22 @@ class CsPassword(CsDataBag): except IOError: logging.debug("File %s does not exist" % self.TOKEN_FILE) - ips_cmd = "ip addr show | grep inet | awk '{print $2}'" - ips = CsHelper.execute(ips_cmd) - for ip in ips: - server_ip = ip.split('/')[0] - proc = CsProcess(['/opt/cloud/bin/passwd_server_ip.py', server_ip]) + server_ip = None + guest_ip = None + for interface in self.config.address().get_interfaces(): + if interface.ip_in_subnet(vm_ip): + if self.config.cl.is_redundant(): + server_ip = interface.get_gateway() + guest_ip = interface.get_ip() + else: + server_ip = interface.get_ip() + break + + if server_ip is not None: + if guest_ip is None: + proc = CsProcess(['/opt/cloud/bin/passwd_server_ip.py', server_ip]) + else: + proc = CsProcess(['/opt/cloud/bin/passwd_server_ip.py', server_ip + "," + guest_ip]) if proc.find(): url = "http://%s:8080/" % server_ip payload = {"ip": vm_ip, "password": password, "token": token} diff --git a/systemvm/debian/opt/cloud/bin/cs/CsAddress.py b/systemvm/debian/opt/cloud/bin/cs/CsAddress.py index 8e678251fe3..ba9ee082740 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsAddress.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsAddress.py @@ -661,6 +661,10 @@ class CsIP: if not found: self.delete(ip) + def get_gateway(self): + interface = CsInterface(self.address, self.config) + return interface.get_gateway() + def is_guest_gateway(self, bag, ip): """ Exclude the vrrp maintained addresses on a redundant router """ interface = CsInterface(bag, self.config) diff --git a/systemvm/debian/opt/cloud/bin/cs/CsApp.py b/systemvm/debian/opt/cloud/bin/cs/CsApp.py index 575ab2ac5b2..9f3375f7980 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsApp.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsApp.py @@ -25,6 +25,7 @@ class CsApp: def __init__(self, ip): self.dev = ip.getDevice() self.ip = ip.get_ip_address() + self.gateway = ip.get_gateway() self.type = ip.get_type() self.fw = ip.fw self.config = ip.config @@ -44,10 +45,16 @@ class CsApache(CsApp): "/etc/apache2/sites-enabled/vhost-%s.conf" % self.ip) file = CsFile("/etc/apache2/sites-enabled/vhost-%s.conf" % (self.ip)) - file.search("", "\t" % (self.ip)) - file.search("", "\t" % (self.ip)) - file.search("Listen .*:80", "Listen %s:80" % (self.ip)) - file.search("Listen .*:443", "Listen %s:443" % (self.ip)) + if not self.config.cl.is_redundant(): + file.replaceIfFound("", "" % (self.ip)) + file.replaceIfFound("", "\t" % (self.ip)) + file.replaceIfFound("Listen .*:8180", "Listen %s:80" % (self.ip)) + file.replaceIfFound("Listen .*:8443", "Listen %s:443" % (self.ip)) + else: + file.replaceIfFound("", "" % (self.ip, self.gateway)) + file.replaceIfFound("", "\t" % (self.ip, self.gateway)) + file.replaceIfFound("Listen .*:8180", "Listen %s:80\nListen %s:80" % (self.ip, self.gateway)) + file.replaceIfFound("Listen .*:8443", "Listen %s:443\nListen %s:443" % (self.ip, self.gateway)) file.search("ServerName.*", "\tServerName %s.%s" % (self.config.cl.get_type(), self.config.get_domain())) if file.is_changed(): file.commit() diff --git a/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py b/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py index 72c5daec0cc..d0b40e52de5 100755 --- 
a/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsDhcp.py @@ -77,15 +77,25 @@ class CsDhcp(CsDataBag): def configure_server(self): # self.conf.addeq("dhcp-hostsfile=%s" % DHCP_HOSTS) idx = 0 + listen_address = ["127.0.0.1"] for i in self.devinfo: if not i['dnsmasq']: continue device = i['dev'] ip = i['ip'].split('/')[0] - sline = "dhcp-range=set:interface-%s-%s" % (device, idx) - line = "dhcp-range=set:interface-%s-%s,%s,static" % (device, idx, ip) - self.conf.search(sline, line) gn = CsGuestNetwork(device, self.config) + # Gateway + gateway = '' + if self.config.is_vpc(): + gateway = gn.get_gateway() + else: + gateway = i['gateway'] + sline = "dhcp-range=set:interface-%s-%s" % (device, idx) + if self.cl.is_redundant(): + line = "dhcp-range=set:interface-%s-%s,%s,static" % (device, idx, gateway) + else: + line = "dhcp-range=set:interface-%s-%s,%s,static" % (device, idx, ip) + self.conf.search(sline, line) sline = "dhcp-option=tag:interface-%s-%s,15" % (device, idx) line = "dhcp-option=tag:interface-%s-%s,15,%s" % (device, idx, gn.get_domain()) self.conf.search(sline, line) @@ -95,12 +105,6 @@ class CsDhcp(CsDataBag): dns_list = [x for x in gn.get_dns() if x] line = "dhcp-option=tag:interface-%s-%s,6,%s" % (device, idx, ','.join(dns_list)) self.conf.search(sline, line) - # Gateway - gateway = '' - if self.config.is_vpc(): - gateway = gn.get_gateway() - else: - gateway = i['gateway'] if gateway != '0.0.0.0': sline = "dhcp-option=tag:interface-%s-%s,3," % (device, idx) line = "dhcp-option=tag:interface-%s-%s,3,%s" % (device, idx, gateway) @@ -114,8 +118,18 @@ class CsDhcp(CsDataBag): sline = "dhcp-option=tag:interface-%s-%s,1," % (device, idx) line = "dhcp-option=tag:interface-%s-%s,1,%s" % (device, idx, netmask) self.conf.search(sline, line) + # Listen Address + if self.cl.is_redundant(): + listen_address.append(gateway) + else: + listen_address.append(ip) idx += 1 + # Listen Address + sline = "listen-address=" + line = "listen-address=%s" % (','.join(listen_address)) + self.conf.search(sline, line) + def delete_leases(self): macs_dhcphosts = [] try: diff --git a/systemvm/debian/opt/cloud/bin/cs/CsFile.py b/systemvm/debian/opt/cloud/bin/cs/CsFile.py index f3b2a272631..2ee631a89d6 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsFile.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsFile.py @@ -116,6 +116,20 @@ class CsFile: logging.debug("Searching for %s and replacing with %s" % (search, replace)) self.new_config = [w.replace(search, replace) for w in self.new_config] + def replaceIfFound(self, search, replace): + found = False + replace_filtered = replace + if re.search("PSK \"", replace): + replace_filtered = re.sub(r'".*"', '"****"', replace) + logging.debug("Searching for %s and replacing with %s if found" % (search, replace_filtered)) + for index, line in enumerate(self.new_config): + if line.lstrip().startswith("#"): + continue + if re.search(search, line): + if replace not in line: + self.new_config[index] = replace + "\n" + return False + def search(self, search, replace): found = False replace_filtered = replace diff --git a/systemvm/debian/opt/cloud/bin/cs/CsHelper.py b/systemvm/debian/opt/cloud/bin/cs/CsHelper.py index 278e1e6544f..e29891796b8 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsHelper.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsHelper.py @@ -113,7 +113,7 @@ def bool_to_yn(val): def get_device_info(): """ Returns all devices on system with their ipv4 ip netmask """ list = [] - for i in execute("ip addr show"): + for i in execute("ip addr 
show |grep -v secondary"): vals = i.strip().lstrip().rstrip().split() if vals[0] == "inet": to = {} diff --git a/systemvm/debian/opt/cloud/bin/cs/CsRedundant.py b/systemvm/debian/opt/cloud/bin/cs/CsRedundant.py index 25a4a1a9438..441723b29a6 100755 --- a/systemvm/debian/opt/cloud/bin/cs/CsRedundant.py +++ b/systemvm/debian/opt/cloud/bin/cs/CsRedundant.py @@ -245,8 +245,7 @@ class CsRedundant(object): interfaces = [interface for interface in self.address.get_interfaces() if interface.needs_vrrp()] for interface in interfaces: - CsPasswdSvc(interface.get_ip()).stop() - CsPasswdSvc(interface.get_gateway()).stop() + CsPasswdSvc(interface.get_gateway() + "," + interface.get_ip()).stop() self.cl.set_fault_state() self.cl.save() @@ -282,8 +281,7 @@ class CsRedundant(object): interfaces = [interface for interface in self.address.get_interfaces() if interface.needs_vrrp()] for interface in interfaces: - CsPasswdSvc(interface.get_ip()).stop() - CsPasswdSvc(interface.get_gateway()).stop() + CsPasswdSvc(interface.get_gateway() + "," + interface.get_ip()).stop() CsHelper.service("dnsmasq", "stop") @@ -341,8 +339,7 @@ class CsRedundant(object): interfaces = [interface for interface in self.address.get_interfaces() if interface.needs_vrrp()] for interface in interfaces: - CsPasswdSvc(interface.get_ip()).restart() - CsPasswdSvc(interface.get_gateway()).restart() + CsPasswdSvc(interface.get_gateway() + "," + interface.get_ip()).restart() CsHelper.service("dnsmasq", "restart") self.cl.set_master_state(True) @@ -408,9 +405,6 @@ class CsRedundant(object): cmdline = self.config.get_cmdline_instance() if not interface.is_added(): continue - if cmdline.get_type() == 'router': - str = " %s brd %s dev %s\n" % (cmdline.get_guest_gw(), interface.get_broadcast(), interface.get_device()) - else: - str = " %s brd %s dev %s\n" % (interface.get_gateway_cidr(), interface.get_broadcast(), interface.get_device()) + str = " %s brd %s dev %s\n" % (interface.get_gateway_cidr(), interface.get_broadcast(), interface.get_device()) lines.append(str) return lines diff --git a/systemvm/debian/opt/cloud/bin/passwd_server_ip.py b/systemvm/debian/opt/cloud/bin/passwd_server_ip.py index fc84910a117..85d76f65166 100755 --- a/systemvm/debian/opt/cloud/bin/passwd_server_ip.py +++ b/systemvm/debian/opt/cloud/bin/passwd_server_ip.py @@ -40,6 +40,7 @@ from SocketServer import ThreadingMixIn #, ForkingMixIn passMap = {} secureToken = None listeningAddress = '127.0.0.1' +allowAddresses = ['localhost', '127.0.0.1'] lock = threading.RLock() def getTokenFile(): @@ -139,7 +140,7 @@ class PasswordRequestHandler(BaseHTTPRequestHandler): self.send_response(200) self.end_headers() clientAddress = self.client_address[0] - if clientAddress not in ['localhost', '127.0.0.1', listeningAddress]: + if clientAddress not in allowAddresses: syslog.syslog('serve_password: non-localhost IP trying to save password: %s' % clientAddress) self.send_response(403) return @@ -170,8 +171,14 @@ def serve(HandlerClass = PasswordRequestHandler, ServerClass = ThreadedHTTPServer): global listeningAddress + global allowAddresses if len(sys.argv) > 1: - listeningAddress = sys.argv[1] + addresses = sys.argv[1].split(",") + if len(addresses) > 0: + listeningAddress = addresses[0] + allowAddresses.append(addresses[0]) + if len(addresses) > 1: + allowAddresses.append(addresses[1]) server_address = (listeningAddress, 8080) passwordServer = ServerClass(server_address, HandlerClass) diff --git a/systemvm/debian/opt/cloud/bin/setup/secstorage.sh 
b/systemvm/debian/opt/cloud/bin/setup/secstorage.sh index d3a6d21cf65..13ed5c5d0ae 100755 --- a/systemvm/debian/opt/cloud/bin/setup/secstorage.sh +++ b/systemvm/debian/opt/cloud/bin/setup/secstorage.sh @@ -49,10 +49,10 @@ setup_secstorage() { setup_apache2 $ETH2_IP # Deprecated, should move to Cs Python all of it - sed -e "s///" \ - -e "s///" \ - -e "s/Listen .*:80/Listen $ETH2_IP:80/g" \ - -e "s/Listen .*:443/Listen $ETH2_IP:443/g" /etc/apache2/vhost.template > /etc/apache2/sites-enabled/vhost-${ETH2_IP}.conf + sed -e "s///" \ + -e "s///" \ + -e "s/Listen .*:8180/Listen $ETH2_IP:80/g" \ + -e "s/Listen .*:8443/Listen $ETH2_IP:443/g" /etc/apache2/vhost.template > /etc/apache2/sites-enabled/vhost-${ETH2_IP}.conf log_it "Setting up apache2 for post upload of volume/template" a2enmod proxy

From 71e53ab01d612de99f759e2b34611bbdf731d766 Mon Sep 17 00:00:00 2001
From: Wei Zhou
Date: Tue, 28 Jan 2020 06:24:32 +0100
Subject: [PATCH 2/5] server: Capacity check should take vms in Migrating state into calculation (#3727)

When we calculate the resource consumption of a host, we need to take VMs in the following states into the calculation: Running, Starting, Stopping, Migrating (to the host), as well as VMs that are migrating away from the host. When a VM is stopped, the resources on the host are only released once the VM has actually stopped. When a VM is migrated, the resources on the destination host are increased before the migration starts, and the resources on the source host are decreased only after the migration succeeds.

In CloudStack there is a task named CapacityChecked which runs every 5 minutes (capacity.check.period = 300000 ms by default). It recalculates the capacity of all hosts. However, it takes only VMs in the Running and Starting states into consideration. We have faced some issues in host maintenance because of this.

Steps to reproduce the issue:
(1) Migrate N VMs from host A to host B; the CPU/RAM usage of host B increases before the migration.
(2) The capacity check recalculates the capacity of all hosts; the used capacity of host B is reset to its original value (not including the VMs in Migrating state).
(3) Migrate some more VMs from other hosts to host B; the migrations are allowed by CloudStack (because the used capacity is incorrect).
If the actual used memory exceeds the physical memory on the host, there can be critical issues (for example, libvirt dies).
---
 .../src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java | 2 +-
 .../main/java/com/cloud/capacity/CapacityManagerImpl.java | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java index 1945969f543..e4f5dba65ed 100755 --- a/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/vm/dao/VMInstanceDaoImpl.java @@ -416,7 +416,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase implem public List listUpByHostId(Long hostId) { SearchCriteria sc = HostUpSearch.create(); sc.setParameters("host", hostId); - sc.setParameters("states", new Object[] {State.Starting, State.Running}); + sc.setParameters("states", new Object[] {State.Starting, State.Running, State.Stopping, State.Migrating}); return listBy(sc); } diff --git a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java index a7fee9603a0..b0121ca612d 100644 --- a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java +++ b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java @@ -590,6 +590,12 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, s_logger.debug("Found " + vms.size() + " VMs on host " + host.getId()); } + final List vosMigrating = _vmDao.listVmsMigratingFromHost(host.getId()); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found " + vosMigrating.size() + " VMs are Migrating from host " + host.getId()); + } + vms.addAll(vosMigrating); + ClusterVO cluster = _clusterDao.findById(host.getClusterId()); ClusterDetailsVO clusterDetailCpu = _clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio"); ClusterDetailsVO clusterDetailRam = _clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio");

From 136505b22cd5cefa9c04faeb2dc305f7d653cf69 Mon Sep 17 00:00:00 2001
From: Wei Zhou
Date: Tue, 28 Jan 2020 06:25:11 +0100
Subject: [PATCH 3/5] server: double check host capacity when start/migrate a vm (#3728)

When starting a VM or migrating a VM (away from a host in maintenance), CloudStack checks the capacity of all hosts and chooses one. If there are hundreds of hosts on the platform, this can take several seconds. By the time CloudStack has chosen a host and starts/migrates the VM onto it, the resource consumption of that host may already have changed. This typically happens when we start/migrate multiple VMs at once. It is therefore better to double-check the host capacity when actually starting the VM on the host.

This PR also includes the fix for the cpucore capacity when starting/migrating a VM.
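To make the race concrete, here is a small self-contained Java sketch (illustrative only, not CloudStack code; the HostCapacity class, its methods, and the capacity numbers are invented): two deployments pass the planner's capacity check against the same stale view, and only a second check performed at allocation time, which is what the change below adds inside the capacity transaction, rejects the VM that no longer fits.

```java
// Toy model of the race this patch closes: the planner picks a host from a
// possibly stale capacity view, so the allocator re-checks capacity under a
// lock and rejects the VM if the host has filled up in the meantime.
// Names and numbers are invented for illustration; this is not CloudStack code.
public class CapacityDoubleCheckDemo {

    static final class HostCapacity {
        private final long totalCpuMhz;
        private final long totalRamBytes;
        private long usedCpuMhz;
        private long usedRamBytes;

        HostCapacity(long totalCpuMhz, long totalRamBytes) {
            this.totalCpuMhz = totalCpuMhz;
            this.totalRamBytes = totalRamBytes;
        }

        // Stand-in for the planner's initial check: it can be stale by the
        // time the VM is actually started on the host.
        synchronized boolean looksFree(long cpuMhz, long ramBytes) {
            return usedCpuMhz + cpuMhz <= totalCpuMhz
                    && usedRamBytes + ramBytes <= totalRamBytes;
        }

        // Stand-in for the allocation path after the patch: capacity is
        // re-checked under the lock and the allocation fails loudly instead
        // of silently overcommitting the host.
        synchronized void tryAllocate(long cpuMhz, long ramBytes) {
            if (usedCpuMhz + cpuMhz > totalCpuMhz || usedRamBytes + ramBytes > totalRamBytes) {
                throw new IllegalStateException("Host does not have enough capacity for this VM");
            }
            usedCpuMhz += cpuMhz;
            usedRamBytes += ramBytes;
        }
    }

    public static void main(String[] args) {
        HostCapacity host = new HostCapacity(10_000, 4L << 30); // 10000 MHz, 4 GiB
        long vmCpu = 4_000;          // 4000 MHz per VM
        long vmRam = 3L << 30;       // 3 GiB per VM

        // Both deployments pass the planner's check against the same snapshot.
        boolean firstFits = host.looksFree(vmCpu, vmRam);
        boolean secondFits = host.looksFree(vmCpu, vmRam);
        System.out.println("planner says vm1 fits=" + firstFits + ", vm2 fits=" + secondFits);

        host.tryAllocate(vmCpu, vmRam); // vm1 lands: host now at 4000 MHz / 3 GiB used
        try {
            host.tryAllocate(vmCpu, vmRam); // vm2 would need 6 GiB total and is rejected
        } catch (IllegalStateException e) {
            System.out.println("second allocation refused: " + e.getMessage());
        }
    }
}
```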
--- .../cloud/vm/VirtualMachineManagerImpl.java | 11 ++++ .../cloud/capacity/CapacityManagerImpl.java | 57 ++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 712b534e505..c2af76a0144 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -2353,6 +2353,17 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _networkMgr.rollbackNicForMigration(vmSrc, profile); s_logger.info("Migration cancelled because " + e1.getMessage()); throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage()); + } catch (final CloudRuntimeException e2) { + _networkMgr.rollbackNicForMigration(vmSrc, profile); + s_logger.info("Migration cancelled because " + e2.getMessage()); + work.setStep(Step.Done); + _workDao.update(work.getId(), work); + try { + stateTransitTo(vm, Event.OperationFailed, srcHostId); + } catch (final NoTransitionException e3) { + s_logger.warn(e3.getMessage()); + } + throw new CloudRuntimeException("Migration cancelled because " + e2.getMessage()); } boolean migrated = false; diff --git a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java index b0121ca612d..b3f3a625f67 100644 --- a/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java +++ b/server/src/main/java/com/cloud/capacity/CapacityManagerImpl.java @@ -172,6 +172,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, final ServiceOfferingVO svo = _offeringsDao.findById(vm.getId(), vm.getServiceOfferingId()); CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityMemory = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); + CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE); Long clusterId = null; if (hostId != null) { HostVO host = _hostDao.findById(hostId); @@ -182,7 +183,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, clusterId = host.getClusterId(); } - if (capacityCpu == null || capacityMemory == null || svo == null) { + if (capacityCpu == null || capacityMemory == null || svo == null || capacityCpuCore == null) { return false; } @@ -190,20 +191,26 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, final Long clusterIdFinal = clusterId; final long capacityCpuId = capacityCpu.getId(); final long capacityMemoryId = capacityMemory.getId(); + final long capacityCpuCoreId = capacityCpuCore.getId(); + Transaction.execute(new TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(TransactionStatus status) { CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityMemory = _capacityDao.lockRow(capacityMemoryId, true); + CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true); long usedCpu = capacityCpu.getUsedCapacity(); long usedMem = capacityMemory.getUsedCapacity(); + long usedCpuCore = capacityCpuCore.getUsedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity(); long reservedMem = capacityMemory.getReservedCapacity(); + long reservedCpuCore = 
capacityCpuCore.getReservedCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity(); float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "cpuOvercommitRatio").getValue()); float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterIdFinal, "memoryOvercommitRatio").getValue()); int vmCPU = svo.getCpu() * svo.getSpeed(); + int vmCPUCore = svo.getCpu(); long vmMem = svo.getRamSize() * 1024L * 1024L; long actualTotalMem = capacityMemory.getTotalCapacity(); long totalMem = (long)(actualTotalMem * memoryOvercommitRatio); @@ -221,6 +228,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (usedMem >= vmMem) { capacityMemory.setUsedCapacity(usedMem - vmMem); } + if (usedCpuCore >= vmCPUCore) { + capacityCpuCore.setUsedCapacity(usedCpuCore - vmCPUCore); + } if (moveToReservered) { if (reservedCpu + vmCPU <= totalCpu) { @@ -229,6 +239,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedMem + vmMem <= totalMem) { capacityMemory.setReservedCapacity(reservedMem + vmMem); } + capacityCpuCore.setReservedCapacity(reservedCpuCore + vmCPUCore); } } else { if (reservedCpu >= vmCPU) { @@ -237,6 +248,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedMem >= vmMem) { capacityMemory.setReservedCapacity(reservedMem - vmMem); } + if (reservedCpuCore >= vmCPUCore) { + capacityCpuCore.setReservedCapacity(reservedCpuCore - vmCPUCore); + } } s_logger.debug("release cpu from host: " + hostId + ", old used: " + usedCpu + ",reserved: " + reservedCpu + ", actual total: " + actualTotalCpu + @@ -249,6 +263,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, _capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityMemory.getId(), capacityMemory); + _capacityDao.update(capacityCpuCore.getId(), capacityCpuCore); } }); @@ -263,8 +278,9 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, @Override public void allocateVmCapacity(VirtualMachine vm, final boolean fromLastHost) { + final long vmId = vm.getId(); final long hostId = vm.getHostId(); - HostVO host = _hostDao.findById(hostId); + final HostVO host = _hostDao.findById(hostId); final long clusterId = host.getClusterId(); final float cpuOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "cpuOvercommitRatio").getValue()); final float memoryOvercommitRatio = Float.parseFloat(_clusterDetailsDao.findDetail(clusterId, "memoryOvercommitRatio").getValue()); @@ -273,28 +289,35 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, CapacityVO capacityCpu = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU); CapacityVO capacityMem = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_MEMORY); + CapacityVO capacityCpuCore = _capacityDao.findByHostIdType(hostId, Capacity.CAPACITY_TYPE_CPU_CORE); - if (capacityCpu == null || capacityMem == null || svo == null) { + if (capacityCpu == null || capacityMem == null || svo == null || capacityCpuCore == null) { return; } final int cpu = svo.getCpu() * svo.getSpeed(); + final int cpucore = svo.getCpu(); + final int cpuspeed = svo.getSpeed(); final long ram = svo.getRamSize() * 1024L * 1024L; try { final long capacityCpuId = capacityCpu.getId(); final long capacityMemId = capacityMem.getId(); + final long capacityCpuCoreId = capacityCpuCore.getId(); Transaction.execute(new 
TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(TransactionStatus status) { CapacityVO capacityCpu = _capacityDao.lockRow(capacityCpuId, true); CapacityVO capacityMem = _capacityDao.lockRow(capacityMemId, true); + CapacityVO capacityCpuCore = _capacityDao.lockRow(capacityCpuCoreId, true); long usedCpu = capacityCpu.getUsedCapacity(); long usedMem = capacityMem.getUsedCapacity(); + long usedCpuCore = capacityCpuCore.getUsedCapacity(); long reservedCpu = capacityCpu.getReservedCapacity(); long reservedMem = capacityMem.getReservedCapacity(); + long reservedCpuCore = capacityCpuCore.getReservedCapacity(); long actualTotalCpu = capacityCpu.getTotalCapacity(); long actualTotalMem = capacityMem.getTotalCapacity(); long totalCpu = (long)(actualTotalCpu * cpuOvercommitRatio); @@ -313,6 +336,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, } capacityCpu.setUsedCapacity(usedCpu + cpu); capacityMem.setUsedCapacity(usedMem + ram); + capacityCpuCore.setUsedCapacity(usedCpuCore + cpucore); if (fromLastHost) { /* alloc from reserved */ @@ -324,6 +348,7 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, if (reservedCpu >= cpu && reservedMem >= ram) { capacityCpu.setReservedCapacity(reservedCpu - cpu); capacityMem.setReservedCapacity(reservedMem - ram); + capacityCpuCore.setReservedCapacity(reservedCpuCore - cpucore); } } else { /* alloc from free resource */ @@ -343,12 +368,38 @@ public class CapacityManagerImpl extends ManagerBase implements CapacityManager, totalMem + "; new used: " + capacityMem.getUsedCapacity() + ", reserved: " + capacityMem.getReservedCapacity() + "; requested mem: " + ram + ",alloc_from_last:" + fromLastHost); + long cluster_id = host.getClusterId(); + ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio"); + ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio"); + Float cpuOvercommitRatio = Float.parseFloat(cluster_detail_cpu.getValue()); + Float memoryOvercommitRatio = Float.parseFloat(cluster_detail_ram.getValue()); + + boolean hostHasCpuCapability, hostHasCapacity = false; + hostHasCpuCapability = checkIfHostHasCpuCapability(host.getId(), cpucore, cpuspeed); + + if (hostHasCpuCapability) { + // first check from reserved capacity + hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, true, cpuOvercommitRatio, memoryOvercommitRatio, true); + + // if not reserved, check the free capacity + if (!hostHasCapacity) + hostHasCapacity = checkIfHostHasCapacity(host.getId(), cpu, ram, false, cpuOvercommitRatio, memoryOvercommitRatio, true); + } + + if (!hostHasCapacity || !hostHasCpuCapability) { + throw new CloudRuntimeException("Host does not have enough capacity for vm " + vmId); + } + _capacityDao.update(capacityCpu.getId(), capacityCpu); _capacityDao.update(capacityMem.getId(), capacityMem); + _capacityDao.update(capacityCpuCore.getId(), capacityCpuCore); } }); } catch (Exception e) { s_logger.error("Exception allocating VM capacity", e); + if (e instanceof CloudRuntimeException) { + throw e; + } return; } } From a77d74ba0d3b82c5e191a11542cd9ad6ef827549 Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Tue, 28 Jan 2020 06:34:26 +0100 Subject: [PATCH 4/5] server: Fix NPE while update displayvm on vm with dynamic service offering (#3758) Steps to reproduce the issue (1) create a custom service offering (2) create a vm with the offering (3) update vm with displayvm=false, returns 
an error (local) > update virtualmachine id=f33fd06a-7643-40d1-833f-272845d9ba09 displayvm=false Error 530: {"updatevirtualmachineresponse":{"uuidList":[],"errorcode":530,"cserrorcode":9999}} --- server/src/main/java/com/cloud/vm/UserVmManagerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java index e21047fb510..270edceb92c 100644 --- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java @@ -2520,7 +2520,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir vmInstance.setDisplayVm(isDisplayVm); // Resource limit changes - ServiceOffering offering = _serviceOfferingDao.findByIdIncludingRemoved(vmInstance.getServiceOfferingId()); + ServiceOffering offering = _serviceOfferingDao.findByIdIncludingRemoved(vmInstance.getId(), vmInstance.getServiceOfferingId()); _resourceLimitMgr.changeResourceCount(vmInstance.getAccountId(), ResourceType.user_vm, isDisplayVm); _resourceLimitMgr.changeResourceCount(vmInstance.getAccountId(), ResourceType.cpu, isDisplayVm, new Long(offering.getCpu())); _resourceLimitMgr.changeResourceCount(vmInstance.getAccountId(), ResourceType.memory, isDisplayVm, new Long(offering.getRamSize())); From 8792070f84db8357ea8245bb080d88a0ad010e1c Mon Sep 17 00:00:00 2001 From: Gregor Riepl Date: Tue, 28 Jan 2020 06:35:15 +0100 Subject: [PATCH 5/5] Rethrow takeVMSnapshot() exception instead of returning null in VMSnapshotManagerImpl (#3761) Fixes: #3518 --- .../main/java/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java b/server/src/main/java/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java index e2cae0651eb..86357f7700f 100644 --- a/server/src/main/java/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java @@ -515,7 +515,7 @@ public class VMSnapshotManagerImpl extends MutualExclusiveIdsManagerBase impleme return snapshot; } catch (Exception e) { s_logger.debug("Failed to create vm snapshot: " + vmSnapshotId, e); - return null; + throw new CloudRuntimeException("Failed to create vm snapshot: " + vmSnapshotId, e); } }
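For context on why the change above helps callers, here is a tiny self-contained Java sketch (not the CloudStack classes touched by the patch; SnapshotService and its methods are invented) contrasting the old swallow-and-return-null behaviour with wrapping and rethrowing, which surfaces the root cause to the caller instead of a later NullPointerException far from the failure.

```java
// Minimal illustration of rethrowing a wrapped exception instead of returning
// null. All names here are invented for the sketch; only the pattern matches
// the patch above.
public class RethrowDemo {

    static final class SnapshotService {
        // Before: the failure is logged and null is returned, so the caller
        // only notices when it later dereferences the missing snapshot.
        String takeSnapshotReturningNull(long vmId) {
            try {
                return doWork(vmId);
            } catch (Exception e) {
                System.err.println("Failed to create vm snapshot: " + vmId + " - " + e);
                return null;
            }
        }

        // After: the original exception is wrapped and rethrown, so the caller
        // gets an error that names the failed snapshot and carries its cause.
        String takeSnapshotRethrowing(long vmId) {
            try {
                return doWork(vmId);
            } catch (Exception e) {
                throw new RuntimeException("Failed to create vm snapshot: " + vmId, e);
            }
        }

        private String doWork(long vmId) {
            // Simulated backend failure for the demo.
            throw new IllegalStateException("hypervisor rejected snapshot request");
        }
    }

    public static void main(String[] args) {
        SnapshotService service = new SnapshotService();

        String result = service.takeSnapshotReturningNull(42);
        System.out.println("old behaviour returned: " + result); // prints null, failure hidden

        try {
            service.takeSnapshotRethrowing(42);
        } catch (RuntimeException e) {
            System.out.println("new behaviour surfaced: " + e.getMessage()
                    + " (cause: " + e.getCause().getMessage() + ")");
        }
    }
}
```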