From adce18b2c05dc84bb6bcb8c2d76b349d2d97021c Mon Sep 17 00:00:00 2001 From: Kris McQueen Date: Mon, 23 Aug 2010 13:41:03 -0700 Subject: [PATCH 1/3] [merge from 2.1.x] Bug 5781: Only grab capacity global lock when accessing capacity tables. Once the queries are done, release the lock so that others can query for the data or update the data as needed. --- .../impl/UserConcentratedAllocator.java | 63 +++--- .../src/com/cloud/alert/AlertManagerImpl.java | 207 +++++++++--------- 2 files changed, 139 insertions(+), 131 deletions(-) diff --git a/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java b/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java index 29ba8bc511c..ef407afedd3 100755 --- a/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java +++ b/server/src/com/cloud/agent/manager/allocator/impl/UserConcentratedAllocator.java @@ -154,47 +154,23 @@ public class UserConcentratedAllocator implements PodAllocator { } private boolean dataCenterAndPodHasEnoughCapacity(long dataCenterId, long podId, long capacityNeeded, short capacityType, long[] hostCandidate) { + List capacities = null; if (m_capacityCheckLock.lock(120)) { // 2 minutes try { SearchCriteria sc = _capacityDao.createSearchCriteria(); sc.addAnd("capacityType", SearchCriteria.Op.EQ, capacityType); sc.addAnd("dataCenterId", SearchCriteria.Op.EQ, dataCenterId); sc.addAnd("podId", SearchCriteria.Op.EQ, podId); - List capacities = _capacityDao.search(sc, null); - boolean enoughCapacity = false; - if (capacities != null) { - for (CapacityVO capacity : capacities) { - if(capacityType == CapacityVO.CAPACITY_TYPE_CPU || capacityType == CapacityVO.CAPACITY_TYPE_MEMORY) { - // - // for CPU/Memory, we now switch to static allocation - // - if ((capacity.getTotalCapacity() - - calcHostAllocatedCpuMemoryCapacity(capacity.getHostOrPoolId(), capacityType)) >= capacityNeeded) { - - hostCandidate[0] = capacity.getHostOrPoolId(); - enoughCapacity = true; - break; - } - } else { - if ((capacity.getTotalCapacity() - capacity.getUsedCapacity()) >= capacityNeeded) { - - hostCandidate[0] = capacity.getHostOrPoolId(); - enoughCapacity = true; - break; - } - } - } - } - return enoughCapacity; + capacities = _capacityDao.search(sc, null); } finally { m_capacityCheckLock.unlock(); } } else { - s_logger.error("Unable to acquire synchronization lock for pod allocation"); - - // we now try to enforce reservation-style allocation, waiting time has been adjusted - // to 2 minutes - return false; + s_logger.error("Unable to acquire synchronization lock for pod allocation"); + + // we now try to enforce reservation-style allocation, waiting time has been adjusted + // to 2 minutes + return false; /* // If we can't lock the table, just return that there is enough capacity and allow instance creation to fail on the agent @@ -203,6 +179,31 @@ public class UserConcentratedAllocator implements PodAllocator { return true; */ } + + boolean enoughCapacity = false; + if (capacities != null) { + for (CapacityVO capacity : capacities) { + if(capacityType == CapacityVO.CAPACITY_TYPE_CPU || capacityType == CapacityVO.CAPACITY_TYPE_MEMORY) { + // + // for CPU/Memory, we now switch to static allocation + // + if ((capacity.getTotalCapacity() - + calcHostAllocatedCpuMemoryCapacity(capacity.getHostOrPoolId(), capacityType)) >= capacityNeeded) { + + hostCandidate[0] = capacity.getHostOrPoolId(); + enoughCapacity = true; + break; + } + } else { + if ((capacity.getTotalCapacity() - 
capacity.getUsedCapacity()) >= capacityNeeded) { + hostCandidate[0] = capacity.getHostOrPoolId(); + enoughCapacity = true; + break; + } + } + } + } + return enoughCapacity; } private boolean skipCalculation(VMInstanceVO vm) { diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 1ad3fe7076b..1d1fc589492 100644 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -326,115 +326,122 @@ public class AlertManagerImpl implements AlertManager { // is stopped we updated the amount allocated, and when VM sync reports a changed state, we update // the amount allocated. Hopefully it's limited to 3 entry points and will keep the amount allocated // per host accurate. - - if (m_capacityCheckLock.lock(5)) { // 5 second timeout - if (s_logger.isTraceEnabled()) { - s_logger.trace("recalculating system capacity"); - } - try { - // delete the old records - _capacityDao.clearNonStorageCapacities(); - - // get all hosts.. - SearchCriteria sc = _hostDao.createSearchCriteria(); - sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString()); - List hosts = _hostDao.search(sc, null); - - // prep the service offerings - List offerings = _offeringsDao.listAll(); - Map offeringsMap = new HashMap(); - for (ServiceOfferingVO offering : offerings) { - offeringsMap.put(offering.getId(), offering); - } - for (HostVO host : hosts) { - if (host.getType() != Host.Type.Routing) { - continue; - } - long cpu = 0; - long usedMemory = 0; - List domainRouters = _routerDao.listUpByHostId(host.getId()); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found " + domainRouters.size() + " router domains on host " + host.getId()); - } - for (DomainRouterVO router : domainRouters) { - usedMemory += router.getRamSize() * 1024L * 1024L; - } - List proxys = _consoleProxyDao.listUpByHostId(host.getId()); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found " + proxys.size() + " console proxy on host " + host.getId()); - } - for(ConsoleProxyVO proxy : proxys) { - usedMemory += proxy.getRamSize() * 1024L * 1024L; - } - - List secStorageVms = _secStorgaeVmDao.listUpByHostId(host.getId()); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found " + secStorageVms.size() + " secondary storage VM on host " + host.getId()); - } - for(SecondaryStorageVmVO secStorageVm : secStorageVms) { - usedMemory += secStorageVm.getRamSize() * 1024L * 1024L; - } - - List vms = _userVmDao.listUpByHostId(host.getId()); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found " + vms.size() + " user VM on host " + host.getId()); - } - - for (UserVmVO vm : vms) { - ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId()); - usedMemory += so.getRamSize() * 1024L * 1024L; - cpu += so.getCpu() * (so.getSpeed() * 0.99); - } + if (s_logger.isTraceEnabled()) { + s_logger.trace("recalculating system capacity"); + } + List newCapacities = new ArrayList(); - long totalMemory = host.getTotalMemory(); + // get all hosts.. 
+ SearchCriteria sc = _hostDao.createSearchCriteria(); + sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString()); + List hosts = _hostDao.search(sc, null); - CapacityVO newMemoryCapacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), usedMemory, totalMemory, CapacityVO.CAPACITY_TYPE_MEMORY); - CapacityVO newCPUCapacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), cpu, (long)(host.getCpus()*host.getSpeed()* _cpuOverProvisioningFactor), CapacityVO.CAPACITY_TYPE_CPU); - _capacityDao.persist(newMemoryCapacity); - _capacityDao.persist(newCPUCapacity); - } + // prep the service offerings + List offerings = _offeringsDao.listAll(); + Map offeringsMap = new HashMap(); + for (ServiceOfferingVO offering : offerings) { + offeringsMap.put(offering.getId(), offering); + } + for (HostVO host : hosts) { + if (host.getType() != Host.Type.Routing) { + continue; + } + + long cpu = 0; + long usedMemory = 0; + List domainRouters = _routerDao.listUpByHostId(host.getId()); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found " + domainRouters.size() + " router domains on host " + host.getId()); + } + for (DomainRouterVO router : domainRouters) { + usedMemory += router.getRamSize() * 1024L * 1024L; + } + + List proxys = _consoleProxyDao.listUpByHostId(host.getId()); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found " + proxys.size() + " console proxy on host " + host.getId()); + } + for(ConsoleProxyVO proxy : proxys) { + usedMemory += proxy.getRamSize() * 1024L * 1024L; + } + + List secStorageVms = _secStorgaeVmDao.listUpByHostId(host.getId()); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found " + secStorageVms.size() + " secondary storage VM on host " + host.getId()); + } + for(SecondaryStorageVmVO secStorageVm : secStorageVms) { + usedMemory += secStorageVm.getRamSize() * 1024L * 1024L; + } + + List vms = _userVmDao.listUpByHostId(host.getId()); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found " + vms.size() + " user VM on host " + host.getId()); + } + + for (UserVmVO vm : vms) { + ServiceOffering so = offeringsMap.get(vm.getServiceOfferingId()); + usedMemory += so.getRamSize() * 1024L * 1024L; + cpu += so.getCpu() * (so.getSpeed() * 0.99); + } + + long totalMemory = host.getTotalMemory(); - // Calculate storage pool capacity - List storagePools = _storagePoolDao.listAllActive(); - for (StoragePoolVO pool : storagePools) { - long disk = 0l; - Pair sizes = _volumeDao.getCountAndTotalByPool(pool.getId()); - disk = sizes.second(); - int provFactor = 1; - if( pool.getPoolType() == StoragePoolType.NetworkFilesystem ) { - provFactor = _overProvisioningFactor; - } - CapacityVO newStorageCapacity = new CapacityVO(pool.getId(), pool.getDataCenterId(), pool.getPodId(), disk, pool.getCapacityBytes() * provFactor, CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); - _capacityDao.persist(newStorageCapacity); + CapacityVO newMemoryCapacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), usedMemory, totalMemory, CapacityVO.CAPACITY_TYPE_MEMORY); + CapacityVO newCPUCapacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), cpu, (long)(host.getCpus()*host.getSpeed()* _cpuOverProvisioningFactor), CapacityVO.CAPACITY_TYPE_CPU); + newCapacities.add(newMemoryCapacity); + newCapacities.add(newCPUCapacity); + } - continue; - } + // Calculate storage pool capacity + List storagePools = _storagePoolDao.listAllActive(); + for (StoragePoolVO pool : storagePools) { + long disk = 0l; + Pair sizes = 
_volumeDao.getCountAndTotalByPool(pool.getId()); + disk = sizes.second(); + int provFactor = 1; + if( pool.getPoolType() == StoragePoolType.NetworkFilesystem ) { + provFactor = _overProvisioningFactor; + } + CapacityVO newStorageCapacity = new CapacityVO(pool.getId(), pool.getDataCenterId(), pool.getPodId(), disk, pool.getCapacityBytes() * provFactor, CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); + newCapacities.add(newStorageCapacity); - // Calculate new Public IP capacity - List datacenters = _dcDao.listAll(); - for (DataCenterVO datacenter : datacenters) { - long dcId = datacenter.getId(); + continue; + } - int totalPublicIPs = _publicIPAddressDao.countIPs(dcId, -1, false); - int allocatedPublicIPs = _publicIPAddressDao.countIPs(dcId, -1, true); + // Calculate new Public IP capacity + List datacenters = _dcDao.listAll(); + for (DataCenterVO datacenter : datacenters) { + long dcId = datacenter.getId(); - CapacityVO newPublicIPCapacity = new CapacityVO(null, dcId, null, allocatedPublicIPs, totalPublicIPs, CapacityVO.CAPACITY_TYPE_PUBLIC_IP); - _capacityDao.persist(newPublicIPCapacity); - } + int totalPublicIPs = _publicIPAddressDao.countIPs(dcId, -1, false); + int allocatedPublicIPs = _publicIPAddressDao.countIPs(dcId, -1, true); + + CapacityVO newPublicIPCapacity = new CapacityVO(null, dcId, null, allocatedPublicIPs, totalPublicIPs, CapacityVO.CAPACITY_TYPE_PUBLIC_IP); + newCapacities.add(newPublicIPCapacity); + } + + // Calculate new Private IP capacity + List pods = _podDao.listAll(); + for (HostPodVO pod : pods) { + long podId = pod.getId(); + long dcId = pod.getDataCenterId(); + + int totalPrivateIPs = _privateIPAddressDao.countIPs(podId, dcId, false); + int allocatedPrivateIPs = _privateIPAddressDao.countIPs(podId, dcId, true); - // Calculate new Private IP capacity - List pods = _podDao.listAll(); - for (HostPodVO pod : pods) { - long podId = pod.getId(); - long dcId = pod.getDataCenterId(); - - int totalPrivateIPs = _privateIPAddressDao.countIPs(podId, dcId, false); - int allocatedPrivateIPs = _privateIPAddressDao.countIPs(podId, dcId, true); - - CapacityVO newPrivateIPCapacity = new CapacityVO(null, dcId, podId, allocatedPrivateIPs, totalPrivateIPs, CapacityVO.CAPACITY_TYPE_PRIVATE_IP); - _capacityDao.persist(newPrivateIPCapacity); - } + CapacityVO newPrivateIPCapacity = new CapacityVO(null, dcId, podId, allocatedPrivateIPs, totalPrivateIPs, CapacityVO.CAPACITY_TYPE_PRIVATE_IP); + newCapacities.add(newPrivateIPCapacity); + } + + if (m_capacityCheckLock.lock(5)) { // 5 second timeout + try { + // delete the old records + _capacityDao.clearNonStorageCapacities(); + + for (CapacityVO newCapacity : newCapacities) { + _capacityDao.persist(newCapacity); + } } finally { m_capacityCheckLock.unlock(); } From d2ffa6b68d359fff80e87d5235b82785936ed186 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 20 Aug 2010 16:27:48 -0700 Subject: [PATCH 2/3] bug 5781: also reduce the scope of the global lock when updating storage capacity. 
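Note: this change extends the approach of PATCH 1/3, where dataCenterAndPodHasEnoughCapacity() now performs only the DAO search while m_capacityCheckLock is held, releases the lock in a finally block, and walks the results afterwards. The sketch below is a minimal, self-contained illustration of that read-side flow; it uses simplified stand-ins (a java.util.concurrent ReentrantLock, a local CapacityRow class, an in-memory list) rather than the patch's own lock type, CapacityVO, or DAO classes.

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;

// Read-side sketch: copy the capacity rows out while the lock is held,
// then do the "is there room?" arithmetic after the lock is released.
public class PodCapacityCheckSketch {
    static final class CapacityRow {
        final long hostId;
        final long used;
        final long total;
        CapacityRow(long hostId, long used, long total) {
            this.hostId = hostId;
            this.used = used;
            this.total = total;
        }
    }

    private final ReentrantLock capacityCheckLock = new ReentrantLock(); // stands in for m_capacityCheckLock
    private final List<CapacityRow> capacityTable = new ArrayList<>();   // stands in for the capacity table/DAO

    public boolean hasEnoughCapacity(long needed, long[] hostCandidate) throws InterruptedException {
        List<CapacityRow> rows;
        if (capacityCheckLock.tryLock(120, TimeUnit.SECONDS)) {          // 2-minute wait, as in the patch
            try {
                rows = new ArrayList<>(capacityTable);                   // stands in for _capacityDao.search(sc, null)
            } finally {
                capacityCheckLock.unlock();                              // release before doing any capacity math
            }
        } else {
            return false;                                                // could not lock: report no capacity
        }

        // The comparison loop now runs with no lock held, so other readers and
        // writers of the capacity table are not blocked by this arithmetic.
        for (CapacityRow row : rows) {
            if (row.total - row.used >= needed) {
                hostCandidate[0] = row.hostId;
                return true;
            }
        }
        return false;
    }
}

The 120-second wait and the "report no capacity when the lock cannot be taken" behavior mirror the patch; the structural change is only that the loop over the rows runs after unlock() instead of inside the try block.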
--- .../src/com/cloud/alert/AlertManagerImpl.java | 2 +- .../src/com/cloud/server/StatsCollector.java | 106 ++++++++++-------- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 1d1fc589492..378bc5dc699 100644 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -433,7 +433,7 @@ public class AlertManagerImpl implements AlertManager { CapacityVO newPrivateIPCapacity = new CapacityVO(null, dcId, podId, allocatedPrivateIPs, totalPrivateIPs, CapacityVO.CAPACITY_TYPE_PRIVATE_IP); newCapacities.add(newPrivateIPCapacity); } - + if (m_capacityCheckLock.lock(5)) { // 5 second timeout try { // delete the old records diff --git a/server/src/com/cloud/server/StatsCollector.java b/server/src/com/cloud/server/StatsCollector.java index c3b87f7eb00..58cfe27cdf8 100644 --- a/server/src/com/cloud/server/StatsCollector.java +++ b/server/src/com/cloud/server/StatsCollector.java @@ -292,8 +292,50 @@ public class StatsCollector { } } _storagePoolStats = storagePoolStats; + + // a list to store the new capacity entries that will be committed once everything is calculated + List newCapacities = new ArrayList(); - if (m_capacityCheckLock.lock(5)) { // 5 second timeout + // create new entries + for (Long hostId : storageStats.keySet()) { + StorageStats stats = storageStats.get(hostId); + HostVO host = _hostDao.findById(hostId); + host.setTotalSize(stats.getCapacityBytes()); + _hostDao.update(host.getId(), host); + + if (Host.Type.SecondaryStorage.equals(host.getType())) { + CapacityVO capacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE); + newCapacities.add(capacity); +// _capacityDao.persist(capacity); + } else if (Host.Type.Storage.equals(host.getType())) { + CapacityVO capacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_STORAGE); + newCapacities.add(capacity); +// _capacityDao.persist(capacity); + } + } + + for (Long poolId : storagePoolStats.keySet()) { + StorageStats stats = storagePoolStats.get(poolId); + StoragePoolVO pool = _storagePoolDao.findById(poolId); + + if (pool == null) { + continue; + } + + pool.setCapacityBytes(stats.getCapacityBytes()); + long available = stats.getCapacityBytes() - stats.getByteUsed(); + if( available < 0 ) { + available = 0; + } + pool.setAvailableBytes(available); + _storagePoolDao.update(pool.getId(), pool); + + CapacityVO capacity = new CapacityVO(poolId, pool.getDataCenterId(), pool.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_STORAGE); + newCapacities.add(capacity); +// _capacityDao.persist(capacity); + } + + if (m_capacityCheckLock.lock(5)) { // 5 second timeout if (s_logger.isTraceEnabled()) { s_logger.trace("recalculating system storage capacity"); } @@ -304,54 +346,22 @@ public class StatsCollector { // to collect the stats from an agent and update the database as needed. 
The // listener model has connects/disconnects to keep things in sync much better // than this model right now - _capacityDao.clearStorageCapacities(); - - // create new entries - for (Long hostId : storageStats.keySet()) { - StorageStats stats = storageStats.get(hostId); - HostVO host = _hostDao.findById(hostId); - host.setTotalSize(stats.getCapacityBytes()); - _hostDao.update(host.getId(), host); - - if (Host.Type.SecondaryStorage.equals(host.getType())) { - CapacityVO capacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE); - _capacityDao.persist(capacity); - } else if (Host.Type.Storage.equals(host.getType())) { - CapacityVO capacity = new CapacityVO(host.getId(), host.getDataCenterId(), host.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_STORAGE); - _capacityDao.persist(capacity); - } - } - - for (Long poolId : storagePoolStats.keySet()) { - StorageStats stats = storagePoolStats.get(poolId); - StoragePoolVO pool = _storagePoolDao.findById(poolId); - - if (pool == null) { - continue; - } - - pool.setCapacityBytes(stats.getCapacityBytes()); - long available = stats.getCapacityBytes() - stats.getByteUsed(); - if( available < 0 ) { - available = 0; - } - pool.setAvailableBytes(available); - _storagePoolDao.update(pool.getId(), pool); - - CapacityVO capacity = new CapacityVO(poolId, pool.getDataCenterId(), pool.getPodId(), stats.getByteUsed(), stats.getCapacityBytes(), CapacityVO.CAPACITY_TYPE_STORAGE); - _capacityDao.persist(capacity); - } - } finally { - m_capacityCheckLock.unlock(); + _capacityDao.clearStorageCapacities(); + + for (CapacityVO newCapacity : newCapacities) { + _capacityDao.persist(newCapacity); + } + } finally { + m_capacityCheckLock.unlock(); } - if (s_logger.isTraceEnabled()) { - s_logger.trace("done recalculating system storage capacity"); - } - } else { - if (s_logger.isTraceEnabled()) { - s_logger.trace("not recalculating system storage capacity, unable to lock capacity table"); - } - } + if (s_logger.isTraceEnabled()) { + s_logger.trace("done recalculating system storage capacity"); + } + } else { + if (s_logger.isTraceEnabled()) { + s_logger.trace("not recalculating system storage capacity, unable to lock capacity table"); + } + } } catch (Throwable t) { s_logger.error("Error trying to retrieve storage stats", t); } From 27b038efcb598852622eb61df35ca0019e0a6b4e Mon Sep 17 00:00:00 2001 From: abhishek Date: Mon, 23 Aug 2010 14:07:35 -0700 Subject: [PATCH 3/3] Correcting hypervisor type enum --- server/src/com/cloud/agent/manager/AgentManagerImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index 01ca1820670..7bdd846d2c0 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -1683,7 +1683,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory { // If this command is from a KVM agent, or from an agent that has a // null hypervisor type, don't do the CIDR check - if (hypervisorType == null || hypervisorType == Hypervisor.Type.KVM || hypervisorType == Hypervisor.Type.VMware) + if (hypervisorType == null || hypervisorType == Hypervisor.Type.KVM || hypervisorType == Hypervisor.Type.VmWare) doCidrCheck = false; if (doCidrCheck)