From 351de5fabd9d75ee3531140d2a397bb03d459de0 Mon Sep 17 00:00:00 2001 From: Vishesh Date: Mon, 24 Jun 2024 20:45:31 +0530 Subject: [PATCH 1/4] engine/orchestration: Update overcommit ratio during live VM migration (#9178) During live migration of a VM from between hosts having different cgroup versions (cgroupv2 & cgroup), overcommit ratio is ignored. This PR fixes the above issue. --- .../com/cloud/vm/VirtualMachineProfile.java | 4 ++ .../cloud/vm/VirtualMachineProfileImpl.java | 2 + .../cloud/vm/VirtualMachineManagerImpl.java | 40 ++++++++++++------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/api/src/main/java/com/cloud/vm/VirtualMachineProfile.java b/api/src/main/java/com/cloud/vm/VirtualMachineProfile.java index 62519412fdb..6770e4f3ba6 100644 --- a/api/src/main/java/com/cloud/vm/VirtualMachineProfile.java +++ b/api/src/main/java/com/cloud/vm/VirtualMachineProfile.java @@ -190,6 +190,10 @@ public interface VirtualMachineProfile { Map getParameters(); + void setCpuOvercommitRatio(Float cpuOvercommitRatio); + + void setMemoryOvercommitRatio(Float memoryOvercommitRatio); + Float getCpuOvercommitRatio(); Float getMemoryOvercommitRatio(); diff --git a/engine/components-api/src/main/java/com/cloud/vm/VirtualMachineProfileImpl.java b/engine/components-api/src/main/java/com/cloud/vm/VirtualMachineProfileImpl.java index bc689fef873..8297a73d77f 100644 --- a/engine/components-api/src/main/java/com/cloud/vm/VirtualMachineProfileImpl.java +++ b/engine/components-api/src/main/java/com/cloud/vm/VirtualMachineProfileImpl.java @@ -264,11 +264,13 @@ public class VirtualMachineProfileImpl implements VirtualMachineProfile { _offering = offering; } + @Override public void setCpuOvercommitRatio(Float cpuOvercommitRatio) { this.cpuOvercommitRatio = cpuOvercommitRatio; } + @Override public void setMemoryOvercommitRatio(Float memoryOvercommitRatio) { this.memoryOvercommitRatio = memoryOvercommitRatio; diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index a49609fc374..3a5e0c9e06c 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -1235,21 +1235,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac long destHostId = dest.getHost().getId(); vm.setPodIdToDeployIn(dest.getPod().getId()); - final Long cluster_id = dest.getCluster().getId(); - final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.CPU_OVER_COMMIT_RATIO); - final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, VmDetailConstants.MEMORY_OVER_COMMIT_RATIO); + final Long clusterId = dest.getCluster().getId(); + updateOverCommitRatioForVmProfile(vmProfile, clusterId); - if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) == null && - (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) { - userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true); - userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true); - } else if (userVmDetailsDao.findDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO) != null) { - userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, cluster_detail_cpu.getValue(), true); - userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, cluster_detail_ram.getValue(), true); - } - - vmProfile.setCpuOvercommitRatio(Float.parseFloat(cluster_detail_cpu.getValue())); - vmProfile.setMemoryOvercommitRatio(Float.parseFloat(cluster_detail_ram.getValue())); StartAnswer startAnswer = null; try { @@ -1264,7 +1252,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac resetVmNicsDeviceId(vm.getId()); _networkMgr.prepare(vmProfile, dest, ctx); if (vm.getHypervisorType() != HypervisorType.BareMetal) { - checkAndAttemptMigrateVmAcrossCluster(vm, cluster_id, dest.getStorageForDisks()); + checkAndAttemptMigrateVmAcrossCluster(vm, clusterId, dest.getStorageForDisks()); volumeMgr.prepare(vmProfile, dest); } @@ -1462,6 +1450,27 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } } + private void updateOverCommitRatioForVmProfile(VirtualMachineProfile vmProfile, long clusterId) { + final ClusterDetailsVO clusterDetailCpu = _clusterDetailsDao.findDetail(clusterId, VmDetailConstants.CPU_OVER_COMMIT_RATIO); + final ClusterDetailsVO clusterDetailRam = _clusterDetailsDao.findDetail(clusterId, VmDetailConstants.MEMORY_OVER_COMMIT_RATIO); + final float parsedClusterCpuDetailCpu = Float.parseFloat(clusterDetailCpu.getValue()); + final float parsedClusterDetailRam = Float.parseFloat(clusterDetailRam.getValue()); + UserVmDetailVO vmDetailCpu = userVmDetailsDao.findDetail(vmProfile.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO); + UserVmDetailVO vmDetailRam = userVmDetailsDao.findDetail(vmProfile.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO); + + if ((vmDetailCpu == null && parsedClusterCpuDetailCpu > 1f) || + (vmDetailCpu != null && Float.parseFloat(vmDetailCpu.getValue()) != parsedClusterCpuDetailCpu)) { + userVmDetailsDao.addDetail(vmProfile.getId(), VmDetailConstants.CPU_OVER_COMMIT_RATIO, clusterDetailCpu.getValue(), true); + } + if ((vmDetailRam == null && parsedClusterDetailRam > 1f) || + (vmDetailRam != null && Float.parseFloat(vmDetailRam.getValue()) != parsedClusterDetailRam)) { + userVmDetailsDao.addDetail(vmProfile.getId(), VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, clusterDetailRam.getValue(), true); + } + + vmProfile.setCpuOvercommitRatio(Float.parseFloat(clusterDetailCpu.getValue())); + vmProfile.setMemoryOvercommitRatio(Float.parseFloat(clusterDetailRam.getValue())); + } + /** * Setting pod id to null can result in migration of Volumes across pods. This is not desirable for VMs which * have a volume in Ready state (happens when a VM is shutdown and started again). @@ -2730,6 +2739,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _networkMgr.prepareNicForMigration(profile, dest); volumeMgr.prepareForMigration(profile, dest); profile.setConfigDriveLabel(VmConfigDriveLabel.value()); + updateOverCommitRatioForVmProfile(profile, dest.getHost().getClusterId()); final VirtualMachineTO to = toVmTO(profile); final PrepareForMigrationCommand pfmc = new PrepareForMigrationCommand(to); From 6b25ed7a027b2291621dcdc6b03a3db36fb1912d Mon Sep 17 00:00:00 2001 From: dahn Date: Wed, 26 Jun 2024 17:32:08 +0200 Subject: [PATCH 2/4] prevent an NPE on an uninitialised TemplateObject (#8898) * prevent an NPE on an uninitialised TemplateObject * move npe handler up-stack * Update engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java * catch yet one level up * Update engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java * Update engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java * extra guard * Revert "prevent an NPE on an uninitialised TemplateObject" This reverts commit e602a65ea62e4707828483a4ddea288d81ff06f5. --- .../cloudstack/storage/image/TemplateDataFactoryImpl.java | 3 +++ .../cloudstack/storage/image/store/TemplateObject.java | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/TemplateDataFactoryImpl.java b/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/TemplateDataFactoryImpl.java index 8951b9d7c24..17cc80bcc9e 100644 --- a/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/TemplateDataFactoryImpl.java +++ b/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/TemplateDataFactoryImpl.java @@ -99,6 +99,9 @@ public class TemplateDataFactoryImpl implements TemplateDataFactory { @Override public TemplateInfo getTemplate(long templateId, DataStore store) { VMTemplateVO templ = imageDataDao.findById(templateId); + if (templ == null) { + return null; + } if (store == null && !templ.isDirectDownload()) { TemplateObject tmpl = TemplateObject.getTemplate(templ, null, null); return tmpl; diff --git a/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java b/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java index 3883637cd07..899f1d83e0b 100644 --- a/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java +++ b/engine/storage/image/src/main/java/org/apache/cloudstack/storage/image/store/TemplateObject.java @@ -81,6 +81,10 @@ public class TemplateObject implements TemplateInfo { } protected void configure(VMTemplateVO template, DataStore dataStore) { + if (template == null) { + String msg = String.format("Template Object is not properly initialised %s", this.toString()); + s_logger.warn(msg); + } imageVO = template; this.dataStore = dataStore; } @@ -97,6 +101,10 @@ public class TemplateObject implements TemplateInfo { } public VMTemplateVO getImage() { + if (imageVO == null) { + String msg = String.format("Template Object is not properly initialised %s", this.toString()); + s_logger.error(msg); + } // somehow the nullpointer is needed : refacter needed!?! return imageVO; } From 644f3a3f48fe2733ab3e6988ae4a15519cf8328b Mon Sep 17 00:00:00 2001 From: Abhisar Sinha <63767682+abh1sar@users.noreply.github.com> Date: Fri, 28 Jun 2024 18:18:08 +0530 Subject: [PATCH 3/4] Add, Delete Storage Pool commands should be able execute on a host in maintenance (#9301) * Restart agent when host comes out of maintenance * Don't send CreateStoragePoolCommand to hosts in maintenance mode * CreateStoragePoolCommand can run when host in maintenance. Reverted the change to restart agent when host was already up and in maintenance * Reverted changes done to ResourceManagerImplTest --- .../main/java/com/cloud/resource/ResourceManager.java | 2 ++ .../main/java/com/cloud/agent/manager/AgentAttache.java | 7 +++++-- .../CloudStackPrimaryDataStoreLifeCycleImpl.java | 2 +- .../java/com/cloud/resource/ResourceManagerImpl.java | 9 +++++++++ .../java/com/cloud/resource/MockResourceManagerImpl.java | 6 ++++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java index 9308be5fb32..91197de6a84 100755 --- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java +++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java @@ -126,6 +126,8 @@ public interface ResourceManager extends ResourceService, Configurable { public List listAllUpAndEnabledHostsInOneZoneByHypervisor(HypervisorType type, long dcId); + public List listAllUpHostsInOneZoneByHypervisor(HypervisorType type, long dcId); + public List listAllUpAndEnabledHostsInOneZone(long dcId); public List listAllHostsInOneZoneByType(Host.Type type, long dcId); diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java index b12a72136dd..2d8d6f1c48e 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java @@ -44,6 +44,8 @@ import com.cloud.agent.api.CheckOnHostCommand; import com.cloud.agent.api.CheckVirtualMachineCommand; import com.cloud.agent.api.CleanupNetworkRulesCmd; import com.cloud.agent.api.Command; +import com.cloud.agent.api.CreateStoragePoolCommand; +import com.cloud.agent.api.DeleteStoragePoolCommand; import com.cloud.agent.api.MaintainCommand; import com.cloud.agent.api.MigrateCommand; import com.cloud.agent.api.ModifySshKeysCommand; @@ -119,8 +121,9 @@ public abstract class AgentAttache { StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(), ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(), CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(), - ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), SetupMSListCommand.class.toString(), RollingMaintenanceCommand.class.toString(), - CleanupPersistentNetworkResourceCommand.class.toString()}; + ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), + CreateStoragePoolCommand.class.toString(), DeleteStoragePoolCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), + SetupMSListCommand.class.toString(), RollingMaintenanceCommand.class.toString(), CleanupPersistentNetworkResourceCommand.class.toString()}; protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() }; static { Arrays.sort(s_commandsAllowedInMaintenanceMode); diff --git a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/lifecycle/CloudStackPrimaryDataStoreLifeCycleImpl.java b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/lifecycle/CloudStackPrimaryDataStoreLifeCycleImpl.java index 213e5620553..a5730d90802 100644 --- a/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/lifecycle/CloudStackPrimaryDataStoreLifeCycleImpl.java +++ b/plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/lifecycle/CloudStackPrimaryDataStoreLifeCycleImpl.java @@ -467,7 +467,7 @@ public class CloudStackPrimaryDataStoreLifeCycleImpl implements PrimaryDataStore @Override public boolean attachZone(DataStore dataStore, ZoneScope scope, HypervisorType hypervisorType) { - List hosts = _resourceMgr.listAllUpAndEnabledHostsInOneZoneByHypervisor(hypervisorType, scope.getScopeId()); + List hosts = _resourceMgr.listAllUpHostsInOneZoneByHypervisor(hypervisorType, scope.getScopeId()); s_logger.debug("In createPool. Attaching the pool to each of the hosts."); List poolHosts = new ArrayList(); for (HostVO host : hosts) { diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 1909063dfe3..04a5f0e8d01 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -3255,6 +3255,15 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, return sc.list(); } + @Override + public List listAllUpHostsInOneZoneByHypervisor(final HypervisorType type, final long dcId) { + final QueryBuilder sc = QueryBuilder.create(HostVO.class); + sc.and(sc.entity().getHypervisorType(), Op.EQ, type); + sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId); + sc.and(sc.entity().getStatus(), Op.EQ, Status.Up); + return sc.list(); + } + @Override public List listAllUpAndEnabledHostsInOneZone(final long dcId) { final QueryBuilder sc = QueryBuilder.create(HostVO.class); diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 4d5b5ba584b..538125a1dbc 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -588,6 +588,12 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana return null; } + @Override + public List listAllUpHostsInOneZoneByHypervisor(final HypervisorType type, final long dcId) { + // TODO Auto-generated method stub + return null; + } + @Override public List listAllUpAndEnabledHostsInOneZone(final long dcId) { // TODO Auto-generated method stub From 983f164c57fb47ccacd6b9eb1eb9448191a72150 Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Fri, 28 Jun 2024 18:46:06 +0530 Subject: [PATCH 4/4] Fixed src datastore on copy check for PowerFlex/ScaleIO storage driver (#9310) --- .../storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java index 431fddb566f..600720d594a 100644 --- a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java +++ b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/driver/ScaleIOPrimaryDataStoreDriver.java @@ -1153,7 +1153,7 @@ public class ScaleIOPrimaryDataStoreDriver implements PrimaryDataStoreDriver { @Override public boolean canCopy(DataObject srcData, DataObject destData) { - DataStore srcStore = destData.getDataStore(); + DataStore srcStore = srcData.getDataStore(); DataStore destStore = destData.getDataStore(); if ((srcStore.getRole() == DataStoreRole.Primary && (srcData.getType() == DataObjectType.TEMPLATE || srcData.getType() == DataObjectType.VOLUME)) && (destStore.getRole() == DataStoreRole.Primary && destData.getType() == DataObjectType.VOLUME)) {