From de557663ec2a16a5372f608f01793b83bedfd6f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gabriel=20Beims=20Br=C3=A4scher?=
Date: Mon, 19 Apr 2021 04:41:42 -0300
Subject: [PATCH] Migrate/Stop VMs with local storage when preparing host for maintenance (#4212)

---
Review notes (this section is not part of the commit message):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch, and generic type arguments that the copy had lost
  (for example ConfigKey<String>) were restored. Check the context lines
  against the target tree, or apply with "git apply --ignore-whitespace".
- migrateAwayVmWithVolumes() now throws a CloudRuntimeException when the
  planner hands back no destination, instead of dereferencing null.
- The strategy predicates compare with String.equalsIgnoreCase(), which does
  not depend on the default locale and accepts null;
  isMaintenanceLocalStrategyDefault() no longer calls toString() on a value
  that may be null.
- UserVmManagerImpl.isVMUsingLocalStorage() is annotated with @Override.
- Hunk lengths, hunk offsets and the diffstat were updated to match. The
  "index" blob ids and the leading "From <sha>" line still refer to the
  original commit. The From: header carries no e-mail address.

 .../com/cloud/resource/ResourceManager.java   |  8 ++
 .../cloud/resource/ResourceManagerImpl.java   | 97 ++++++++++++++++++-
 .../main/java/com/cloud/vm/UserVmManager.java |  2 +
 .../java/com/cloud/vm/UserVmManagerImpl.java  | 11 +-
 4 files changed, 109 insertions(+), 9 deletions(-)

diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
index ade2eeb3f84..2857bbc9c44 100755
--- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
+++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java
@@ -53,6 +53,14 @@ public interface ResourceManager extends ResourceService, Configurable {
             "Number of retries when preparing a host into Maintenance Mode is faulty before failing",
             false);
 
+    ConfigKey<String> HOST_MAINTENANCE_LOCAL_STRATEGY = new ConfigKey<>("Advanced", String.class,
+            "host.maintenance.local.storage.strategy", "Error",
+            "Defines the strategy towards VMs with volumes on local storage when putting a host in maintenance. "
+                    + "The default strategy is 'Error', preventing maintenance in such a case. "
+                    + "Choose 'Migration' strategy to migrate away VMs running on local storage. "
+                    + "To force-stop VMs, choose 'ForceStop' strategy",
+            true, ConfigKey.Scope.Global);
+
     /**
      * Register a listener for different types of resource life cycle events.
      * There can only be one type of listener per type of host.
diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
index f2455020c5a..a3b9df8fb1c 100755
--- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
+++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
@@ -30,6 +30,18 @@ import java.util.Random;
 import javax.inject.Inject;
 import javax.naming.ConfigurationException;
 
+import com.cloud.deploy.DataCenterDeployment;
+import com.cloud.deploy.DeployDestination;
+import com.cloud.deploy.DeploymentPlanner;
+import com.cloud.deploy.DeploymentPlanningManager;
+import com.cloud.exception.InsufficientServerCapacityException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.service.ServiceOfferingVO;
+import com.cloud.service.dao.ServiceOfferingDao;
+import com.cloud.storage.dao.DiskOfferingDao;
+import com.cloud.vm.UserVmManager;
+import com.cloud.vm.VirtualMachineProfile;
+import com.cloud.vm.VirtualMachineProfileImpl;
 import org.apache.cloudstack.api.ApiConstants;
 import org.apache.cloudstack.api.command.admin.cluster.AddClusterCmd;
 import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd;
@@ -206,6 +218,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
     @Inject
     private CapacityDao _capacityDao;
     @Inject
+    private DiskOfferingDao diskOfferingDao;
+    @Inject
+    private ServiceOfferingDao serviceOfferingDao;
+    @Inject
     private HostDao _hostDao;
     @Inject
     private HostDetailsDao _hostDetailsDao;
@@ -226,6 +242,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
     @Inject
     private IPAddressDao _publicIPAddressDao;
     @Inject
+    private DeploymentPlanningManager deploymentManager;
+    @Inject
     private VirtualMachineManager _vmMgr;
     @Inject
     private VMInstanceDao _vmDao;
@@ -239,6 +257,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
     private DedicatedResourceDao _dedicatedDao;
     @Inject
     private ServiceOfferingDetailsDao _serviceOfferingDetailsDao;
+    @Inject
+    private UserVmManager userVmManager;
 
     private List<? extends Discoverer> _discoverers;
 
@@ -1273,6 +1293,19 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
                 } else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){
                     //Migration is not supported for LXC Vms. Schedule restart instead.
                     _haMgr.scheduleRestart(vm, false);
+                } else if (userVmManager.isVMUsingLocalStorage(vm)) {
+                    if (isMaintenanceLocalStrategyForceStop()) {
+                        _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop);
+                    } else if (isMaintenanceLocalStrategyMigrate()) {
+                        migrateAwayVmWithVolumes(host, vm);
+                    } else if (!isMaintenanceLocalStrategyDefault()) {
+                        String logMessage = String.format(
+                                "Unsupported host.maintenance.local.storage.strategy: %s. Please set a strategy according to the global settings description: "
+                                        + "'Error', 'Migration', or 'ForceStop'.",
+                                HOST_MAINTENANCE_LOCAL_STRATEGY.value());
+                        s_logger.error(logMessage);
+                        throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage.");
+                    }
                 } else {
                     s_logger.info("Maintenance: scheduling migration of VM " + vm.getUuid() + " from host " + host.getUuid());
                     _haMgr.scheduleMigration(vm);
@@ -1282,6 +1315,40 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
         return true;
     }
 
+    /**
+     * Looks for a host able to allocate the VM and migrates the VM together with its volumes.
+     *
+     * @param host the host the VM is migrated away from
+     * @param vm the VM to migrate
+     * @throws CloudRuntimeException if no deployment destination is found or the migration fails
+     */
+    private void migrateAwayVmWithVolumes(HostVO host, VMInstanceVO vm) {
+        final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, null, null);
+        ServiceOfferingVO offeringVO = serviceOfferingDao.findById(vm.getServiceOfferingId());
+        final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, offeringVO, null, null);
+        plan.setMigrationPlan(true);
+        DeployDestination dest = null;
+        try {
+            dest = deploymentManager.planDeployment(profile, plan, new DeploymentPlanner.ExcludeList(), null);
+        } catch (InsufficientServerCapacityException e) {
+            throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s].", vm.getId(), vm.getInstanceName()),
+                    e);
+        }
+        // Guard against the planner handing back no destination; dest.getHost() below would otherwise throw an NPE.
+        if (dest == null) {
+            throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s].", vm.getId(), vm.getInstanceName()));
+        }
+        Host destHost = dest.getHost();
+
+        try {
+            _vmMgr.migrateWithStorage(vm.getUuid(), host.getId(), destHost.getId(), null);
+        } catch (ResourceUnavailableException e) {
+            throw new CloudRuntimeException(
+                    String.format("Maintenance failed, could not migrate VM [id=%s, name=%s] with local storage from host [id=%s, name=%s] to host [id=%s, name=%s].", vm.getId(),
+                            vm.getInstanceName(), host.getId(), host.getName(), destHost.getId(), destHost.getName()), e);
+        }
+    }
+
     @Override
     public boolean maintain(final long hostId) throws AgentUnavailableException {
         final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminAskMaintenance);
@@ -1322,9 +1389,13 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
         }
 
         if (_storageMgr.isLocalStorageActiveOnHost(host.getId())) {
-            throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage.");
+            if (!isMaintenanceLocalStrategyMigrate() && !isMaintenanceLocalStrategyForceStop()) {
+                throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage.");
+            }
         }
+
         List<VMInstanceVO> migratingInVMs = _vmDao.findByHostInStates(hostId, State.Migrating);
+
         if (migratingInVMs.size() > 0) {
             throw new CloudRuntimeException("Host contains incoming VMs migrating. Please wait for them to complete before putting to maintenance.");
         }
@@ -1350,6 +1421,28 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
         }
     }
 
+    /**
+     * Returns true if the host.maintenance.local.storage.strategy matches WorkType.Migration, ignoring case.
+     */
+    protected boolean isMaintenanceLocalStrategyMigrate() {
+        return WorkType.Migration.toString().equalsIgnoreCase(HOST_MAINTENANCE_LOCAL_STRATEGY.value());
+    }
+
+    /**
+     * Returns true if the host.maintenance.local.storage.strategy matches WorkType.ForceStop, ignoring case.
+     */
+    protected boolean isMaintenanceLocalStrategyForceStop() {
+        return WorkType.ForceStop.toString().equalsIgnoreCase(HOST_MAINTENANCE_LOCAL_STRATEGY.value());
+    }
+
+    /**
+     * Returns true if the host.maintenance.local.storage.strategy is the Default: "Error", blank, empty, or null.
+     */
+    protected boolean isMaintenanceLocalStrategyDefault() {
+        final String strategy = HOST_MAINTENANCE_LOCAL_STRATEGY.value();
+        return org.apache.commons.lang3.StringUtils.isBlank(strategy) || State.Error.toString().equalsIgnoreCase(strategy);
+    }
+
     /**
      * Add VNC details as user VM details for each VM in 'vms' (KVM hosts only)
      */
@@ -3094,6 +3187,6 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
 
     @Override
     public ConfigKey<?>[] getConfigKeys() {
-        return new ConfigKey<?>[0];
+        return new ConfigKey<?>[] {KvmSshToAgentEnabled, HOST_MAINTENANCE_LOCAL_STRATEGY};
     }
 }
diff --git a/server/src/main/java/com/cloud/vm/UserVmManager.java b/server/src/main/java/com/cloud/vm/UserVmManager.java
index e8f709729c1..e4206efe5d8 100644
--- a/server/src/main/java/com/cloud/vm/UserVmManager.java
+++ b/server/src/main/java/com/cloud/vm/UserVmManager.java
@@ -96,6 +96,8 @@ public interface UserVmManager extends UserVmService {
 
     void removeInstanceFromInstanceGroup(long vmId);
 
+    boolean isVMUsingLocalStorage(VMInstanceVO vm);
+
     boolean expunge(UserVmVO vm, long callerUserId, Account caller);
 
     Pair<UserVmVO, Map<VirtualMachineProfile.Param, Object>> startVirtualMachine(long vmId, Long hostId, Map<VirtualMachineProfile.Param, Object> additionalParams, String deploymentPlannerToUse)
diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java
index f71ae199b2a..fc23028735d 100644
--- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java
+++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java
@@ -5839,7 +5839,8 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
 
     }
 
-    private boolean isVMUsingLocalStorage(VMInstanceVO vm) {
+    @Override
+    public boolean isVMUsingLocalStorage(VMInstanceVO vm) {
         boolean usesLocalStorage = false;
 
         List<VolumeVO> volumes = _volsDao.findByInstance(vm.getId());
@@ -5892,9 +5893,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
         }
 
         if (!isOnSupportedHypevisorForMigration(vm)) {
-            if (s_logger.isDebugEnabled()) {
-                s_logger.debug(vm + " is not XenServer/VMware/KVM/Ovm/Hyperv, cannot migrate this VM form hypervisor type " + vm.getHypervisorType());
-            }
+            s_logger.error(vm + " is not XenServer/VMware/KVM/Ovm/Hyperv, cannot migrate this VM from hypervisor type " + vm.getHypervisorType());
             throw new InvalidParameterValueException("Unsupported Hypervisor Type for VM migration, we support XenServer/VMware/KVM/Ovm/Hyperv/Ovm3 only");
         }
 
@@ -5903,8 +5902,6 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
         }
 
         if (isVMUsingLocalStorage(vm)) {
-            if (s_logger.isDebugEnabled()) {
-                s_logger.debug(vm + " is using Local Storage, cannot migrate this VM.");
-            }
+            s_logger.error(vm + " is using Local Storage, cannot migrate this VM.");
             throw new InvalidParameterValueException("Unsupported operation, VM uses Local storage, cannot migrate");
         }