Migrate/Stop VMs with local storage when preparing host for maintenance (#4212)

This commit is contained in:
Gabriel Beims Bräscher 2021-04-19 04:41:42 -03:00 committed by GitHub
parent f42024714c
commit de557663ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 103 additions and 9 deletions

View File

@ -53,6 +53,14 @@ public interface ResourceManager extends ResourceService, Configurable {
"Number of retries when preparing a host into Maintenance Mode is faulty before failing",
false);
/**
 * Setting {@code host.maintenance.local.storage.strategy}: what to do with VMs that have
 * volumes on local storage when their host is put into maintenance.
 * Per the description text below, the values are 'Error' (the default, which prevents
 * maintenance), 'Migration' (migrate the VMs away) and 'ForceStop' (force-stop the VMs).
 * NOTE(review): the trailing {@code true} is presumably the "dynamic" flag of ConfigKey -
 * confirm against the ConfigKey constructor.
 */
ConfigKey<String> HOST_MAINTENANCE_LOCAL_STRATEGY = new ConfigKey<>("Advanced", String.class,
"host.maintenance.local.storage.strategy", "Error",
"Defines the strategy towards VMs with volumes on local storage when putting a host in maintenance. "
+ "The default strategy is 'Error', preventing maintenance in such a case. "
+ "Choose 'Migration' strategy to migrate away VMs running on local storage. "
+ "To force-stop VMs, choose 'ForceStop' strategy",
true, ConfigKey.Scope.Global);
/**
* Register a listener for different types of resource life cycle events.
* There can only be one type of listener per type of host.

View File

@ -30,6 +30,18 @@ import java.util.Random;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.deploy.DataCenterDeployment;
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.deploy.DeploymentPlanningManager;
import com.cloud.exception.InsufficientServerCapacityException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.service.dao.ServiceOfferingDao;
import com.cloud.storage.dao.DiskOfferingDao;
import com.cloud.vm.UserVmManager;
import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.VirtualMachineProfileImpl;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.command.admin.cluster.AddClusterCmd;
import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd;
@ -206,6 +218,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
@Inject
private CapacityDao _capacityDao;
@Inject
private DiskOfferingDao diskOfferingDao;
@Inject
private ServiceOfferingDao serviceOfferingDao;
@Inject
private HostDao _hostDao;
@Inject
private HostDetailsDao _hostDetailsDao;
@ -226,6 +242,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
@Inject
private IPAddressDao _publicIPAddressDao;
@Inject
private DeploymentPlanningManager deploymentManager;
@Inject
private VirtualMachineManager _vmMgr;
@Inject
private VMInstanceDao _vmDao;
@ -239,6 +257,8 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
private DedicatedResourceDao _dedicatedDao;
@Inject
private ServiceOfferingDetailsDao _serviceOfferingDetailsDao;
@Inject
private UserVmManager userVmManager;
private List<? extends Discoverer> _discoverers;
@ -1273,6 +1293,19 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
} else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){
//Migration is not supported for LXC Vms. Schedule restart instead.
_haMgr.scheduleRestart(vm, false);
} else if (userVmManager.isVMUsingLocalStorage(vm)) {
if (isMaintenanceLocalStrategyForceStop()) {
_haMgr.scheduleStop(vm, hostId, WorkType.ForceStop);
} else if (isMaintenanceLocalStrategyMigrate()) {
migrateAwayVmWithVolumes(host, vm);
} else if (!isMaintenanceLocalStrategyDefault()){
String logMessage = String.format(
"Unsupported host.maintenance.local.storage.strategy: %s. Please set a strategy according to the global settings description: "
+ "'Error', 'Migration', or 'ForceStop'.",
HOST_MAINTENANCE_LOCAL_STRATEGY.value().toString());
s_logger.error(logMessage);
throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage.");
}
} else {
s_logger.info("Maintenance: scheduling migration of VM " + vm.getUuid() + " from host " + host.getUuid());
_haMgr.scheduleMigration(vm);
@ -1282,6 +1315,32 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return true;
}
/**
 * Looks for a host able to receive the VM and migrates the VM together with its volumes.
 *
 * @param host the host the VM is being moved away from (the one entering maintenance)
 * @param vm   the VM to migrate along with its storage
 * @throws CloudRuntimeException if the planner finds no destination, reports insufficient
 *                               capacity, or the migration itself fails because a resource
 *                               is unavailable
 */
private void migrateAwayVmWithVolumes(HostVO host, VMInstanceVO vm) {
    final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, null, null);
    final ServiceOfferingVO offeringVO = serviceOfferingDao.findById(vm.getServiceOfferingId());
    final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, offeringVO, null, null);
    plan.setMigrationPlan(true);

    // The source host is the one being evacuated, so it must never be chosen as the destination.
    final DeploymentPlanner.ExcludeList avoids = new DeploymentPlanner.ExcludeList();
    avoids.addHost(host.getId());

    final DeployDestination dest;
    try {
        dest = deploymentManager.planDeployment(profile, plan, avoids, null);
    } catch (InsufficientServerCapacityException e) {
        throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s].", vm.getId(), vm.getInstanceName()),
                e);
    }

    // Guard against the planner returning no destination; fail with a clear message instead of an NPE.
    final Host destHost = dest == null ? null : dest.getHost();
    if (destHost == null) {
        throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s].", vm.getId(), vm.getInstanceName()));
    }

    try {
        _vmMgr.migrateWithStorage(vm.getUuid(), host.getId(), destHost.getId(), null);
    } catch (ResourceUnavailableException e) {
        throw new CloudRuntimeException(
                String.format("Maintenance failed, could not migrate VM [id=%s, name=%s] with local storage from host [id=%s, name=%s] to host [id=%s, name=%s].", vm.getId(),
                        vm.getInstanceName(), host.getId(), host.getName(), destHost.getId(), destHost.getName()), e);
    }
}
@Override
public boolean maintain(final long hostId) throws AgentUnavailableException {
final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminAskMaintenance);
@ -1322,9 +1381,13 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
if (_storageMgr.isLocalStorageActiveOnHost(host.getId())) {
if(!isMaintenanceLocalStrategyMigrate() && !isMaintenanceLocalStrategyForceStop()) {
throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage.");
}
}
List<VMInstanceVO> migratingInVMs = _vmDao.findByHostInStates(hostId, State.Migrating);
if (migratingInVMs.size() > 0) {
throw new CloudRuntimeException("Host contains incoming VMs migrating. Please wait for them to complete before putting to maintenance.");
}
@ -1350,6 +1413,31 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
}
/**
 * Tells whether host.maintenance.local.storage.strategy is set to 'Migration'.
 * The comparison ignores case and does not depend on the JVM default locale.
 *
 * @return true if the configured strategy equals {@code WorkType.Migration} ignoring case;
 *         false for any other value, including null, empty or blank
 */
protected boolean isMaintenanceLocalStrategyMigrate() {
    // Read the setting once so a single, consistent value is evaluated.
    final String strategy = HOST_MAINTENANCE_LOCAL_STRATEGY.value();
    // equalsIgnoreCase returns false for null, and a blank string can never match the enum name.
    return WorkType.Migration.toString().equalsIgnoreCase(strategy);
}
/**
 * Tells whether host.maintenance.local.storage.strategy is set to 'ForceStop'.
 * The comparison ignores case and does not depend on the JVM default locale.
 *
 * @return true if the configured strategy equals {@code WorkType.ForceStop} ignoring case;
 *         false for any other value, including null, empty or blank
 */
protected boolean isMaintenanceLocalStrategyForceStop() {
    // Read the setting once so a single, consistent value is evaluated.
    final String strategy = HOST_MAINTENANCE_LOCAL_STRATEGY.value();
    // equalsIgnoreCase returns false for null, and a blank string can never match the enum name.
    return WorkType.ForceStop.toString().equalsIgnoreCase(strategy);
}
/**
 * Returns true if the host.maintenance.local.storage.strategy is the Default: "Error", blank, empty, or null.
 * The comparison with "Error" ignores case and does not depend on the JVM default locale.
 *
 * @return true when the setting is null, empty, blank, or equals {@code State.Error} ignoring case;
 *         false otherwise
 */
protected boolean isMaintenanceLocalStrategyDefault() {
    // Do not call toString() on the value: a null setting must count as the default, not raise an NPE.
    final String strategy = HOST_MAINTENANCE_LOCAL_STRATEGY.value();
    return org.apache.commons.lang3.StringUtils.isBlank(strategy)
            || State.Error.toString().equalsIgnoreCase(strategy);
}
/**
* Add VNC details as user VM details for each VM in 'vms' (KVM hosts only)
*/
@ -3094,6 +3182,6 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
/**
 * Returns the configuration keys declared by this manager.
 */
@Override
public ConfigKey<?>[] getConfigKeys() {
// NOTE(review): two consecutive return statements appear here; this looks like the old and the
// new line of a diff shown together. Only the second one lists KvmSshToAgentEnabled and
// HOST_MAINTENANCE_LOCAL_STRATEGY - confirm that it is the only one left in the real file.
return new ConfigKey[0];
return new ConfigKey<?>[] {KvmSshToAgentEnabled, HOST_MAINTENANCE_LOCAL_STRATEGY};
}
}

View File

@ -96,6 +96,8 @@ public interface UserVmManager extends UserVmService {
void removeInstanceFromInstanceGroup(long vmId);
/**
 * Reports whether the given VM uses local storage.
 * NOTE(review): the implementation is only partly visible here (it starts by listing the
 * volumes of the instance); confirm the exact criteria against UserVmManagerImpl.
 *
 * @param vm the VM instance to inspect
 * @return presumably true when the VM uses local storage - verify against the implementation
 */
boolean isVMUsingLocalStorage(VMInstanceVO vm);
boolean expunge(UserVmVO vm, long callerUserId, Account caller);
Pair<UserVmVO, Map<VirtualMachineProfile.Param, Object>> startVirtualMachine(long vmId, Long hostId, Map<VirtualMachineProfile.Param, Object> additionalParams, String deploymentPlannerToUse)

View File

@ -5839,7 +5839,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
}
private boolean isVMUsingLocalStorage(VMInstanceVO vm) {
public boolean isVMUsingLocalStorage(VMInstanceVO vm) {
boolean usesLocalStorage = false;
List<VolumeVO> volumes = _volsDao.findByInstance(vm.getId());
@ -5892,9 +5892,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
}
if (!isOnSupportedHypevisorForMigration(vm)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(vm + " is not XenServer/VMware/KVM/Ovm/Hyperv, cannot migrate this VM form hypervisor type " + vm.getHypervisorType());
}
s_logger.error(vm + " is not XenServer/VMware/KVM/Ovm/Hyperv, cannot migrate this VM from hypervisor type " + vm.getHypervisorType());
throw new InvalidParameterValueException("Unsupported Hypervisor Type for VM migration, we support XenServer/VMware/KVM/Ovm/Hyperv/Ovm3 only");
}
@ -5903,9 +5901,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Vir
}
if (isVMUsingLocalStorage(vm)) {
if (s_logger.isDebugEnabled()) {
s_logger.debug(vm + " is using Local Storage, cannot migrate this VM.");
}
s_logger.error(vm + " is using Local Storage, cannot migrate this VM.");
throw new InvalidParameterValueException("Unsupported operation, VM uses Local storage, cannot migrate");
}