From 95ef292860457c1cbf08548b9e16cafcaa742b5c Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Wed, 15 Sep 2021 17:38:16 +0200 Subject: [PATCH] kvm: honor migrate.wait and abort vm migration job (#5388) * kvm: honor migrate.wait and abort vm migration job * kvm: propagate migratewait to all cloudstack agents on kvm hosts * update #5388 * update #5388: display error msg --- agent/conf/agent.properties | 6 ++++ .../cloud/agent/manager/AgentManagerImpl.java | 4 ++- .../resource/LibvirtComputingResource.java | 16 +++++++++++ .../wrapper/LibvirtMigrateCommandWrapper.java | 28 ++++++++++++++++++- .../ConfigurationManagerImpl.java | 4 ++- 5 files changed, 55 insertions(+), 3 deletions(-) diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index f4ffd4b1d7f..dafe7086d98 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -97,6 +97,12 @@ domr.scripts.dir=scripts/network/domr/kvm # migration will finish quickly. Less than 1 means disabled. #vm.migrate.pauseafter=0 +# Time (in seconds) to wait for VM migrate finish. Less than 1 means disabled. +# If vm migration is not finished in the time, the vm job will be cancelled by libvirt. +# It will be configured by cloudstack management server when cloudstack agent connects. +# please change the global setting 'migratewait' if needed (default value: 3600) +#vm.migrate.wait=0 + # Agent hooks is the way to override default agent behavior to extend the functionality without excessive coding # for a custom deployment. The first hook promoted is libvirt-vm-xml-transformer which allows provider to modify # VM XML specification before send to libvirt. 
Hooks are implemented in Groovy and must be implemented in the way diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index f69f54c2f0d..c15edcf7309 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -38,6 +38,7 @@ import java.util.concurrent.locks.ReentrantLock; import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.configuration.Config; import com.cloud.utils.NumbersUtil; import org.apache.cloudstack.agent.lb.IndirectAgentLB; import org.apache.cloudstack.ca.CAManager; @@ -1758,7 +1759,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl if (cmd instanceof StartupRoutingCommand) { if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { Map params = new HashMap(); - params.put("router.aggregation.command.each.timeout", _configDao.getValue("router.aggregation.command.each.timeout")); + params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); + params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); try { SetHostParamsCommand cmds = new SetHostParamsCommand(params); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 23943075773..6620fc24418 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java 
@@ -46,6 +46,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import com.cloud.configuration.Config; import org.apache.cloudstack.storage.configdrive.ConfigDrive; import org.apache.cloudstack.storage.to.PrimaryDataStoreTO; import org.apache.cloudstack.storage.to.TemplateObjectTO; @@ -356,6 +357,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv protected int _migrateSpeed; protected int _migrateDowntime; protected int _migratePauseAfter; + protected int _migrateWait; protected boolean _diskActivityCheckEnabled; protected RollingMaintenanceExecutor rollingMaintenanceExecutor; protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB @@ -540,6 +542,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv return _migratePauseAfter; } + public int getMigrateWait() { + return _migrateWait; + } + public int getMigrateSpeed() { return _migrateSpeed; } @@ -1228,6 +1234,9 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv value = (String) params.get("vm.migrate.pauseafter"); _migratePauseAfter = NumbersUtil.parseInt(value, -1); + value = (String) params.get("vm.migrate.wait"); + _migrateWait = NumbersUtil.parseInt(value, -1); + configureAgentHooks(params); value = (String)params.get("vm.migrate.speed"); @@ -1291,6 +1300,13 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv storage.persist("router.aggregation.command.each.timeout", String.valueOf(longValue)); } + if (params.get(Config.MigrateWait.toString()) != null) { + String value = (String)params.get(Config.MigrateWait.toString()); + Integer intValue = NumbersUtil.parseInt(value, -1); + storage.persist("vm.migrate.wait", String.valueOf(intValue)); + _migrateWait = intValue; + } + return true; } diff --git 
a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java index a72d58430fb..1ad1802e949 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java @@ -51,6 +51,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import org.libvirt.Connect; import org.libvirt.Domain; +import org.libvirt.DomainJobInfo; import org.libvirt.DomainInfo.DomainState; import org.libvirt.LibvirtException; import org.libvirt.StorageVol; @@ -219,6 +220,29 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper 0 && sleeptime > migrateWait * 1000) { + DomainState state = null; + try { + state = dm.getInfo().state; + } catch (final LibvirtException e) { + s_logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage()); + } + if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) { + try { + DomainJobInfo job = dm.getJobInfo(); + s_logger.info("Aborting " + vmName + " domain job: " + job); + dm.abortJob(); + result = String.format("Migration of VM %s was cancelled by cloudstack due to time out after %d seconds", vmName, migrateWait); + s_logger.debug(result); + break; + } catch (final LibvirtException e) { + s_logger.info("Failed to abort the vm migration job of vm " + vmName + " : " + e.getMessage()); + } + } + } + // pause vm if we meet the vm.migrate.pauseafter threshold and not already paused final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter(); if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) { @@ -262,7 +286,9 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper params = new HashMap(); 
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); + params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); _agentManager.propagateChangeToAgents(params); } }