kvm: honor migrate.wait and abort vm migration job (#5388)

* kvm: honor migrate.wait and abort vm migration job

* kvm: propagate migratewait to all cloudstack agents on kvm hosts

* update #5388

* update #5388: display error msg
This commit is contained in:
Wei Zhou 2021-09-15 17:38:16 +02:00 committed by GitHub
parent e6058b09a7
commit 95ef292860
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 3 deletions

View File

@ -97,6 +97,12 @@ domr.scripts.dir=scripts/network/domr/kvm
# migration will finish quickly. Less than 1 means disabled.
#vm.migrate.pauseafter=0
# Time (in seconds) to wait for a VM migration to finish. Less than 1 means disabled.
# If the VM migration has not finished within this time, the agent aborts the libvirt migration job.
# This value is set by the CloudStack management server when the CloudStack agent connects.
# Please change the global setting 'migratewait' if needed (default value: 3600).
#vm.migrate.wait=0
# Agent hooks is the way to override default agent behavior to extend the functionality without excessive coding
# for a custom deployment. The first hook promoted is libvirt-vm-xml-transformer which allows provider to modify
# VM XML specification before send to libvirt. Hooks are implemented in Groovy and must be implemented in the way

View File

@ -38,6 +38,7 @@ import java.util.concurrent.locks.ReentrantLock;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.configuration.Config;
import com.cloud.utils.NumbersUtil;
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
import org.apache.cloudstack.ca.CAManager;
@ -1758,7 +1759,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
if (cmd instanceof StartupRoutingCommand) {
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
Map<String, String> params = new HashMap<String, String>();
params.put("router.aggregation.command.each.timeout", _configDao.getValue("router.aggregation.command.each.timeout"));
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
try {
SetHostParamsCommand cmds = new SetHostParamsCommand(params);

View File

@ -46,6 +46,7 @@ import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import com.cloud.configuration.Config;
import org.apache.cloudstack.storage.configdrive.ConfigDrive;
import org.apache.cloudstack.storage.to.PrimaryDataStoreTO;
import org.apache.cloudstack.storage.to.TemplateObjectTO;
@ -356,6 +357,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
protected int _migrateSpeed;
protected int _migrateDowntime;
protected int _migratePauseAfter;
protected int _migrateWait;
protected boolean _diskActivityCheckEnabled;
protected RollingMaintenanceExecutor rollingMaintenanceExecutor;
protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB
@ -540,6 +542,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
return _migratePauseAfter;
}
/**
 * Returns the VM migration wait time currently held by this resource.
 *
 * @return the value of {@code _migrateWait}, in seconds; values below 1 mean the
 *         migration timeout is disabled
 */
public int getMigrateWait() {
    return this._migrateWait;
}
/**
 * Returns the VM migration speed currently held by this resource.
 *
 * @return the value of {@code _migrateSpeed}
 */
public int getMigrateSpeed() {
    return this._migrateSpeed;
}
@ -1228,6 +1234,9 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
value = (String) params.get("vm.migrate.pauseafter");
_migratePauseAfter = NumbersUtil.parseInt(value, -1);
value = (String) params.get("vm.migrate.wait");
_migrateWait = NumbersUtil.parseInt(value, -1);
configureAgentHooks(params);
value = (String)params.get("vm.migrate.speed");
@ -1291,6 +1300,13 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
storage.persist("router.aggregation.command.each.timeout", String.valueOf(longValue));
}
// When the host params carry a migrate-wait value, persist it under the agent's
// "vm.migrate.wait" key and then update the in-memory setting.
if (params.get(Config.MigrateWait.toString()) != null) {
    final String migrateWaitParam = (String) params.get(Config.MigrateWait.toString());
    // -1 is passed to NumbersUtil.parseInt as the default value.
    final int parsedMigrateWait = NumbersUtil.parseInt(migrateWaitParam, -1);
    storage.persist("vm.migrate.wait", String.valueOf(parsedMigrateWait));
    _migrateWait = parsedMigrateWait;
}
return true;
}

View File

@ -51,6 +51,7 @@ import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.libvirt.Connect;
import org.libvirt.Domain;
import org.libvirt.DomainJobInfo;
import org.libvirt.DomainInfo.DomainState;
import org.libvirt.LibvirtException;
import org.libvirt.StorageVol;
@ -219,6 +220,29 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
s_logger.info("Waiting for migration of " + vmName + " to complete, waited " + sleeptime + "ms");
}
// Abort the VM migration if the job has been running longer than vm.migrate.wait.
// vm.migrate.wait is in seconds while sleeptime is tracked in milliseconds. The conversion
// uses long arithmetic (1000L) so that a large vm.migrate.wait cannot overflow int, turn
// negative and cause the migration to be aborted immediately.
final int migrateWait = libvirtComputingResource.getMigrateWait();
if (migrateWait > 0 && sleeptime > migrateWait * 1000L) {
    DomainState state = null;
    try {
        state = dm.getInfo().state;
    } catch (final LibvirtException e) {
        s_logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
    }
    // Only attempt the abort while the domain is reported as running; if the state lookup
    // failed, state stays null and the abort is skipped.
    if (state == DomainState.VIR_DOMAIN_RUNNING) {
        try {
            DomainJobInfo job = dm.getJobInfo();
            s_logger.info("Aborting " + vmName + " domain job: " + job);
            dm.abortJob();
            // Record the reason in result before leaving the enclosing loop.
            result = String.format("Migration of VM %s was cancelled by cloudstack due to time out after %d seconds", vmName, migrateWait);
            s_logger.debug(result);
            break;
        } catch (final LibvirtException e) {
            // The abort failed: log it and do not break out of the enclosing loop.
            s_logger.info("Failed to abort the vm migration job of vm " + vmName + " : " + e.getMessage());
        }
    }
}
// pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
@ -262,7 +286,9 @@ public final class LibvirtMigrateCommandWrapper extends CommandWrapper<MigrateCo
| TransformerException
| URISyntaxException e) {
s_logger.debug(String.format("%s : %s", e.getClass().getSimpleName(), e.getMessage()));
result = "Exception during migrate: " + e.getMessage();
if (result == null) {
result = "Exception during migrate: " + e.getMessage();
}
} finally {
try {
if (dm != null && result != null) {

View File

@ -543,9 +543,11 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
if (globalSettingUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
globalSettingUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
_indirectAgentLB.propagateMSListToAgents();
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())) {
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
|| globalSettingUpdated.equals(Config.MigrateWait.toString())) {
Map<String, String> params = new HashMap<String, String>();
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
_agentManager.propagateChangeToAgents(params);
}
}