CE-113 configure workervm gc based on job expiry

This commit is contained in:
Daan Hoogland 2017-04-05 14:19:14 +02:00
parent c689d4a696
commit 61189db1d9
3 changed files with 19 additions and 13 deletions

View File

@ -85,9 +85,9 @@ import com.cloud.vm.dao.VMInstanceDao;
public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable {
// Advanced
private static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440",
public static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440",
"Time (in minutes) for async-jobs to be kept in system", true, ConfigKey.Scope.Global);
private static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60",
public static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60",
"Time (in minutes) for async-jobs to be forcely cancelled if it has been in process for long", true, ConfigKey.Scope.Global);
private static final ConfigKey<Integer> VmJobLockTimeout = new ConfigKey<Integer>("Advanced",
Integer.class, "vm.job.lock.timeout", "1800",

View File

@ -35,6 +35,9 @@ public interface VmwareManager {
public static final ConfigKey<Long> s_vmwareNicHotplugWaitTimeout = new ConfigKey<Long>("Advanced", Long.class, "vmware.nic.hotplug.wait.timeout", "15000",
"Wait timeout (milli seconds) for hot plugged NIC of VM to be detected by guest OS.", false, ConfigKey.Scope.Global);
public static final ConfigKey<Boolean> s_vmwareCleanOldWorderVMs = new ConfigKey<Boolean>("Advanced", Boolean.class, "vmware.clean.old.worker.vms", "false",
"If a worker vm is older then twice the 'job.expire.minutes' + 'job.cancel.threshold.minutes' , remove it.", true, ConfigKey.Scope.Global);
String composeWorkerName();
String getSystemVMIsoFileNameOnDatastore();

View File

@ -35,6 +35,7 @@ import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobManagerImpl;
import org.apache.log4j.Logger;
import com.vmware.vim25.AboutInfo;
@ -128,6 +129,7 @@ import com.cloud.vm.DomainRouterVO;
public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService, Configurable {
private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class);
private static final long MILISECONDS_PER_MINUTE = 60000;
private static final int STARTUP_DELAY = 60000; // 60 seconds
private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes
private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL;
@ -212,7 +214,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout};
return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout, s_vmwareCleanOldWorderVMs};
}
@Override
@ -534,7 +536,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
return false;
}
Long.parseLong(tokens[0]);
long startTick = Long.parseLong(tokens[0]);
long msid = Long.parseLong(tokens[1]);
long runid = Long.parseLong(tokens[2]);
@ -550,15 +552,16 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
return true;
}
// disable time-out check until we have found out a VMware API that can check if
// there are pending tasks on the subject VM
/*
if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) {
if(s_logger.isInfoEnabled())
s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000);
return true;
}
*/
// this time-out check was disabled
// "until we have found out a VMware API that can check if there are pending tasks on the subject VM"
// but as we expire jobs and those stale worker VMs stay around untill an MS reboot we opt in to have them removed anyway
Long hungWorkerTimeout = 2 * (AsyncJobManagerImpl.JobExpireMinutes.value() + AsyncJobManagerImpl.JobCancelThresholdMinutes.value()) * MILISECONDS_PER_MINUTE;
if(s_vmwareCleanOldWorderVMs.value() && System.currentTimeMillis() - startTick > hungWorkerTimeout) {
if(s_logger.isInfoEnabled()) {
s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000);
}
return true;
}
return false;
}