CE-113 configure workervm gc based on job expiry

This commit is contained in:
Daan Hoogland 2017-04-05 14:19:14 +02:00
parent c689d4a696
commit 61189db1d9
3 changed files with 19 additions and 13 deletions

View File

@ -85,9 +85,9 @@ import com.cloud.vm.dao.VMInstanceDao;
public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable { public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable {
// Advanced // Advanced
private static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440", public static final ConfigKey<Long> JobExpireMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.expire.minutes", "1440",
"Time (in minutes) for async-jobs to be kept in system", true, ConfigKey.Scope.Global); "Time (in minutes) for async-jobs to be kept in system", true, ConfigKey.Scope.Global);
private static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60", public static final ConfigKey<Long> JobCancelThresholdMinutes = new ConfigKey<Long>("Advanced", Long.class, "job.cancel.threshold.minutes", "60",
"Time (in minutes) for async-jobs to be forcely cancelled if it has been in process for long", true, ConfigKey.Scope.Global); "Time (in minutes) for async-jobs to be forcely cancelled if it has been in process for long", true, ConfigKey.Scope.Global);
private static final ConfigKey<Integer> VmJobLockTimeout = new ConfigKey<Integer>("Advanced", private static final ConfigKey<Integer> VmJobLockTimeout = new ConfigKey<Integer>("Advanced",
Integer.class, "vm.job.lock.timeout", "1800", Integer.class, "vm.job.lock.timeout", "1800",

View File

@ -35,6 +35,9 @@ public interface VmwareManager {
public static final ConfigKey<Long> s_vmwareNicHotplugWaitTimeout = new ConfigKey<Long>("Advanced", Long.class, "vmware.nic.hotplug.wait.timeout", "15000", public static final ConfigKey<Long> s_vmwareNicHotplugWaitTimeout = new ConfigKey<Long>("Advanced", Long.class, "vmware.nic.hotplug.wait.timeout", "15000",
"Wait timeout (milli seconds) for hot plugged NIC of VM to be detected by guest OS.", false, ConfigKey.Scope.Global); "Wait timeout (milli seconds) for hot plugged NIC of VM to be detected by guest OS.", false, ConfigKey.Scope.Global);
public static final ConfigKey<Boolean> s_vmwareCleanOldWorderVMs = new ConfigKey<Boolean>("Advanced", Boolean.class, "vmware.clean.old.worker.vms", "false",
"If a worker vm is older then twice the 'job.expire.minutes' + 'job.cancel.threshold.minutes' , remove it.", true, ConfigKey.Scope.Global);
String composeWorkerName(); String composeWorkerName();
String getSystemVMIsoFileNameOnDatastore(); String getSystemVMIsoFileNameOnDatastore();

View File

@ -35,6 +35,7 @@ import java.util.concurrent.TimeUnit;
import javax.inject.Inject; import javax.inject.Inject;
import javax.naming.ConfigurationException; import javax.naming.ConfigurationException;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobManagerImpl;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import com.vmware.vim25.AboutInfo; import com.vmware.vim25.AboutInfo;
@ -128,6 +129,7 @@ import com.cloud.vm.DomainRouterVO;
public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService, Configurable { public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService, Configurable {
private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class); private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class);
private static final long MILISECONDS_PER_MINUTE = 60000;
private static final int STARTUP_DELAY = 60000; // 60 seconds private static final int STARTUP_DELAY = 60000; // 60 seconds
private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes
private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL; private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL;
@ -212,7 +214,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
@Override @Override
public ConfigKey<?>[] getConfigKeys() { public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout}; return new ConfigKey<?>[] {s_vmwareNicHotplugWaitTimeout, s_vmwareCleanOldWorderVMs};
} }
@Override @Override
@ -534,7 +536,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
return false; return false;
} }
Long.parseLong(tokens[0]); long startTick = Long.parseLong(tokens[0]);
long msid = Long.parseLong(tokens[1]); long msid = Long.parseLong(tokens[1]);
long runid = Long.parseLong(tokens[2]); long runid = Long.parseLong(tokens[2]);
@ -550,15 +552,16 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw
return true; return true;
} }
// disable time-out check until we have found out a VMware API that can check if // this time-out check was disabled
// there are pending tasks on the subject VM // "until we have found out a VMware API that can check if there are pending tasks on the subject VM"
/* // but as we expire jobs and those stale worker VMs stay around until an MS reboot we opt in to have them removed anyway
if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) { Long hungWorkerTimeout = 2 * (AsyncJobManagerImpl.JobExpireMinutes.value() + AsyncJobManagerImpl.JobCancelThresholdMinutes.value()) * MILISECONDS_PER_MINUTE;
if(s_logger.isInfoEnabled()) if(s_vmwareCleanOldWorderVMs.value() && System.currentTimeMillis() - startTick > hungWorkerTimeout) {
if(s_logger.isInfoEnabled()) {
s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000); s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000);
}
return true; return true;
} }
*/
return false; return false;
} }