diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 121246bc11c..aa38258c4b4 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -85,9 +85,9 @@ import com.cloud.vm.dao.VMInstanceDao; public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, ClusterManagerListener, Configurable { // Advanced - private static final ConfigKey JobExpireMinutes = new ConfigKey("Advanced", Long.class, "job.expire.minutes", "1440", + public static final ConfigKey JobExpireMinutes = new ConfigKey("Advanced", Long.class, "job.expire.minutes", "1440", "Time (in minutes) for async-jobs to be kept in system", true, ConfigKey.Scope.Global); - private static final ConfigKey JobCancelThresholdMinutes = new ConfigKey("Advanced", Long.class, "job.cancel.threshold.minutes", "60", + public static final ConfigKey JobCancelThresholdMinutes = new ConfigKey("Advanced", Long.class, "job.cancel.threshold.minutes", "60", "Time (in minutes) for async-jobs to be forcely cancelled if it has been in process for long", true, ConfigKey.Scope.Global); private static final ConfigKey VmJobLockTimeout = new ConfigKey("Advanced", Integer.class, "vm.job.lock.timeout", "1800", diff --git a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManager.java b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManager.java index 8a3b201f352..12c48fee026 100644 --- a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManager.java +++ b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManager.java @@ -35,6 +35,9 @@ public interface VmwareManager { public static final ConfigKey s_vmwareNicHotplugWaitTimeout = new ConfigKey("Advanced", 
Long.class, "vmware.nic.hotplug.wait.timeout", "15000", "Wait timeout (milli seconds) for hot plugged NIC of VM to be detected by guest OS.", false, ConfigKey.Scope.Global); + public static final ConfigKey s_vmwareCleanOldWorderVMs = new ConfigKey("Advanced", Boolean.class, "vmware.clean.old.worker.vms", "false", + "If a worker vm is older then twice the 'job.expire.minutes' + 'job.cancel.threshold.minutes' , remove it.", true, ConfigKey.Scope.Global); + String composeWorkerName(); String getSystemVMIsoFileNameOnDatastore(); diff --git a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java index 8688235019d..2bf4692e798 100644 --- a/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java +++ b/plugins/hypervisors/vmware/src/com/cloud/hypervisor/vmware/manager/VmwareManagerImpl.java @@ -35,6 +35,7 @@ import java.util.concurrent.TimeUnit; import javax.inject.Inject; import javax.naming.ConfigurationException; +import org.apache.cloudstack.framework.jobs.impl.AsyncJobManagerImpl; import org.apache.log4j.Logger; import com.vmware.vim25.AboutInfo; @@ -128,6 +129,7 @@ import com.cloud.vm.DomainRouterVO; public class VmwareManagerImpl extends ManagerBase implements VmwareManager, VmwareStorageMount, Listener, VmwareDatacenterService, Configurable { private static final Logger s_logger = Logger.getLogger(VmwareManagerImpl.class); + private static final long MILISECONDS_PER_MINUTE = 60000; private static final int STARTUP_DELAY = 60000; // 60 seconds private static final long DEFAULT_HOST_SCAN_INTERVAL = 600000; // every 10 minutes private long _hostScanInterval = DEFAULT_HOST_SCAN_INTERVAL; @@ -212,7 +214,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] {s_vmwareNicHotplugWaitTimeout}; + return new 
ConfigKey[] {s_vmwareNicHotplugWaitTimeout, s_vmwareCleanOldWorderVMs}; } @Override @@ -534,7 +536,7 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw return false; } - Long.parseLong(tokens[0]); + long startTick = Long.parseLong(tokens[0]); long msid = Long.parseLong(tokens[1]); long runid = Long.parseLong(tokens[2]); @@ -550,15 +552,16 @@ public class VmwareManagerImpl extends ManagerBase implements VmwareManager, Vmw return true; } - // disable time-out check until we have found out a VMware API that can check if - // there are pending tasks on the subject VM - /* - if(System.currentTimeMillis() - startTick > _hungWorkerTimeout) { - if(s_logger.isInfoEnabled()) - s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000); - return true; - } - */ + // this time-out check was disabled + // "until we have found out a VMware API that can check if there are pending tasks on the subject VM" + // but as we expire jobs and those stale worker VMs stay around until an MS reboot we opt in to have them removed anyway + Long hungWorkerTimeout = 2 * (AsyncJobManagerImpl.JobExpireMinutes.value() + AsyncJobManagerImpl.JobCancelThresholdMinutes.value()) * MILISECONDS_PER_MINUTE; + if(s_vmwareCleanOldWorderVMs.value() && System.currentTimeMillis() - startTick > hungWorkerTimeout) { + if(s_logger.isInfoEnabled()) { + s_logger.info("Worker VM expired, seconds elapsed: " + (System.currentTimeMillis() - startTick) / 1000); + } + return true; + } return false; }