diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index 3855927f7e5..bdc0f34dcd5 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -4221,6 +4221,16 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac case Stopped: case Migrating: s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); + if(vm.isHaEnabled() && vm.getState() == State.Running && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) { + s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); + if(!_haMgr.hasPendingHaWork(vm.getId())) + _haMgr.scheduleRestart(vm, true); + else + s_logger.info("VM " + vm.getInstanceName() + " already has an pending HA task working on it"); + + return; + } + VirtualMachineGuru vmGuru = getVmGuru(vm); VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm); sendStop(vmGuru, profile, true); @@ -4406,9 +4416,11 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac super(VirtualMachine.class, job, VmJobCheckInterval.value(), new Predicate() { @Override public boolean checkCondition() { - VMInstanceVO instance = _vmDao.findById(vmId); - if (instance.getPowerState() == desiredPowerState && (srcHostIdForMigration != null && instance.getPowerHostId() != srcHostIdForMigration)) + AsyncJobVO jobVo = _entityMgr.findById(AsyncJobVO.class, job.getId()); + assert (jobVo != null); + if (jobVo == null || jobVo.getStatus() != JobInfo.Status.IN_PROGRESS) return true; + return false; } }, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE); diff --git a/engine/schema/src/com/cloud/vm/VMInstanceVO.java b/engine/schema/src/com/cloud/vm/VMInstanceVO.java index 9ebade120eb..376b8357f03 100644 --- a/engine/schema/src/com/cloud/vm/VMInstanceVO.java +++ b/engine/schema/src/com/cloud/vm/VMInstanceVO.java @@ -465,7 +465,7 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject { +public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener { protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class); WorkerThread[] _workers; @@ -236,7 +235,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai return; } - s_logger.warn("Scheduling restart for VMs on host " + host.getId()); + s_logger.warn("Scheduling restart for VMs on host " + host.getId() + "-" + host.getName()); final List vms = _instanceDao.listByHostId(host.getId()); final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); @@ -806,7 +805,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _stopped = true; _executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA")); - VirtualMachine.State.getStateMachine().registerListener(this); return true; } @@ -921,6 +919,12 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai work.setTimeToTry(nextTime); work.setServerId(null); work.setDateTaken(null); + + // if restart failed in the middle due to exception, VM state may has been changed + // recapture into the HA worker so that it can really continue in it next turn + VMInstanceVO vm = _instanceDao.findById(work.getInstanceId()); + work.setUpdateTime(vm.getUpdated()); + work.setPreviousState(vm.getState()); } _haDao.update(work.getId(), work); } catch (final Throwable th) { @@ -962,35 +966,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai return _haPlanners.get(0); } - @Override - public boolean preStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, Object opaque) { - return true; - } - - @Override - public boolean postStateTransitionEvent(State oldState, VirtualMachine.Event event, State newState, VirtualMachine vo, boolean status, Object opaque) { - if (oldState == State.Running && event == VirtualMachine.Event.FollowAgentPowerOffReport && newState == State.Stopped) { - final VMInstanceVO vm = _instanceDao.findById(vo.getId()); - if (vm.isHaEnabled()) { - if (vm.getState() != State.Stopped) - s_logger.warn("Sanity check failed. postStateTransitionEvent reports transited to Stopped but VM " + vm + " is still at state " + vm.getState()); - - s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); - _executor.submit(new ManagedContextRunnable() { - @Override - protected void runInContext() { - try { - scheduleRestart(vm, false); - } catch (Exception e) { - s_logger.warn("Unexpected exception when scheduling a HA restart", e); - } - } - }); - } - } - return true; - } - @Override public boolean hasPendingHaWork(long vmId) { List haWorks = _haDao.listRunningHaWorkForVm(vmId);