bug 10414: update VM sync logic and capacity management to sync VM state changes initiated from external sources

This commit is contained in:
Kelven Yang 2011-06-22 18:23:34 -07:00
parent 784caafe2f
commit 7f8b129dc5
3 changed files with 80 additions and 60 deletions

View File

@ -72,12 +72,14 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, StateObject
s_fsm.addTransition(State.Stopped, VirtualMachine.Event.OperationFailed, State.Error);
s_fsm.addTransition(State.Stopped, VirtualMachine.Event.ExpungeOperation, State.Expunging);
s_fsm.addTransition(State.Stopped, VirtualMachine.Event.AgentReportShutdowned, State.Stopped);
s_fsm.addTransition(State.Stopped, VirtualMachine.Event.AgentReportMigrated, State.Stopped);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.OperationRetry, State.Starting);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.OperationSucceeded, State.Running);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.OperationFailed, State.Stopped);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.AgentReportRunning, State.Running);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.AgentReportStopped, State.Stopped);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.AgentReportShutdowned, State.Stopped);
s_fsm.addTransition(State.Starting, VirtualMachine.Event.AgentReportMigrated, State.Starting);
s_fsm.addTransition(State.Destroyed, VirtualMachine.Event.RecoveryRequested, State.Stopped);
s_fsm.addTransition(State.Destroyed, VirtualMachine.Event.ExpungeOperation, State.Expunging);
s_fsm.addTransition(State.Running, VirtualMachine.Event.MigrationRequested, State.Migrating);
@ -85,6 +87,7 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, StateObject
s_fsm.addTransition(State.Running, VirtualMachine.Event.AgentReportStopped, State.Stopped);
s_fsm.addTransition(State.Running, VirtualMachine.Event.StopRequested, State.Stopping);
s_fsm.addTransition(State.Running, VirtualMachine.Event.AgentReportShutdowned, State.Stopped);
s_fsm.addTransition(State.Running, VirtualMachine.Event.AgentReportMigrated, State.Running);
s_fsm.addTransition(State.Migrating, VirtualMachine.Event.MigrationRequested, State.Migrating);
s_fsm.addTransition(State.Migrating, VirtualMachine.Event.OperationSucceeded, State.Running);
s_fsm.addTransition(State.Migrating, VirtualMachine.Event.OperationFailed, State.Running);
@ -97,6 +100,7 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, StateObject
s_fsm.addTransition(State.Stopping, VirtualMachine.Event.AgentReportStopped, State.Stopped);
s_fsm.addTransition(State.Stopping, VirtualMachine.Event.StopRequested, State.Stopping);
s_fsm.addTransition(State.Stopping, VirtualMachine.Event.AgentReportShutdowned, State.Stopped);
s_fsm.addTransition(State.Stopping, VirtualMachine.Event.AgentReportMigrated, State.Stopping);
s_fsm.addTransition(State.Expunging, VirtualMachine.Event.OperationFailed, State.Expunging);
s_fsm.addTransition(State.Expunging, VirtualMachine.Event.ExpungeOperation, State.Expunging);
s_fsm.addTransition(State.Error, VirtualMachine.Event.DestroyRequested, State.Expunging);
@ -138,7 +142,8 @@ public interface VirtualMachine extends RunningOn, ControlledEntity, StateObject
OperationSucceeded,
OperationFailed,
OperationRetry,
AgentReportShutdowned
AgentReportShutdowned,
AgentReportMigrated
};
public enum Type {

View File

@ -519,10 +519,14 @@ public class CapacityManagerImpl implements CapacityManager, StateListener<State
releaseVmCapacity(vm, false, false, oldHostId);
} else if (event == Event.AgentReportStopped) {
releaseVmCapacity(vm, false, true, oldHostId);
} else if(event == Event.AgentReportMigrated) {
releaseVmCapacity(vm, false, false, oldHostId);
}
} else if (oldState == State.Running) {
if (event == Event.AgentReportStopped) {
releaseVmCapacity(vm, false, true, oldHostId);
} else if(event == Event.AgentReportMigrated) {
releaseVmCapacity(vm, false, false, oldHostId);
}
} else if (oldState == State.Migrating) {
if (event == Event.AgentReportStopped) {
@ -538,14 +542,18 @@ public class CapacityManagerImpl implements CapacityManager, StateListener<State
} else if (oldState == State.Stopping) {
if (event == Event.AgentReportStopped || event == Event.OperationSucceeded) {
releaseVmCapacity(vm, false, true, oldHostId);
} else if(event == Event.AgentReportMigrated) {
releaseVmCapacity(vm, false, false, oldHostId);
}
} else if (oldState == State.Stopped) {
if (event == Event.DestroyRequested) {
releaseVmCapacity(vm, true, false, vm.getLastHostId());
} else if(event == Event.AgentReportMigrated) {
releaseVmCapacity(vm, false, false, oldHostId);
}
}
if ((newState == State.Starting || newState == State.Migrating) && vm.getHostId() != null) {
if ((newState == State.Starting || newState == State.Migrating || event == Event.AgentReportMigrated) && vm.getHostId() != null) {
boolean fromLastHost = false;
if (vm.getLastHostId() == vm.getHostId()) {
s_logger.debug("VM starting again on the last host it was stopped on");

View File

@ -1508,14 +1508,12 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
* something should be cleaned up
*
*/
protected Command compareState(long hostId, VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync, boolean nativeHA) {
protected Command compareState(long hostId, VMInstanceVO vm, final AgentVmInfo info, final boolean fullSync, boolean trackExternalChange) {
State agentState = info.state;
final String agentName = info.name;
final State serverState = vm.getState();
final String serverName = vm.getInstanceName();
VirtualMachineGuru<VMInstanceVO> vmGuru = getVmGuru(vm);
Command command = null;
if (s_logger.isDebugEnabled()) {
@ -1540,6 +1538,16 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
_alertMgr.sendAlert(alertType, vm.getDataCenterIdToDeployIn(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getInstanceName() + ", id: " + vm.getId() + ") stopped on host " + hostDesc + " due to storage failure",
"Virtual Machine " + vm.getInstanceName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped due to storage failure.");
}
if(trackExternalChange) {
if(hostId != vm.getHostId()) {
try {
stateTransitTo(vm, VirtualMachine.Event.AgentReportMigrated, hostId);
} catch (NoTransitionException e) {
s_logger.warn(e.getMessage());
}
}
}
// if (serverState == State.Migrating) {
// s_logger.debug("Skipping vm in migrating state: " + vm);
@ -1553,17 +1561,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
assert (agentState == State.Stopped || agentState == State.Running) : "If the states we send up is changed, this must be changed.";
if (agentState == State.Running) {
try {
if(nativeHA) {
stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, hostId);
} else {
stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, vm.getHostId());
}
stateTransitTo(vm, VirtualMachine.Event.AgentReportRunning, hostId);
} catch (NoTransitionException e) {
s_logger.warn(e.getMessage());
}
// FIXME: What if someone comes in and sets it to stopping? Then what?
return null;
}
s_logger.debug("State matches but the agent said stopped so let's send a cleanup command anyways.");
return cleanup(agentName);
}
@ -1595,7 +1600,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
// Our records showed that it should be running so let's restart it.
_haMgr.scheduleRestart(vm, false);
} else if (serverState == State.Stopping) {
_haMgr.scheduleStop(vm, vm.getHostId(), WorkType.ForceStop);
_haMgr.scheduleStop(vm, hostId, WorkType.ForceStop);
s_logger.debug("Scheduling a check stop for VM in stopping mode: " + vm);
} else if (serverState == State.Starting) {
s_logger.debug("Ignoring VM in starting mode: " + vm.getInstanceName());
@ -1605,59 +1610,21 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
} else if (agentState == State.Running) {
if (serverState == State.Starting) {
if (fullSync) {
s_logger.debug("VM state is starting on full sync so updating it to running");
vm = findById(vm.getType(), vm.getId());
try {
stateTransitTo(vm, Event.AgentReportRunning, vm.getHostId());
} catch (NoTransitionException e1) {
s_logger.warn(e1.getMessage());
}
s_logger.debug("VM's " + vm + " state is starting on full sync so updating it to Running");
vm = vmGuru.findById(vm.getId());
VirtualMachineProfile<VMInstanceVO> profile = new VirtualMachineProfileImpl<VMInstanceVO>(vm);
List<NicVO> nics = _nicsDao.listByVmId(profile.getId());
for (NicVO nic : nics) {
Network network = _networkMgr.getNetwork(nic.getNetworkId());
NicProfile nicProfile = new NicProfile(nic, network, nic.getBroadcastUri(), nic.getIsolationUri(), null);
profile.addNic(nicProfile);
}
Commands cmds = new Commands(OnError.Stop);
s_logger.debug("Finalizing commands that need to be send to complete Start process for the vm " + vm);
if (vmGuru.finalizeCommandsOnStart(cmds, profile)) {
if (cmds.size() != 0) {
try {
_agentMgr.send(vm.getHostId(), cmds);
} catch (OperationTimedoutException e) {
s_logger.error("Exception during update for running vm: " + vm, e);
return null;
} catch (ResourceUnavailableException e) {
s_logger.error("Exception during update for running vm: " + vm, e);
return null;
}
}
if (vmGuru.finalizeStart(profile, vm.getHostId(), cmds, null)) {
try {
stateTransitTo(vm, Event.AgentReportRunning, vm.getHostId());
} catch (NoTransitionException e) {
s_logger.warn(e.getMessage());
}
} else {
s_logger.error("Exception during update for running vm: " + vm);
return null;
}
} else {
s_logger.error("Unable to finalize commands on start for vm: " + vm);
try {
ensureVmRunningContext(hostId, vm, Event.AgentReportRunning);
} catch (OperationTimedoutException e) {
s_logger.error("Exception during update for running vm: " + vm, e);
return null;
} catch (ResourceUnavailableException e) {
s_logger.error("Exception during update for running vm: " + vm, e);
return null;
} catch (NoTransitionException e) {
s_logger.warn(e.getMessage());
}
}
} else if (serverState == State.Stopping) {
s_logger.debug("Scheduling a stop command for " + vm);
_haMgr.scheduleStop(vm, vm.getHostId(), WorkType.Stop);
_haMgr.scheduleStop(vm, hostId, WorkType.Stop);
} else {
s_logger.debug("VM state is in stopped so stopping it on the agent");
command = cleanup(agentName);
@ -1665,6 +1632,46 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
}
return command;
}
/**
 * Applies {@code cause} to the VM's state machine and then replays the
 * guru's start-finalization steps for a VM found on {@code hostId}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>reload the VM and attempt the state transition against {@code hostId};
 *       a missing transition at this point is only logged;</li>
 *   <li>reload the VM again through its guru and build a profile carrying
 *       one {@code NicProfile} per NIC recorded for the VM;</li>
 *   <li>ask the guru for its finalize-on-start commands, send them (if any)
 *       to the host recorded on the reloaded VM, and let the guru finalize
 *       the start;</li>
 *   <li>on successful finalization, apply {@code cause} once more against
 *       the VM's recorded host.</li>
 * </ol>
 * If either guru step returns {@code false} the failure is logged and the
 * method returns without the final transition.
 *
 * @param hostId host used for the first state transition
 * @param vm     the VM instance to update; it is re-read from storage before use
 * @param cause  the event to feed into the VM state machine
 * @throws OperationTimedoutException   propagated from sending the commands to the agent
 * @throws ResourceUnavailableException propagated from sending the commands to the agent
 * @throws NoTransitionException        if the final state transition is not allowed
 */
private void ensureVmRunningContext(long hostId, VMInstanceVO vm, Event cause) throws OperationTimedoutException, ResourceUnavailableException, NoTransitionException {
    VirtualMachineGuru<VMInstanceVO> vmGuru = getVmGuru(vm);

    s_logger.debug("VM state is starting on full sync so updating it to running");
    vm = findById(vm.getType(), vm.getId());
    try {
        stateTransitTo(vm, cause, hostId);
    } catch (NoTransitionException noTransition) {
        // Best effort: the finalization below is still attempted.
        s_logger.warn(noTransition.getMessage());
    }

    s_logger.debug("VM's " + vm + " state is starting on full sync so updating it to Running");
    // Re-read through the guru so the profile is built from the latest stored record.
    vm = vmGuru.findById(vm.getId());

    VirtualMachineProfile<VMInstanceVO> profile = new VirtualMachineProfileImpl<VMInstanceVO>(vm);
    for (NicVO nic : _nicsDao.listByVmId(profile.getId())) {
        profile.addNic(new NicProfile(nic, _networkMgr.getNetwork(nic.getNetworkId()), nic.getBroadcastUri(), nic.getIsolationUri(), null));
    }

    Commands cmds = new Commands(OnError.Stop);
    s_logger.debug("Finalizing commands that need to be send to complete Start process for the vm " + vm);

    if (!vmGuru.finalizeCommandsOnStart(cmds, profile)) {
        s_logger.error("Unable to finalize commands on start for vm: " + vm);
        return;
    }

    if (cmds.size() != 0) {
        _agentMgr.send(vm.getHostId(), cmds);
    }

    if (!vmGuru.finalizeStart(profile, vm.getHostId(), cmds, null)) {
        s_logger.error("Unable to finish finialization for running vm: " + vm);
        return;
    }

    stateTransitTo(vm, cause, vm.getHostId());
}
public Commands fullSync(final long hostId, final Map<String, State> newStates) {
Commands commands = new Commands(OnError.Continue);