when host is pingtimeout and CCP can not determine the host status, put the host to Alert status , no VM HA.

This commit is contained in:
Anthony Xu 2014-10-22 14:53:22 -07:00
parent ee0f0a1cff
commit 4a13f81485
3 changed files with 10 additions and 8 deletions

View File

@ -123,7 +123,7 @@ public enum Status {
s_fsm.addTransition(Status.Connecting, Event.Ready, Status.Up);
s_fsm.addTransition(Status.Connecting, Event.PingTimeout, Status.Alert);
s_fsm.addTransition(Status.Connecting, Event.ShutdownRequested, Status.Disconnected);
s_fsm.addTransition(Status.Connecting, Event.HostDown, Status.Alert);
s_fsm.addTransition(Status.Connecting, Event.HostDown, Status.Down);
s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting);
s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert);

View File

@ -825,6 +825,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
if (determinedState == Status.Down) {
s_logger.error("Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs");
if ((host.getType() != Host.Type.SecondaryStorage) && (host.getType() != Host.Type.ConsoleProxy)) {
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host disconnected, " + host.getId(),
"Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs");
}
event = Status.Event.HostDown;
} else if (determinedState == Status.Up) {
/* Got ping response from host, bring it back*/
@ -857,20 +861,18 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
HostPodVO podVO = _podDao.findById(host.getPodId());
String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc,
"In availability zone " + host.getDataCenterId()
+ ", host is in alert state: " + host.getId() + "-" + host.getName());
"In availability zone " + host.getDataCenterId() + ", " + host.getId() + "-" + host.getName()
+ " disconnect due to event " + event + ", ms can't determine the host status" );
}
} else {
s_logger.debug("The next status of Agent " + host.getId() + " is not Alert, no need to investigate what happened");
}
}
handleDisconnectWithoutInvestigation(attache, event, true, true);
host = _hostDao.findById(hostId); // Maybe the host magically reappeared?
if (host != null && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
if (host != null && host.getStatus() == Status.Down) {
_haMgr.scheduleRestartForVmsOnHost(host, true);
}
return true;
}

View File

@ -202,7 +202,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
public Status investigate(final long hostId) {
final HostVO host = _hostDao.findById(hostId);
if (host == null) {
return null;
return Status.Alert;
}
Status hostState = null;
@ -219,7 +219,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
}
}
return null;
return Status.Alert;
}
@Override