mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
check_heartbeat and pingtest execute through ssh, not XAPI, because XAPI may hang when master host is down
This commit is contained in:
parent
7b08bb7cab
commit
88c1da679c
@ -406,17 +406,28 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
||||
|
||||
}
|
||||
|
||||
protected boolean pingXenServer() {
|
||||
protected boolean pingXAPI() {
|
||||
Connection conn = getConnection();
|
||||
try {
|
||||
Host host = Host.getByUuid(conn, _host.uuid);
|
||||
if( !host.getEnabled(conn) ) {
|
||||
s_logger.debug("Host " + _host.ip + " is not enabled!");
|
||||
return false;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
s_logger.debug("cannot get host enabled status, host " + _host.ip + " due to " + e.toString(), e);
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
callHostPlugin(conn, "echo", "main");
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
s_logger.debug("cannot ping host " + _host.ip + " due to " + e.toString(), e);
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
protected String logX(XenAPIObject obj, String msg) {
|
||||
return new StringBuilder("Host ").append(_host.ip).append(" ").append(obj.toWireString()).append(": ").append(msg).toString();
|
||||
}
|
||||
@ -2006,12 +2017,24 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
||||
}
|
||||
|
||||
private boolean doPingTest(Connection conn, final String computingHostIp) {
|
||||
String args = "-h " + computingHostIp;
|
||||
String result = callHostPlugin(conn, "vmops", "pingtest", "args", args);
|
||||
if (result == null || result.isEmpty()) {
|
||||
com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22);
|
||||
try {
|
||||
sshConnection.connect(null, 60000, 60000);
|
||||
if (!sshConnection.authenticateWithPassword(_username, _password.peek())) {
|
||||
throw new CloudRuntimeException("Unable to authenticate");
|
||||
}
|
||||
|
||||
String cmd = "ping -c 2 " + computingHostIp;
|
||||
if (!SSHCmdHelper.sshExecuteCmd(sshConnection, cmd)) {
|
||||
throw new CloudRuntimeException("Cannot ping host " + computingHostIp + " from host " + _host.ip);
|
||||
}
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Catch exception " + e.toString(), e);
|
||||
return false;
|
||||
} finally {
|
||||
sshConnection.close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
||||
@ -2238,7 +2261,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
||||
}
|
||||
|
||||
protected CheckHealthAnswer execute(CheckHealthCommand cmd) {
|
||||
boolean result = pingXenServer();
|
||||
boolean result = pingXAPI();
|
||||
return new CheckHealthAnswer(cmd, result);
|
||||
}
|
||||
|
||||
@ -4341,9 +4364,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
||||
@Override
|
||||
public PingCommand getCurrentStatus(long id) {
|
||||
try {
|
||||
if (!pingXenServer()) {
|
||||
if (!pingXAPI()) {
|
||||
Thread.sleep(1000);
|
||||
if (!pingXenServer()) {
|
||||
if (!pingXAPI()) {
|
||||
s_logger.warn(" can not ping xenserver " + _host.uuid);
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -73,12 +73,10 @@ public class XenServer56FP1Resource extends XenServer56Resource {
|
||||
protected FenceAnswer execute(FenceCommand cmd) {
|
||||
Connection conn = getConnection();
|
||||
try {
|
||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
||||
if (!result.contains("> DEAD <")) {
|
||||
if (check_heartbeat(cmd.getHostGuid())) {
|
||||
s_logger.debug("Heart beat is still going so unable to fence");
|
||||
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
||||
}
|
||||
|
||||
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
||||
for (VM vm : vms) {
|
||||
Set<VDI> vdis = new HashSet<VDI>();
|
||||
|
||||
@ -28,6 +28,7 @@ import com.cloud.agent.api.StartupCommand;
|
||||
import com.cloud.resource.ServerResource;
|
||||
import com.cloud.utils.exception.CloudRuntimeException;
|
||||
import com.cloud.utils.script.Script;
|
||||
import com.cloud.utils.ssh.SSHCmdHelper;
|
||||
import com.xensource.xenapi.Connection;
|
||||
import com.xensource.xenapi.Host;
|
||||
import com.xensource.xenapi.Network;
|
||||
@ -208,15 +209,37 @@ public class XenServer56Resource extends CitrixResourceBase {
|
||||
}
|
||||
}
|
||||
|
||||
protected Boolean check_heartbeat(String hostuuid) {
|
||||
com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22);
|
||||
try {
|
||||
sshConnection.connect(null, 60000, 60000);
|
||||
if (!sshConnection.authenticateWithPassword(_username, _password.peek())) {
|
||||
throw new CloudRuntimeException("Unable to authenticate");
|
||||
}
|
||||
|
||||
String shcmd = "/opt/cloud/bin/check_heartbeat.sh " + hostuuid + " "
|
||||
+ Integer.toString(_heartbeatInterval * 2);
|
||||
if (!SSHCmdHelper.sshExecuteCmd(sshConnection, shcmd)) {
|
||||
s_logger.debug("Heart beat is gone so dead.");
|
||||
return false;
|
||||
}
|
||||
s_logger.debug("Heart beat is still going");
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
s_logger.debug("health check failed due to catch exception " + e.toString());
|
||||
return null;
|
||||
} finally {
|
||||
sshConnection.close();
|
||||
}
|
||||
}
|
||||
|
||||
protected FenceAnswer execute(FenceCommand cmd) {
|
||||
Connection conn = getConnection();
|
||||
try {
|
||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
||||
if (!result.contains("> DEAD <")) {
|
||||
if (check_heartbeat(cmd.getHostGuid())) {
|
||||
s_logger.debug("Heart beat is still going so unable to fence");
|
||||
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
||||
}
|
||||
|
||||
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
||||
for (VM vm : vms) {
|
||||
synchronized (_cluster.intern()) {
|
||||
@ -236,6 +259,7 @@ public class XenServer56Resource extends CitrixResourceBase {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected boolean transferManagementNetwork(Connection conn, Host host, PIF src, PIF.Record spr, PIF dest) throws XmlRpcException, XenAPIException {
|
||||
dest.reconfigureIp(conn, spr.ipConfigurationMode, spr.IP, spr.netmask, spr.gateway, spr.DNS);
|
||||
@ -269,33 +293,29 @@ public class XenServer56Resource extends CitrixResourceBase {
|
||||
|
||||
@Override
|
||||
public StartupCommand[] initialize() {
|
||||
pingXenServer();
|
||||
pingXAPI();
|
||||
StartupCommand[] cmds = super.initialize();
|
||||
return cmds;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
||||
try {
|
||||
Connection conn = getConnection();
|
||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHost().getGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
||||
if (result == null) {
|
||||
return new CheckOnHostAnswer(cmd, "Unable to call plugin");
|
||||
}
|
||||
if (result.contains("> DEAD <")) {
|
||||
s_logger.debug("Heart beat is gone so dead.");
|
||||
return new CheckOnHostAnswer(cmd, false, "Heart Beat is done");
|
||||
} else if (result.contains("> ALIVE <")) {
|
||||
s_logger.debug("Heart beat is still going");
|
||||
return new CheckOnHostAnswer(cmd, true, "Heartbeat is still going");
|
||||
}
|
||||
return new CheckOnHostAnswer(cmd, null, "Unable to determine");
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to fence", e);
|
||||
return new CheckOnHostAnswer(cmd, e.getMessage());
|
||||
Boolean alive = check_heartbeat(cmd.getHost().getGuid());
|
||||
String msg = "";
|
||||
if (alive == null) {
|
||||
msg = " cannot determine ";
|
||||
} else if ( alive == true) {
|
||||
msg = "Heart beat is still going";
|
||||
} else {
|
||||
msg = "Heart beat is gone so dead.";
|
||||
}
|
||||
s_logger.debug(msg);
|
||||
return new CheckOnHostAnswer(cmd, alive, msg);
|
||||
|
||||
}
|
||||
|
||||
|
||||
public XenServer56Resource() {
|
||||
super();
|
||||
}
|
||||
|
||||
@ -72,3 +72,4 @@ do
|
||||
done
|
||||
|
||||
echo "=====> DEAD <======"
|
||||
exit 1
|
||||
|
||||
@ -123,18 +123,7 @@ def setup_heartbeat_file(session, args):
|
||||
txt = ''
|
||||
return txt
|
||||
|
||||
@echo
|
||||
def check_heartbeat(session, args):
|
||||
host = args['host']
|
||||
interval = args['interval']
|
||||
try:
|
||||
cmd = ["bash", "/opt/cloud/bin/check_heartbeat.sh", host, interval]
|
||||
txt = util.pread2(cmd)
|
||||
except:
|
||||
txt=''
|
||||
return txt
|
||||
|
||||
|
||||
|
||||
@echo
|
||||
def heartbeat(session, args):
|
||||
host = args['host']
|
||||
@ -156,5 +145,4 @@ def asmonitor(session, args):
|
||||
return 'fail'
|
||||
|
||||
if __name__ == "__main__":
|
||||
XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, "create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "check_heartbeat":check_heartbeat, "heartbeat": heartbeat, "asmonitor": asmonitor})
|
||||
|
||||
XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, "create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "heartbeat": heartbeat, "asmonitor": asmonitor})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user