mirror of
https://github.com/apache/cloudstack.git
synced 2025-11-03 04:12:31 +01:00
check_heartbeat and pingtest now execute through SSH instead of XAPI, because XAPI may hang when the master host is down
This commit is contained in:
parent
7b08bb7cab
commit
88c1da679c
@ -406,17 +406,28 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean pingXenServer() {
|
protected boolean pingXAPI() {
|
||||||
Connection conn = getConnection();
|
Connection conn = getConnection();
|
||||||
|
try {
|
||||||
|
Host host = Host.getByUuid(conn, _host.uuid);
|
||||||
|
if( !host.getEnabled(conn) ) {
|
||||||
|
s_logger.debug("Host " + _host.ip + " is not enabled!");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
s_logger.debug("cannot get host enabled status, host " + _host.ip + " due to " + e.toString(), e);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
callHostPlugin(conn, "echo", "main");
|
callHostPlugin(conn, "echo", "main");
|
||||||
return true;
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
s_logger.debug("cannot ping host " + _host.ip + " due to " + e.toString(), e);
|
s_logger.debug("cannot ping host " + _host.ip + " due to " + e.toString(), e);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
protected String logX(XenAPIObject obj, String msg) {
|
protected String logX(XenAPIObject obj, String msg) {
|
||||||
return new StringBuilder("Host ").append(_host.ip).append(" ").append(obj.toWireString()).append(": ").append(msg).toString();
|
return new StringBuilder("Host ").append(_host.ip).append(" ").append(obj.toWireString()).append(": ").append(msg).toString();
|
||||||
}
|
}
|
||||||
@ -2006,12 +2017,24 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean doPingTest(Connection conn, final String computingHostIp) {
|
private boolean doPingTest(Connection conn, final String computingHostIp) {
|
||||||
String args = "-h " + computingHostIp;
|
com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22);
|
||||||
String result = callHostPlugin(conn, "vmops", "pingtest", "args", args);
|
try {
|
||||||
if (result == null || result.isEmpty()) {
|
sshConnection.connect(null, 60000, 60000);
|
||||||
|
if (!sshConnection.authenticateWithPassword(_username, _password.peek())) {
|
||||||
|
throw new CloudRuntimeException("Unable to authenticate");
|
||||||
|
}
|
||||||
|
|
||||||
|
String cmd = "ping -c 2 " + computingHostIp;
|
||||||
|
if (!SSHCmdHelper.sshExecuteCmd(sshConnection, cmd)) {
|
||||||
|
throw new CloudRuntimeException("Cannot ping host " + computingHostIp + " from host " + _host.ip);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
s_logger.warn("Catch exception " + e.toString(), e);
|
||||||
return false;
|
return false;
|
||||||
|
} finally {
|
||||||
|
sshConnection.close();
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
||||||
@ -2238,7 +2261,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected CheckHealthAnswer execute(CheckHealthCommand cmd) {
|
protected CheckHealthAnswer execute(CheckHealthCommand cmd) {
|
||||||
boolean result = pingXenServer();
|
boolean result = pingXAPI();
|
||||||
return new CheckHealthAnswer(cmd, result);
|
return new CheckHealthAnswer(cmd, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4341,9 +4364,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
|
|||||||
@Override
|
@Override
|
||||||
public PingCommand getCurrentStatus(long id) {
|
public PingCommand getCurrentStatus(long id) {
|
||||||
try {
|
try {
|
||||||
if (!pingXenServer()) {
|
if (!pingXAPI()) {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
if (!pingXenServer()) {
|
if (!pingXAPI()) {
|
||||||
s_logger.warn(" can not ping xenserver " + _host.uuid);
|
s_logger.warn(" can not ping xenserver " + _host.uuid);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -73,12 +73,10 @@ public class XenServer56FP1Resource extends XenServer56Resource {
|
|||||||
protected FenceAnswer execute(FenceCommand cmd) {
|
protected FenceAnswer execute(FenceCommand cmd) {
|
||||||
Connection conn = getConnection();
|
Connection conn = getConnection();
|
||||||
try {
|
try {
|
||||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
if (check_heartbeat(cmd.getHostGuid())) {
|
||||||
if (!result.contains("> DEAD <")) {
|
|
||||||
s_logger.debug("Heart beat is still going so unable to fence");
|
s_logger.debug("Heart beat is still going so unable to fence");
|
||||||
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
||||||
for (VM vm : vms) {
|
for (VM vm : vms) {
|
||||||
Set<VDI> vdis = new HashSet<VDI>();
|
Set<VDI> vdis = new HashSet<VDI>();
|
||||||
|
|||||||
@ -28,6 +28,7 @@ import com.cloud.agent.api.StartupCommand;
|
|||||||
import com.cloud.resource.ServerResource;
|
import com.cloud.resource.ServerResource;
|
||||||
import com.cloud.utils.exception.CloudRuntimeException;
|
import com.cloud.utils.exception.CloudRuntimeException;
|
||||||
import com.cloud.utils.script.Script;
|
import com.cloud.utils.script.Script;
|
||||||
|
import com.cloud.utils.ssh.SSHCmdHelper;
|
||||||
import com.xensource.xenapi.Connection;
|
import com.xensource.xenapi.Connection;
|
||||||
import com.xensource.xenapi.Host;
|
import com.xensource.xenapi.Host;
|
||||||
import com.xensource.xenapi.Network;
|
import com.xensource.xenapi.Network;
|
||||||
@ -208,15 +209,37 @@ public class XenServer56Resource extends CitrixResourceBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Boolean check_heartbeat(String hostuuid) {
|
||||||
|
com.trilead.ssh2.Connection sshConnection = new com.trilead.ssh2.Connection(_host.ip, 22);
|
||||||
|
try {
|
||||||
|
sshConnection.connect(null, 60000, 60000);
|
||||||
|
if (!sshConnection.authenticateWithPassword(_username, _password.peek())) {
|
||||||
|
throw new CloudRuntimeException("Unable to authenticate");
|
||||||
|
}
|
||||||
|
|
||||||
|
String shcmd = "/opt/cloud/bin/check_heartbeat.sh " + hostuuid + " "
|
||||||
|
+ Integer.toString(_heartbeatInterval * 2);
|
||||||
|
if (!SSHCmdHelper.sshExecuteCmd(sshConnection, shcmd)) {
|
||||||
|
s_logger.debug("Heart beat is gone so dead.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
s_logger.debug("Heart beat is still going");
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
s_logger.debug("health check failed due to catch exception " + e.toString());
|
||||||
|
return null;
|
||||||
|
} finally {
|
||||||
|
sshConnection.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected FenceAnswer execute(FenceCommand cmd) {
|
protected FenceAnswer execute(FenceCommand cmd) {
|
||||||
Connection conn = getConnection();
|
Connection conn = getConnection();
|
||||||
try {
|
try {
|
||||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHostGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
if (check_heartbeat(cmd.getHostGuid())) {
|
||||||
if (!result.contains("> DEAD <")) {
|
|
||||||
s_logger.debug("Heart beat is still going so unable to fence");
|
s_logger.debug("Heart beat is still going so unable to fence");
|
||||||
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
return new FenceAnswer(cmd, false, "Heartbeat is still going on unable to fence");
|
||||||
}
|
}
|
||||||
|
|
||||||
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
Set<VM> vms = VM.getByNameLabel(conn, cmd.getVmName());
|
||||||
for (VM vm : vms) {
|
for (VM vm : vms) {
|
||||||
synchronized (_cluster.intern()) {
|
synchronized (_cluster.intern()) {
|
||||||
@ -236,6 +259,7 @@ public class XenServer56Resource extends CitrixResourceBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean transferManagementNetwork(Connection conn, Host host, PIF src, PIF.Record spr, PIF dest) throws XmlRpcException, XenAPIException {
|
protected boolean transferManagementNetwork(Connection conn, Host host, PIF src, PIF.Record spr, PIF dest) throws XmlRpcException, XenAPIException {
|
||||||
dest.reconfigureIp(conn, spr.ipConfigurationMode, spr.IP, spr.netmask, spr.gateway, spr.DNS);
|
dest.reconfigureIp(conn, spr.ipConfigurationMode, spr.IP, spr.netmask, spr.gateway, spr.DNS);
|
||||||
@ -269,33 +293,29 @@ public class XenServer56Resource extends CitrixResourceBase {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public StartupCommand[] initialize() {
|
public StartupCommand[] initialize() {
|
||||||
pingXenServer();
|
pingXAPI();
|
||||||
StartupCommand[] cmds = super.initialize();
|
StartupCommand[] cmds = super.initialize();
|
||||||
return cmds;
|
return cmds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
protected CheckOnHostAnswer execute(CheckOnHostCommand cmd) {
|
||||||
try {
|
Boolean alive = check_heartbeat(cmd.getHost().getGuid());
|
||||||
Connection conn = getConnection();
|
String msg = "";
|
||||||
String result = callHostPluginPremium(conn, "check_heartbeat", "host", cmd.getHost().getGuid(), "interval", Integer.toString(_heartbeatInterval * 2));
|
if (alive == null) {
|
||||||
if (result == null) {
|
msg = " cannot determine ";
|
||||||
return new CheckOnHostAnswer(cmd, "Unable to call plugin");
|
} else if ( alive == true) {
|
||||||
}
|
msg = "Heart beat is still going";
|
||||||
if (result.contains("> DEAD <")) {
|
} else {
|
||||||
s_logger.debug("Heart beat is gone so dead.");
|
msg = "Heart beat is gone so dead.";
|
||||||
return new CheckOnHostAnswer(cmd, false, "Heart Beat is done");
|
|
||||||
} else if (result.contains("> ALIVE <")) {
|
|
||||||
s_logger.debug("Heart beat is still going");
|
|
||||||
return new CheckOnHostAnswer(cmd, true, "Heartbeat is still going");
|
|
||||||
}
|
|
||||||
return new CheckOnHostAnswer(cmd, null, "Unable to determine");
|
|
||||||
} catch (Exception e) {
|
|
||||||
s_logger.warn("Unable to fence", e);
|
|
||||||
return new CheckOnHostAnswer(cmd, e.getMessage());
|
|
||||||
}
|
}
|
||||||
|
s_logger.debug(msg);
|
||||||
|
return new CheckOnHostAnswer(cmd, alive, msg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public XenServer56Resource() {
|
public XenServer56Resource() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -72,3 +72,4 @@ do
|
|||||||
done
|
done
|
||||||
|
|
||||||
echo "=====> DEAD <======"
|
echo "=====> DEAD <======"
|
||||||
|
exit 1
|
||||||
|
|||||||
@ -123,18 +123,7 @@ def setup_heartbeat_file(session, args):
|
|||||||
txt = ''
|
txt = ''
|
||||||
return txt
|
return txt
|
||||||
|
|
||||||
@echo
|
|
||||||
def check_heartbeat(session, args):
|
|
||||||
host = args['host']
|
|
||||||
interval = args['interval']
|
|
||||||
try:
|
|
||||||
cmd = ["bash", "/opt/cloud/bin/check_heartbeat.sh", host, interval]
|
|
||||||
txt = util.pread2(cmd)
|
|
||||||
except:
|
|
||||||
txt=''
|
|
||||||
return txt
|
|
||||||
|
|
||||||
|
|
||||||
@echo
|
@echo
|
||||||
def heartbeat(session, args):
|
def heartbeat(session, args):
|
||||||
host = args['host']
|
host = args['host']
|
||||||
@ -156,5 +145,4 @@ def asmonitor(session, args):
|
|||||||
return 'fail'
|
return 'fail'
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, "create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "check_heartbeat":check_heartbeat, "heartbeat": heartbeat, "asmonitor": asmonitor})
|
XenAPIPlugin.dispatch({"forceShutdownVM":forceShutdownVM, "upgrade_snapshot":upgrade_snapshot, "create_privatetemplate_from_snapshot":create_privatetemplate_from_snapshot, "copy_vhd_to_secondarystorage":copy_vhd_to_secondarystorage, "copy_vhd_from_secondarystorage":copy_vhd_from_secondarystorage, "setup_heartbeat_sr":setup_heartbeat_sr, "setup_heartbeat_file":setup_heartbeat_file, "heartbeat": heartbeat, "asmonitor": asmonitor})
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user