diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java
index 723335b97f7..5ceaef2bb19 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java
@@ -16,9 +16,9 @@
 // under the License.
 package com.cloud.hypervisor.kvm.resource;
 
-import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Callable;
+import java.util.stream.Collectors;
 
 import org.apache.log4j.Logger;
 
@@ -27,13 +27,13 @@ import com.cloud.utils.script.Script;
 
 public class KVMHAChecker extends KVMHABase implements Callable {
     private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
-    private List _pools;
-    private String _hostIP;
-    private long _heartBeatCheckerTimeout = 360000; /* 6 minutes */
+    private List nfsStoragePools;
+    private String hostIp;
+    private long heartBeatCheckerTimeout = 360000; // 6 minutes
 
     public KVMHAChecker(List pools, String host) {
-        this._pools = pools;
-        this._hostIP = host;
+        this.nfsStoragePools = pools;
+        this.hostIp = host;
     }
 
     /*
@@ -42,35 +42,40 @@ public class KVMHAChecker extends KVMHABase implements Callable {
      */
     @Override
     public Boolean checkingHeartBeat() {
-        List results = new ArrayList();
-        for (NfsStoragePool pool : _pools) {
-            Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger);
+        boolean validResult = false;
+
+        String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", ")));
+
+        s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools));
+
+        for (NfsStoragePool pool : nfsStoragePools) {
+            Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger);
             cmd.add("-i", pool._poolIp);
             cmd.add("-p", pool._poolMountSourcePath);
             cmd.add("-m", pool._mountDestPath);
-            cmd.add("-h", _hostIP);
+            cmd.add("-h", hostIp);
             cmd.add("-r");
             cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
             OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
             String result = cmd.execute(parser);
-            s_logger.debug("KVMHAChecker pool: " + pool._poolIp);
-            s_logger.debug("KVMHAChecker result: " + result);
-            s_logger.debug("KVMHAChecker parser: " + parser.getLine());
-            if (result == null && parser.getLine().contains("> DEAD <")) {
-                s_logger.debug("read heartbeat failed: ");
-                results.add(false);
+            String parsedLine = parser.getLine();
+
+            s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine,
+                    pool._poolIp));
+
+            if (result == null && parsedLine.contains("DEAD")) {
+                s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(),
+                        result, parsedLine, hostIp));
             } else {
-                results.add(true);
+                validResult = true;
             }
         }
 
-        for (Boolean r : results) {
-            if (r) {
-                return true;
-            }
+        if (!validResult) {
+            s_logger.warn(String.format("All checks with KVMHAChecker for %s considered it as dead. It may cause a shutdown of the host.", hostAndPools));
         }
 
-        return false;
+        return validResult;
     }
 
     @Override
diff --git a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
index df2e54db85a..a931d94aaf2 100755
--- a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
+++ b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
@@ -146,11 +146,13 @@ check_hbLog() {
 if [ "$rflag" == "1" ]
 then
   check_hbLog
   if [ $? == 0 ]
   then
     echo "=====> ALIVE <====="
   else
-    echo "=====> DEAD <======"
+    # The elapsed seconds are read from the $diff variable assigned by check_hbLog.
+    # They must not travel through the exit status: it is truncated to 0-255, so a
+    # stale heartbeat of 256, 512, ... seconds would wrap to 0 and look ALIVE.
+    echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
   fi
   exit 0
 elif [ "$cflag" == "1" ]