Improve HA logs (#5241)

Co-authored-by: GutoVeronezi <daniel@scclouds.com.br>
This commit is contained in:
Daniel Augusto Veronezi Salvador 2021-07-30 16:13:16 -03:00 committed by GitHub
parent 0d8b4de1b2
commit 82df04ecc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 25 deletions

View File

@ -16,9 +16,9 @@
// under the License.
package com.cloud.hypervisor.kvm.resource;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import org.apache.log4j.Logger;
@ -27,13 +27,13 @@ import com.cloud.utils.script.Script;
public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
private List<NfsStoragePool> _pools;
private String _hostIP;
private long _heartBeatCheckerTimeout = 360000; /* 6 minutes */
private List<NfsStoragePool> nfsStoragePools;
private String hostIp;
private long heartBeatCheckerTimeout = 360000; // 6 minutes
public KVMHAChecker(List<NfsStoragePool> pools, String host) {
this._pools = pools;
this._hostIP = host;
this.nfsStoragePools = pools;
this.hostIp = host;
}
/*
@ -42,35 +42,40 @@ public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
*/
@Override
public Boolean checkingHeartBeat() {
List<Boolean> results = new ArrayList<Boolean>();
for (NfsStoragePool pool : _pools) {
Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger);
boolean validResult = false;
String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", ")));
s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools));
for (NfsStoragePool pool : nfsStoragePools) {
Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger);
cmd.add("-i", pool._poolIp);
cmd.add("-p", pool._poolMountSourcePath);
cmd.add("-m", pool._mountDestPath);
cmd.add("-h", _hostIP);
cmd.add("-h", hostIp);
cmd.add("-r");
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
String result = cmd.execute(parser);
s_logger.debug("KVMHAChecker pool: " + pool._poolIp);
s_logger.debug("KVMHAChecker result: " + result);
s_logger.debug("KVMHAChecker parser: " + parser.getLine());
if (result == null && parser.getLine().contains("> DEAD <")) {
s_logger.debug("read heartbeat failed: ");
results.add(false);
String parsedLine = parser.getLine();
s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine,
pool._poolIp));
if (result == null && parsedLine.contains("DEAD")) {
s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(),
result, parsedLine, hostIp));
} else {
results.add(true);
validResult = true;
}
}
for (Boolean r : results) {
if (r) {
return true;
}
if (!validResult) {
s_logger.warn(String.format("All checks with KVMHAChecker for %s considered it as dead. It may cause a shutdown of the host.", hostAndPools));
}
return false;
return validResult;
}
@Override

View File

@ -138,7 +138,7 @@ check_hbLog() {
diff=`expr $now - $hb`
if [ $diff -gt $interval ]
then
return 1
return $diff
fi
return 0
}
@ -146,11 +146,12 @@ check_hbLog() {
if [ "$rflag" == "1" ]
then
check_hbLog
if [ $? == 0 ]
diff=$?
if [ $diff == 0 ]
then
echo "=====> ALIVE <====="
else
echo "=====> DEAD <======"
echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
fi
exit 0
elif [ "$cflag" == "1" ]