mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Improve HA logs (#5241)
Co-authored-by: GutoVeronezi <daniel@scclouds.com.br>
This commit is contained in:
parent
0d8b4de1b2
commit
82df04ecc8
@ -16,9 +16,9 @@
|
||||
// under the License.
|
||||
package com.cloud.hypervisor.kvm.resource;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
@ -27,13 +27,13 @@ import com.cloud.utils.script.Script;
|
||||
|
||||
public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
|
||||
private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
|
||||
private List<NfsStoragePool> _pools;
|
||||
private String _hostIP;
|
||||
private long _heartBeatCheckerTimeout = 360000; /* 6 minutes */
|
||||
private List<NfsStoragePool> nfsStoragePools;
|
||||
private String hostIp;
|
||||
private long heartBeatCheckerTimeout = 360000; // 6 minutes
|
||||
|
||||
public KVMHAChecker(List<NfsStoragePool> pools, String host) {
|
||||
this._pools = pools;
|
||||
this._hostIP = host;
|
||||
this.nfsStoragePools = pools;
|
||||
this.hostIp = host;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -42,35 +42,40 @@ public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
|
||||
*/
|
||||
@Override
|
||||
public Boolean checkingHeartBeat() {
|
||||
List<Boolean> results = new ArrayList<Boolean>();
|
||||
for (NfsStoragePool pool : _pools) {
|
||||
Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger);
|
||||
boolean validResult = false;
|
||||
|
||||
String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", ")));
|
||||
|
||||
s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools));
|
||||
|
||||
for (NfsStoragePool pool : nfsStoragePools) {
|
||||
Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger);
|
||||
cmd.add("-i", pool._poolIp);
|
||||
cmd.add("-p", pool._poolMountSourcePath);
|
||||
cmd.add("-m", pool._mountDestPath);
|
||||
cmd.add("-h", _hostIP);
|
||||
cmd.add("-h", hostIp);
|
||||
cmd.add("-r");
|
||||
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
|
||||
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
|
||||
String result = cmd.execute(parser);
|
||||
s_logger.debug("KVMHAChecker pool: " + pool._poolIp);
|
||||
s_logger.debug("KVMHAChecker result: " + result);
|
||||
s_logger.debug("KVMHAChecker parser: " + parser.getLine());
|
||||
if (result == null && parser.getLine().contains("> DEAD <")) {
|
||||
s_logger.debug("read heartbeat failed: ");
|
||||
results.add(false);
|
||||
String parsedLine = parser.getLine();
|
||||
|
||||
s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine,
|
||||
pool._poolIp));
|
||||
|
||||
if (result == null && parsedLine.contains("DEAD")) {
|
||||
s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(),
|
||||
result, parsedLine, hostIp));
|
||||
} else {
|
||||
results.add(true);
|
||||
validResult = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (Boolean r : results) {
|
||||
if (r) {
|
||||
return true;
|
||||
}
|
||||
if (!validResult) {
|
||||
s_logger.warn(String.format("All checks with KVMHAChecker for %s considered it as dead. It may cause a shutdown of the host.", hostAndPools));
|
||||
}
|
||||
|
||||
return false;
|
||||
return validResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -138,7 +138,7 @@ check_hbLog() {
|
||||
diff=`expr $now - $hb`
|
||||
if [ $diff -gt $interval ]
|
||||
then
|
||||
return 1
|
||||
return $diff
|
||||
fi
|
||||
return 0
|
||||
}
|
||||
@ -146,11 +146,12 @@ check_hbLog() {
|
||||
if [ "$rflag" == "1" ]
|
||||
then
|
||||
check_hbLog
|
||||
if [ $? == 0 ]
|
||||
diff=$?
|
||||
if [ $diff == 0 ]
|
||||
then
|
||||
echo "=====> ALIVE <====="
|
||||
else
|
||||
echo "=====> DEAD <======"
|
||||
echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
|
||||
fi
|
||||
exit 0
|
||||
elif [ "$cflag" == "1" ]
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user