mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Improve HA logs (#5241)
Co-authored-by: GutoVeronezi <daniel@scclouds.com.br>
This commit is contained in:
parent
0d8b4de1b2
commit
82df04ecc8
@ -16,9 +16,9 @@
|
|||||||
// under the License.
|
// under the License.
|
||||||
package com.cloud.hypervisor.kvm.resource;
|
package com.cloud.hypervisor.kvm.resource;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
@ -27,13 +27,13 @@ import com.cloud.utils.script.Script;
|
|||||||
|
|
||||||
public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
|
public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
|
||||||
private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
|
private static final Logger s_logger = Logger.getLogger(KVMHAChecker.class);
|
||||||
private List<NfsStoragePool> _pools;
|
private List<NfsStoragePool> nfsStoragePools;
|
||||||
private String _hostIP;
|
private String hostIp;
|
||||||
private long _heartBeatCheckerTimeout = 360000; /* 6 minutes */
|
private long heartBeatCheckerTimeout = 360000; // 6 minutes
|
||||||
|
|
||||||
/**
 * Creates a checker for one host against a set of NFS storage pools.
 *
 * @param pools the NFS storage pools that {@link #checkingHeartBeat()} iterates over
 * @param host  the host IP handed to the heartbeat script through its {@code -h} option
 */
public KVMHAChecker(List<NfsStoragePool> pools, String host) {
    nfsStoragePools = pools;
    hostIp = host;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -42,35 +42,40 @@ public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Boolean checkingHeartBeat() {
|
public Boolean checkingHeartBeat() {
|
||||||
List<Boolean> results = new ArrayList<Boolean>();
|
boolean validResult = false;
|
||||||
for (NfsStoragePool pool : _pools) {
|
|
||||||
Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger);
|
String hostAndPools = String.format("host IP [%s] in pools [%s]", hostIp, nfsStoragePools.stream().map(pool -> pool._poolIp).collect(Collectors.joining(", ")));
|
||||||
|
|
||||||
|
s_logger.debug(String.format("Checking heart beat with KVMHAChecker for %s", hostAndPools));
|
||||||
|
|
||||||
|
for (NfsStoragePool pool : nfsStoragePools) {
|
||||||
|
Script cmd = new Script(s_heartBeatPath, heartBeatCheckerTimeout, s_logger);
|
||||||
cmd.add("-i", pool._poolIp);
|
cmd.add("-i", pool._poolIp);
|
||||||
cmd.add("-p", pool._poolMountSourcePath);
|
cmd.add("-p", pool._poolMountSourcePath);
|
||||||
cmd.add("-m", pool._mountDestPath);
|
cmd.add("-m", pool._mountDestPath);
|
||||||
cmd.add("-h", _hostIP);
|
cmd.add("-h", hostIp);
|
||||||
cmd.add("-r");
|
cmd.add("-r");
|
||||||
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
|
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
|
||||||
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
|
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
|
||||||
String result = cmd.execute(parser);
|
String result = cmd.execute(parser);
|
||||||
s_logger.debug("KVMHAChecker pool: " + pool._poolIp);
|
String parsedLine = parser.getLine();
|
||||||
s_logger.debug("KVMHAChecker result: " + result);
|
|
||||||
s_logger.debug("KVMHAChecker parser: " + parser.getLine());
|
s_logger.debug(String.format("Checking heart beat with KVMHAChecker [{command=\"%s\", result: \"%s\", log: \"%s\", pool: \"%s\"}].", cmd.toString(), result, parsedLine,
|
||||||
if (result == null && parser.getLine().contains("> DEAD <")) {
|
pool._poolIp));
|
||||||
s_logger.debug("read heartbeat failed: ");
|
|
||||||
results.add(false);
|
if (result == null && parsedLine.contains("DEAD")) {
|
||||||
|
s_logger.warn(String.format("Checking heart beat with KVMHAChecker command [%s] returned [%s]. [%s]. It may cause a shutdown of host IP [%s].", cmd.toString(),
|
||||||
|
result, parsedLine, hostIp));
|
||||||
} else {
|
} else {
|
||||||
results.add(true);
|
validResult = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Boolean r : results) {
|
if (!validResult) {
|
||||||
if (r) {
|
s_logger.warn(String.format("All checks with KVMHAChecker for %s considered it as dead. It may cause a shutdown of the host.", hostAndPools));
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return validResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|||||||
@ -138,7 +138,7 @@ check_hbLog() {
|
|||||||
diff=`expr $now - $hb`
|
diff=`expr $now - $hb`
|
||||||
if [ $diff -gt $interval ]
|
if [ $diff -gt $interval ]
|
||||||
then
|
then
|
||||||
return 1
|
return $diff
|
||||||
fi
|
fi
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
@ -146,11 +146,12 @@ check_hbLog() {
|
|||||||
if [ "$rflag" == "1" ]
|
if [ "$rflag" == "1" ]
|
||||||
then
|
then
|
||||||
check_hbLog
|
check_hbLog
|
||||||
if [ $? == 0 ]
|
diff=$?
|
||||||
|
if [ $diff == 0 ]
|
||||||
then
|
then
|
||||||
echo "=====> ALIVE <====="
|
echo "=====> ALIVE <====="
|
||||||
else
|
else
|
||||||
echo "=====> DEAD <======"
|
echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
|
||||||
fi
|
fi
|
||||||
exit 0
|
exit 0
|
||||||
elif [ "$cflag" == "1" ]
|
elif [ "$cflag" == "1" ]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user