mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-10310 Fix KVM reboot on storage issue (#2722)
This commit is contained in:
parent
9b772db0f1
commit
023dcec5ef
@ -34,7 +34,8 @@ public class KVMHABase {
|
||||
protected static String s_heartBeatPath;
|
||||
protected long _heartBeatUpdateTimeout = 60000;
|
||||
protected long _heartBeatUpdateFreq = 60000;
|
||||
protected long _heartBeatUpdateMaxRetry = 3;
|
||||
protected long _heartBeatUpdateMaxTries = 5;
|
||||
protected long _heartBeatUpdateRetrySleep = 15000;
|
||||
|
||||
public static enum PoolType {
|
||||
PrimaryStorage, SecondaryStorage
|
||||
|
||||
@ -119,7 +119,8 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
|
||||
}
|
||||
|
||||
String result = null;
|
||||
for (int i = 0; i < 5; i++) {
|
||||
// Try multiple times, but sleep in between tries to ensure it isn't a short-lived transient error
|
||||
for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
|
||||
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
|
||||
cmd.add("-i", primaryStoragePool._poolIp);
|
||||
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
|
||||
@ -127,14 +128,21 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
|
||||
cmd.add("-h", _hostIP);
|
||||
result = cmd.execute();
|
||||
if (result != null) {
|
||||
s_logger.warn("write heartbeat failed: " + result + ", retry: " + i);
|
||||
s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
|
||||
try {
|
||||
Thread.sleep(_heartBeatUpdateRetrySleep);
|
||||
} catch (InterruptedException e) {
|
||||
s_logger.debug("[ignored] interupted between heartbeat retries.");
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result != null) {
|
||||
s_logger.warn("write heartbeat failed: " + result + "; reboot the host");
|
||||
// Stop cloudstack-agent if the heartbeat file cannot be written.
|
||||
// This will raise an alert on the management server
|
||||
s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent");
|
||||
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
|
||||
cmd.add("-i", primaryStoragePool._poolIp);
|
||||
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
|
||||
|
||||
@ -155,10 +155,10 @@ then
|
||||
exit 0
|
||||
elif [ "$cflag" == "1" ]
|
||||
then
|
||||
/usr/bin/logger -t heartbeat "kvmheartbeat.sh rebooted system because it was unable to write the heartbeat to the storage."
|
||||
/usr/bin/logger -t heartbeat "kvmheartbeat.sh stopped cloudstack-agent because it was unable to write the heartbeat to the storage."
|
||||
sync &
|
||||
sleep 5
|
||||
echo b > /proc/sysrq-trigger
|
||||
service cloudstack-agent stop
|
||||
exit $?
|
||||
else
|
||||
write_hbLog
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user