CLOUDSTACK-9397: Add Watchdog timer to KVM Instance (#1707)

The watchdog timer adds functionality where the Hypervisor can detect if an
instance has crashed or stopped functioning.
The watchdog timer adds functionality where the Hypervisor can detect if an
instance has crashed or stopped functioning.

When the Instance has the 'watchdog' daemon running it will send heartbeats
to the /dev/watchdog device.

If these heartbeats are no longer received by the HV it will reset the Instance.

If the Instance never sends the heartbeats the HV does not take action. It only
takes action if it stops sending heartbeats.

This is supported since Libvirt 0.7.3 and can be defined in the XML format as
described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog

To the 'devices' section this will be added:

  <watchdog model='i6300esb' action='reset'/>

In the agent.properties the action to be taken can be defined:

vm.watchdog.action=reset

The same goes for the model. The Intel i6300esb is however the most commonly used.

vm.watchdog.model=i6300esb

When the Instance has the 'watchdog' daemon running it will send heartbeats
to the /dev/watchdog device.

If these heartbeats are no longer received by the HV it will reset the Instance.

If the Instance never sends the heartbeats the HV does not take action. It only
takes action if it stops sending heartbeats.

This is supported since Libvirt 0.7.3 and can be defined in the XML format as
described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog

To the 'devices' section this will be added:

  <watchdog model='i6300esb' action='reset'/>

In the agent.properties the action to be taken can be defined:

  vm.watchdog.action=reset

The same goes for the model. The Intel i6300esb is however the most commonly used.

  vm.watchdog.model=i6300esb

Signed-off-by: Wido den Hollander <wido@widodh.nl>
This commit is contained in:
Wido den Hollander 2017-09-28 10:26:15 +02:00 committed by Rohit Yadav
parent e1cff7d435
commit b130e55088
7 changed files with 137 additions and 0 deletions

View File

@ -180,3 +180,15 @@ hypervisor.type=kvm
# router.aggregation.command.each.timeout=600
# timeout value for aggregation commands send to virtual router
#
#
# vm.watchdog.model=i6300esb
# The model of Watchdog timer to present to the Guest
# For all models refer to the libvirt documentation.
# Recommended value is: i6300esb
#
# vm.watchdog.action=none
# Action to take when the Guest/Instance is no longer notifying the Watchdog
# timer.
# For all actions refer to the libvirt documentation.
# Recommended values are: none, reset and poweroff.

View File

@ -133,6 +133,9 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.TermPolicy;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.VideoDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction;
import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtRequestWrapper;
import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtUtilitiesHelper;
import com.cloud.hypervisor.kvm.storage.KVMPhysicalDisk;
@ -274,6 +277,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private File _qemuSocketsPath;
private final String _qemuGuestAgentSocketName = "org.qemu.guest_agent.0";
private long _totalMemory;
protected WatchDogAction _watchDogAction = WatchDogAction.NONE;
protected WatchDogModel _watchDogModel = WatchDogModel.I6300ESB;
private final Map <String, String> _pifs = new HashMap<String, String>();
private final Map<String, VmStats> _vmStats = new ConcurrentHashMap<String, VmStats>();
@ -870,6 +875,16 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
_rngRatePeriod = NumbersUtil.parseInt(value, new Integer(_rngRatePeriod));
}
value = (String) params.get("vm.watchdog.model");
if (!Strings.isNullOrEmpty(value)) {
_watchDogModel = WatchDogModel.valueOf(value.toUpperCase());
}
value = (String) params.get("vm.watchdog.action");
if (!Strings.isNullOrEmpty(value)) {
_watchDogAction = WatchDogAction.valueOf(value.toUpperCase());
}
LibvirtConnection.initialize(_hypervisorURI);
Connect conn = null;
try {
@ -2066,6 +2081,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
devices.addDevice(new ChannelDef(_qemuGuestAgentSocketName, ChannelDef.ChannelType.UNIX,
new File(_qemuSocketsPath + "/" + vmTO.getName() + "." + _qemuGuestAgentSocketName)));
devices.addDevice(new WatchDogDef(_watchDogAction, _watchDogModel));
final VideoDef videoCard = new VideoDef(_videoHw, _videoRam);
devices.addDevice(videoCard);

View File

@ -43,6 +43,9 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef.NicModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction;
public class LibvirtDomainXMLParser {
private static final Logger s_logger = Logger.getLogger(LibvirtDomainXMLParser.class);
@ -50,6 +53,7 @@ public class LibvirtDomainXMLParser {
private final List<DiskDef> diskDefs = new ArrayList<DiskDef>();
private final List<RngDef> rngDefs = new ArrayList<RngDef>();
private final List<ChannelDef> channels = new ArrayList<ChannelDef>();
private final List<WatchDogDef> watchDogDefs = new ArrayList<WatchDogDef>();
private Integer vncPort;
private String desc;
@ -237,6 +241,27 @@ public class LibvirtDomainXMLParser {
rngDefs.add(def);
}
NodeList watchDogs = devices.getElementsByTagName("watchdog");
for (int i = 0; i < watchDogs.getLength(); i++) {
WatchDogDef def = null;
Element watchDog = (Element)watchDogs.item(i);
String action = watchDog.getAttribute("action");
String model = watchDog.getAttribute("model");
if (Strings.isNullOrEmpty(model)) {
continue;
}
if (Strings.isNullOrEmpty(action)) {
def = new WatchDogDef(WatchDogModel.valueOf(model.toUpperCase()));
} else {
def = new WatchDogDef(WatchDogAction.valueOf(action.toUpperCase()),
WatchDogModel.valueOf(model.toUpperCase()));
}
watchDogDefs.add(def);
}
return true;
} catch (ParserConfigurationException e) {
s_logger.debug(e.toString());
@ -290,6 +315,10 @@ public class LibvirtDomainXMLParser {
return Collections.unmodifiableList(channels);
}
/**
 * Returns the watchdog device definitions collected while parsing the domain XML.
 *
 * The list is wrapped read-only, matching the other device getters of this parser,
 * so callers cannot modify the parser's internal state.
 *
 * @return an unmodifiable view of the parsed watchdog definitions; empty when the
 *         domain XML contained no usable watchdog element
 */
public List<WatchDogDef> getWatchDogs() {
    return Collections.unmodifiableList(watchDogDefs);
}
public String getDescription() {
return desc;
}

View File

@ -1612,6 +1612,67 @@ public class LibvirtVMDef {
}
}
/**
 * Definition of a libvirt watchdog device, rendered as a single
 * {@code <watchdog model='...' action='...'/>} element by {@link #toString()}.
 *
 * When only one of the two attributes is supplied, the other keeps its default:
 * model {@code i6300esb} and action {@code none}.
 */
public static class WatchDogDef {

    /** Watchdog device models; {@link #toString()} returns the lowercase name written to the XML. */
    enum WatchDogModel {
        I6300ESB("i6300esb"),
        IB700("ib700"),
        DIAG288("diag288");

        String model;

        WatchDogModel(String xmlName) {
            model = xmlName;
        }

        @Override
        public String toString() {
            return model;
        }
    }

    /** Watchdog actions; {@link #toString()} returns the lowercase name written to the XML. */
    enum WatchDogAction {
        RESET("reset"),
        SHUTDOWN("shutdown"),
        POWEROFF("poweroff"),
        PAUSE("pause"),
        NONE("none"),
        DUMP("dump");

        String action;

        WatchDogAction(String xmlName) {
            action = xmlName;
        }

        @Override
        public String toString() {
            return action;
        }
    }

    WatchDogModel model = WatchDogModel.I6300ESB;
    WatchDogAction action = WatchDogAction.NONE;

    /**
     * Creates a watchdog with the given action and the default model.
     *
     * @param action the action to render in the {@code action} attribute
     */
    public WatchDogDef(WatchDogAction action) {
        this(action, WatchDogModel.I6300ESB);
    }

    /**
     * Creates a watchdog with the given model and the default action.
     *
     * @param model the model to render in the {@code model} attribute
     */
    public WatchDogDef(WatchDogModel model) {
        this(WatchDogAction.NONE, model);
    }

    /**
     * Creates a watchdog with an explicit action and model.
     *
     * @param action the action to render in the {@code action} attribute
     * @param model the model to render in the {@code model} attribute
     */
    public WatchDogDef(WatchDogAction action, WatchDogModel model) {
        this.model = model;
        this.action = action;
    }

    /** @return the configured watchdog action */
    public WatchDogAction getAction() {
        return this.action;
    }

    /** @return the configured watchdog model */
    public WatchDogModel getModel() {
        return this.model;
    }

    /**
     * Renders this definition as a self-closing watchdog element followed by a newline.
     *
     * @return the XML fragment for this watchdog device
     */
    @Override
    public String toString() {
        return "<watchdog model='" + model + "' action='" + action + "'/>\n";
    }
}
public void setHvsType(String hvs) {
_hvsType = hvs;
}

View File

@ -366,6 +366,9 @@ public class LibvirtComputingResourceTest {
assertXpath(domainDoc, "/domain/on_reboot/text()", "restart");
assertXpath(domainDoc, "/domain/on_poweroff/text()", "destroy");
assertXpath(domainDoc, "/domain/on_crash/text()", "destroy");
assertXpath(domainDoc, "/domain/devices/watchdog/@model", "i6300esb");
assertXpath(domainDoc, "/domain/devices/watchdog/@action", "none");
}
static Document parse(final String input) {

View File

@ -27,6 +27,7 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.DiskDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ChannelDef;
import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef;
public class LibvirtDomainXMLParserTest extends TestCase {
@ -185,6 +186,7 @@ public class LibvirtDomainXMLParserTest extends TestCase {
"<alias name='channel0'/>" +
"<address type='virtio-serial' controller='0' bus='0' port='1'/>" +
"</channel>" +
"<watchdog model='i6300esb' action='reset'/>" +
"</devices>" +
"<seclabel type='none'/>" +
"</domain>";
@ -232,5 +234,9 @@ public class LibvirtDomainXMLParserTest extends TestCase {
assertEquals(RngDef.RngBackendModel.RANDOM, rngs.get(0).getRngBackendModel());
assertEquals(4096, rngs.get(0).getRngRateBytes());
assertEquals(5000, rngs.get(0).getRngRatePeriod());
List<WatchDogDef> watchDogs = parser.getWatchDogs();
assertEquals(WatchDogDef.WatchDogModel.I6300ESB, watchDogs.get(0).getModel());
assertEquals(WatchDogDef.WatchDogAction.RESET, watchDogs.get(0).getAction());
}
}

View File

@ -149,4 +149,13 @@ public class LibvirtVMDefTest extends TestCase {
assertEquals(path, channelDef.getPath());
}
/**
 * Verifies that a WatchDogDef built from an explicit action and model returns both
 * through its getters and renders them into the watchdog XML element.
 */
public void testWatchDogDef() {
    LibvirtVMDef.WatchDogDef.WatchDogModel model = LibvirtVMDef.WatchDogDef.WatchDogModel.I6300ESB;
    LibvirtVMDef.WatchDogDef.WatchDogAction action = LibvirtVMDef.WatchDogDef.WatchDogAction.RESET;

    LibvirtVMDef.WatchDogDef def = new LibvirtVMDef.WatchDogDef(action, model);

    // assertEquals takes (expected, actual); this order keeps failure messages accurate.
    assertEquals(model, def.getModel());
    assertEquals(action, def.getAction());
    // The rendered XML is what ends up in the domain definition, so pin it as well.
    assertEquals("<watchdog model='i6300esb' action='reset'/>\n", def.toString());
}
}