From b130e55088ceb392d2c6ff1533c335882be1c9b5 Mon Sep 17 00:00:00 2001 From: Wido den Hollander Date: Thu, 28 Sep 2017 10:26:15 +0200 Subject: [PATCH] CLOUDSTACK-9397: Add Watchdog timer to KVM Instance (#1707) The watchdog timer adds functionality where the Hypervisor can detect if an instance has crashed or stopped functioning. The watchdog timer adds functionality where the Hypervisor can detect if an instance has crashed or stopped functioning. When the Instance has the 'watchdog' daemon running it will send heartbeats to the /dev/watchdog device. If these heartbeats are no longer received by the HV it will reset the Instance. If the Instance never sends the heartbeats the HV does not take action. It only takes action if it stops sending heartbeats. This is supported since Libvirt 0.7.3 and can be defined in the XML format as described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog To the 'devices' section this will be added: <watchdog model='i6300esb' action='reset'/> In the agent.properties the action to be taken can be defined: vm.watchdog.action=reset The same goes for the model. The Intel i6300esb is however the most commonly used. vm.watchdog.model=i6300esb When the Instance has the 'watchdog' daemon running it will send heartbeats to the /dev/watchdog device. If these heartbeats are no longer received by the HV it will reset the Instance. If the Instance never sends the heartbeats the HV does not take action. It only takes action if it stops sending heartbeats. This is supported since Libvirt 0.7.3 and can be defined in the XML format as described in the docs: https://libvirt.org/formatdomain.html#elementsWatchdog To the 'devices' section this will be added: <watchdog model='i6300esb' action='reset'/> In the agent.properties the action to be taken can be defined: vm.watchdog.action=reset The same goes for the model. The Intel i6300esb is however the most commonly used. 
vm.watchdog.model=i6300esb Signed-off-by: Wido den Hollander --- agent/conf/agent.properties | 12 ++++ .../resource/LibvirtComputingResource.java | 17 ++++++ .../kvm/resource/LibvirtDomainXMLParser.java | 29 +++++++++ .../hypervisor/kvm/resource/LibvirtVMDef.java | 61 +++++++++++++++++++ .../LibvirtComputingResourceTest.java | 3 + .../resource/LibvirtDomainXMLParserTest.java | 6 ++ .../kvm/resource/LibvirtVMDefTest.java | 9 +++ 7 files changed, 137 insertions(+) diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index 3ed382abb92..22741b51efc 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -180,3 +180,15 @@ hypervisor.type=kvm # router.aggregation.command.each.timeout=600 # timeout value for aggregation commands send to virtual router # + +# +# vm.watchdog.model=i6300esb +# The model of Watchdog timer to present to the Guest +# For all models refer to the libvirt documentation. +# Recommended value is: i6300esb +# +# vm.watchdog.action=none +# Action to take when the Guest/Instance is no longer notifying the Watchdog +# timer. +# For all actions refer to the libvirt documentation. +# Recommended values are: none, reset and poweroff. 
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 4b5811b5b92..104d435c948 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -133,6 +133,9 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.TermPolicy; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.VideoDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction; import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtRequestWrapper; import com.cloud.hypervisor.kvm.resource.wrapper.LibvirtUtilitiesHelper; import com.cloud.hypervisor.kvm.storage.KVMPhysicalDisk; @@ -274,6 +277,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private File _qemuSocketsPath; private final String _qemuGuestAgentSocketName = "org.qemu.guest_agent.0"; private long _totalMemory; + protected WatchDogAction _watchDogAction = WatchDogAction.NONE; + protected WatchDogModel _watchDogModel = WatchDogModel.I6300ESB; private final Map _pifs = new HashMap(); private final Map _vmStats = new ConcurrentHashMap(); @@ -870,6 +875,16 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv _rngRatePeriod = NumbersUtil.parseInt(value, new Integer(_rngRatePeriod)); } + value = (String) params.get("vm.watchdog.model"); + if (!Strings.isNullOrEmpty(value)) { + _watchDogModel = WatchDogModel.valueOf(value.toUpperCase()); + } + + value = (String) 
params.get("vm.watchdog.action"); + if (!Strings.isNullOrEmpty(value)) { + _watchDogAction = WatchDogAction.valueOf(value.toUpperCase()); + } + LibvirtConnection.initialize(_hypervisorURI); Connect conn = null; try { @@ -2066,6 +2081,8 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv devices.addDevice(new ChannelDef(_qemuGuestAgentSocketName, ChannelDef.ChannelType.UNIX, new File(_qemuSocketsPath + "/" + vmTO.getName() + "." + _qemuGuestAgentSocketName))); + devices.addDevice(new WatchDogDef(_watchDogAction, _watchDogModel)); + final VideoDef videoCard = new VideoDef(_videoHw, _videoRam); devices.addDevice(videoCard); diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParser.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParser.java index 3c4dc11ec9c..847d77553f4 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParser.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParser.java @@ -43,6 +43,9 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef.NicModel; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef.RngBackendModel; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogModel; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef.WatchDogAction; public class LibvirtDomainXMLParser { private static final Logger s_logger = Logger.getLogger(LibvirtDomainXMLParser.class); @@ -50,6 +53,7 @@ public class LibvirtDomainXMLParser { private final List diskDefs = new ArrayList(); private final List rngDefs = new ArrayList(); private final List channels = new ArrayList(); + private final List watchDogDefs = new ArrayList(); 
private Integer vncPort; private String desc; @@ -237,6 +241,27 @@ public class LibvirtDomainXMLParser { rngDefs.add(def); } + NodeList watchDogs = devices.getElementsByTagName("watchdog"); + for (int i = 0; i < watchDogs.getLength(); i++) { + WatchDogDef def = null; + Element watchDog = (Element)watchDogs.item(i); + String action = watchDog.getAttribute("action"); + String model = watchDog.getAttribute("model"); + + if (Strings.isNullOrEmpty(model)) { + continue; + } + + if (Strings.isNullOrEmpty(action)) { + def = new WatchDogDef(WatchDogModel.valueOf(model.toUpperCase())); + } else { + def = new WatchDogDef(WatchDogAction.valueOf(action.toUpperCase()), + WatchDogModel.valueOf(model.toUpperCase())); + } + + watchDogDefs.add(def); + } + return true; } catch (ParserConfigurationException e) { s_logger.debug(e.toString()); @@ -290,6 +315,10 @@ public class LibvirtDomainXMLParser { return Collections.unmodifiableList(channels); } + public List getWatchDogs() { + return watchDogDefs; + } + public String getDescription() { return desc; } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java index ffc93de41e7..ff3eb03ebdf 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtVMDef.java @@ -1612,6 +1612,67 @@ public class LibvirtVMDef { } } + public static class WatchDogDef { + enum WatchDogModel { + I6300ESB("i6300esb"), IB700("ib700"), DIAG288("diag288"); + String model; + + WatchDogModel(String model) { + this.model = model; + } + + @Override + public String toString() { + return model; + } + } + + enum WatchDogAction { + RESET("reset"), SHUTDOWN("shutdown"), POWEROFF("poweroff"), PAUSE("pause"), NONE("none"), DUMP("dump"); + String action; + + WatchDogAction(String action) { + this.action = action; + } + + @Override + public String 
toString() { + return action; + } + } + + WatchDogModel model = WatchDogModel.I6300ESB; + WatchDogAction action = WatchDogAction.NONE; + + public WatchDogDef(WatchDogAction action) { + this.action = action; + } + + public WatchDogDef(WatchDogModel model) { + this.model = model; + } + + public WatchDogDef(WatchDogAction action, WatchDogModel model) { + this.action = action; + this.model = model; + } + + public WatchDogAction getAction() { + return action; + } + + public WatchDogModel getModel() { + return model; + } + + @Override + public String toString() { + StringBuilder wacthDogBuilder = new StringBuilder(); + wacthDogBuilder.append("\n"); + return wacthDogBuilder.toString(); + } + } + public void setHvsType(String hvs) { _hvsType = hvs; } diff --git a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java index 8992a5b2430..b3f85300321 100644 --- a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java +++ b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtComputingResourceTest.java @@ -366,6 +366,9 @@ public class LibvirtComputingResourceTest { assertXpath(domainDoc, "/domain/on_reboot/text()", "restart"); assertXpath(domainDoc, "/domain/on_poweroff/text()", "destroy"); assertXpath(domainDoc, "/domain/on_crash/text()", "destroy"); + + assertXpath(domainDoc, "/domain/devices/watchdog/@model", "i6300esb"); + assertXpath(domainDoc, "/domain/devices/watchdog/@action", "none"); } static Document parse(final String input) { diff --git a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParserTest.java b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParserTest.java index f32f7fd833d..78c4e868f2c 100644 --- a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParserTest.java +++ 
b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtDomainXMLParserTest.java @@ -27,6 +27,7 @@ import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.DiskDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.InterfaceDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.RngDef; import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.ChannelDef; +import com.cloud.hypervisor.kvm.resource.LibvirtVMDef.WatchDogDef; public class LibvirtDomainXMLParserTest extends TestCase { @@ -185,6 +186,7 @@ public class LibvirtDomainXMLParserTest extends TestCase { "" + "
" + "" + + "" + "" + "" + ""; @@ -232,5 +234,9 @@ public class LibvirtDomainXMLParserTest extends TestCase { assertEquals(RngDef.RngBackendModel.RANDOM, rngs.get(0).getRngBackendModel()); assertEquals(4096, rngs.get(0).getRngRateBytes()); assertEquals(5000, rngs.get(0).getRngRatePeriod()); + + List watchDogs = parser.getWatchDogs(); + assertEquals(WatchDogDef.WatchDogModel.I6300ESB, watchDogs.get(0).getModel()); + assertEquals(WatchDogDef.WatchDogAction.RESET, watchDogs.get(0).getAction()); } } diff --git a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java index b8ecdc3c2e3..8f0f93d31ab 100644 --- a/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java +++ b/plugins/hypervisors/kvm/test/com/cloud/hypervisor/kvm/resource/LibvirtVMDefTest.java @@ -149,4 +149,13 @@ public class LibvirtVMDefTest extends TestCase { assertEquals(path, channelDef.getPath()); } + public void testWatchDogDef() { + LibvirtVMDef.WatchDogDef.WatchDogModel model = LibvirtVMDef.WatchDogDef.WatchDogModel.I6300ESB; + LibvirtVMDef.WatchDogDef.WatchDogAction action = LibvirtVMDef.WatchDogDef.WatchDogAction.RESET; + + LibvirtVMDef.WatchDogDef def = new LibvirtVMDef.WatchDogDef(action, model); + assertEquals(def.getModel(), model); + assertEquals(def.getAction(), action); + } + }