diff --git a/agent/src/main/java/com/cloud/agent/Agent.java b/agent/src/main/java/com/cloud/agent/Agent.java index e9213ca9b8c..b1ec592b9fe 100644 --- a/agent/src/main/java/com/cloud/agent/Agent.java +++ b/agent/src/main/java/com/cloud/agent/Agent.java @@ -40,6 +40,8 @@ import java.util.concurrent.atomic.AtomicInteger; import javax.naming.ConfigurationException; +import com.cloud.resource.AgentStatusUpdater; +import com.cloud.resource.ResourceStatusUpdater; import com.cloud.utils.NumbersUtil; import org.apache.cloudstack.agent.lb.SetupMSListAnswer; import org.apache.cloudstack.agent.lb.SetupMSListCommand; @@ -100,7 +102,7 @@ import com.cloud.utils.script.Script; * For more configuration options, see the individual types. * **/ -public class Agent implements HandlerFactory, IAgentControl { +public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater { protected static Logger s_logger = Logger.getLogger(Agent.class); public enum ExitStatus { @@ -409,6 +411,20 @@ public class Agent implements HandlerFactory, IAgentControl { } } + public void triggerUpdate() { + PingCommand command = _resource.getCurrentStatus(getId()); + command.setOutOfBand(true); + s_logger.debug("Sending out of band ping"); + + final Request request = new Request(_id, -1, command, false); + request.setSequence(getNextSequence()); + try { + _link.send(request.toBytes()); + } catch (final ClosedChannelException e) { + s_logger.warn("Unable to send ping update: " + request.toString()); + } + } + protected void cancelTasks() { synchronized (_watchList) { for (final WatchTask task : _watchList) { @@ -461,6 +477,10 @@ public class Agent implements HandlerFactory, IAgentControl { } catch (final ClosedChannelException e) { s_logger.warn("Unable to send request: " + request.toString()); } + + if (_resource instanceof ResourceStatusUpdater) { + ((ResourceStatusUpdater) _resource).registerStatusUpdater(this); + } } } diff --git a/core/src/main/java/com/cloud/agent/api/PingCommand.java b/core/src/main/java/com/cloud/agent/api/PingCommand.java index 1d62c5d1359..4192fc2e747 100644 --- a/core/src/main/java/com/cloud/agent/api/PingCommand.java +++ b/core/src/main/java/com/cloud/agent/api/PingCommand.java @@ -24,6 +24,7 @@ import com.cloud.host.Host; public class PingCommand extends Command { Host.Type hostType; long hostId; + boolean outOfBand; protected PingCommand() { } @@ -33,6 +34,12 @@ public class PingCommand extends Command { hostId = id; } + public PingCommand(Host.Type type, long id, boolean oob) { + hostType = type; + hostId = id; + outOfBand = oob; + } + public Host.Type getHostType() { return hostType; } @@ -41,6 +48,10 @@ public class PingCommand extends Command { return hostId; } + public boolean getOutOfBand() { return outOfBand; } + + public void setOutOfBand(boolean oob) { this.outOfBand = oob; } + @Override public boolean executeInSequence() { return false; diff --git a/core/src/main/java/com/cloud/resource/AgentStatusUpdater.java b/core/src/main/java/com/cloud/resource/AgentStatusUpdater.java new file mode 100644 index 00000000000..63d5576c060 --- /dev/null +++ b/core/src/main/java/com/cloud/resource/AgentStatusUpdater.java @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.resource; + +/** + * AgentStatusUpdater is an agent with triggerable update functionality + */ +public interface AgentStatusUpdater { + /** + * Trigger the sending of an update (Ping). + */ + void triggerUpdate(); +} diff --git a/core/src/main/java/com/cloud/resource/ResourceStatusUpdater.java b/core/src/main/java/com/cloud/resource/ResourceStatusUpdater.java new file mode 100644 index 00000000000..df59e3a152e --- /dev/null +++ b/core/src/main/java/com/cloud/resource/ResourceStatusUpdater.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.resource; + +/** + * ResourceStatusUpdater is a resource that can trigger out of band status updates + */ +public interface ResourceStatusUpdater { + /** + * Register an AgentStatusUpdater to use for triggering out of band updates. + * + * @param updater The object to call triggerUpdate() on + */ + void registerStatusUpdater(AgentStatusUpdater updater); +} diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 1c243d4062e..c2801e28a09 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -3727,7 +3727,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac if (cmd instanceof PingRoutingCommand) { final PingRoutingCommand ping = (PingRoutingCommand)cmd; if (ping.getHostVmStateReport() != null) { - _syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport()); + _syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport(), ping.getOutOfBand()); } scanStalledVMInTransitionStateOnUpHost(agentId); diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSync.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSync.java index 152d0d889c6..b2a48a026a3 100644 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSync.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSync.java @@ -27,7 +27,7 @@ public interface VirtualMachinePowerStateSync { void processHostVmStateReport(long hostId, Map report); // to adapt legacy ping report - void processHostVmStatePingReport(long hostId, Map report); + void processHostVmStatePingReport(long hostId, Map report, boolean force); Map convertVmStateReport(Map states); } diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java index 815206a33bf..3eb3569cab0 100644 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java @@ -55,19 +55,19 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat s_logger.debug("Process host VM state report. host: " + hostId); Map translatedInfo = convertVmStateReport(report); - processReport(hostId, translatedInfo); + processReport(hostId, translatedInfo, false); } @Override - public void processHostVmStatePingReport(long hostId, Map report) { + public void processHostVmStatePingReport(long hostId, Map report, boolean force) { if (s_logger.isDebugEnabled()) s_logger.debug("Process host VM state report from ping process. host: " + hostId); Map translatedInfo = convertVmStateReport(report); - processReport(hostId, translatedInfo); + processReport(hostId, translatedInfo, force); } - private void processReport(long hostId, Map translatedInfo) { + private void processReport(long hostId, Map translatedInfo, boolean force) { if (s_logger.isDebugEnabled()) { s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size()); @@ -117,7 +117,7 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat // Make sure powerState is up to date for missing VMs try { - if (!_instanceDao.isPowerStateUpToDate(instance.getId())) { + if (!force && !_instanceDao.isPowerStateUpToDate(instance.getId())) { s_logger.warn("Detected missing VM but power state is outdated, wait for another process report run for VM id: " + instance.getId()); _instanceDao.resetVmPowerStateTracking(instance.getId()); continue; @@ -150,7 +150,7 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime(); - if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) { + if (force || milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) { s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period"); // this is were a race condition might have happened if we don't re-fetch the instance; diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index d5c0569ca9a..16cccb0150b 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -83,6 +83,7 @@ import org.libvirt.DomainInfo; import org.libvirt.DomainInfo.DomainState; import org.libvirt.DomainInterfaceStats; import org.libvirt.DomainSnapshot; +import org.libvirt.Library; import org.libvirt.LibvirtException; import org.libvirt.MemoryStatistic; import org.libvirt.Network; @@ -90,6 +91,9 @@ import org.libvirt.SchedParameter; import org.libvirt.SchedUlongParameter; import org.libvirt.Secret; import org.libvirt.VcpuInfo; +import org.libvirt.event.DomainEvent; +import org.libvirt.event.DomainEventDetail; +import org.libvirt.event.StoppedDetail; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; @@ -97,6 +101,7 @@ import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; + import com.cloud.agent.api.Answer; import com.cloud.agent.api.Command; import com.cloud.agent.api.HostVmStateReportEntry; @@ -175,6 +180,8 @@ import com.cloud.network.Networks.BroadcastDomainType; import com.cloud.network.Networks.IsolationType; import com.cloud.network.Networks.RouterPrivateIpStrategy; import com.cloud.network.Networks.TrafficType; +import com.cloud.resource.AgentStatusUpdater; +import com.cloud.resource.ResourceStatusUpdater; import com.cloud.resource.RequestWrapper; import com.cloud.resource.ServerResource; import com.cloud.resource.ServerResourceBase; @@ -224,11 +231,12 @@ import com.google.gson.Gson; * private mac addresses for domrs | mac address | start + 126 || || * pool | the parent of the storage pool hierarchy * } **/ -public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer { +public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer, ResourceStatusUpdater { protected static Logger s_logger = Logger.getLogger(LibvirtComputingResource.class); private static final String CONFIG_VALUES_SEPARATOR = ","; + private static final String LEGACY = "legacy"; private static final String SECURE = "secure"; @@ -457,6 +465,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv protected CPUStat cpuStat = new CPUStat(); protected MemStat memStat = new MemStat(dom0MinMem, dom0OvercommitMem); private final LibvirtUtilitiesHelper libvirtUtilitiesHelper = new LibvirtUtilitiesHelper(); + private AgentStatusUpdater _agentStatusUpdater; protected Boolean enableManuallySettingCpuTopologyOnKvmVm = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.ENABLE_MANUALLY_SETTING_CPU_TOPOLOGY_ON_KVM_VM); @@ -481,6 +490,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv return hypervisorQemuVersion; } + @Override + public void registerStatusUpdater(AgentStatusUpdater updater) { + _agentStatusUpdater = updater; + } + @Override public ExecutionResult executeInVR(final String routerIp, final String script, final String args) { return executeInVR(routerIp, script, args, timeout); @@ -3590,9 +3604,63 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv } catch (final CloudRuntimeException e) { s_logger.debug("Unable to initialize local storage pool: " + e); } + setupLibvirtEventListener(); return sscmd; } + private void setupLibvirtEventListener() { + final Thread libvirtListenerThread = new Thread(() -> { + try { + Library.runEventLoop(); + } catch (LibvirtException e) { + s_logger.error("LibvirtException was thrown in event loop: ", e); + } catch (InterruptedException e) { + s_logger.error("Libvirt event loop was interrupted: ", e); + } + }); + + try { + libvirtListenerThread.setDaemon(true); + libvirtListenerThread.start(); + + Connect conn = LibvirtConnection.getConnection(); + conn.addLifecycleListener(this::onDomainLifecycleChange); + + s_logger.debug("Set up the libvirt domain event lifecycle listener"); + } catch (LibvirtException e) { + s_logger.error("Failed to get libvirt connection for domain event lifecycle", e); + } + } + + private int onDomainLifecycleChange(Domain domain, DomainEvent domainEvent) { + try { + s_logger.debug(String.format("Got event lifecycle change on Domain %s, event %s", domain.getName(), domainEvent)); + if (domainEvent != null) { + switch (domainEvent.getType()) { + case STOPPED: + /* libvirt-destroyed VMs have detail StoppedDetail.DESTROYED, self shutdown guests are StoppedDetail.SHUTDOWN + * Checking for this helps us differentiate between events where cloudstack or admin stopped the VM vs guest + * initiated, and avoid pushing extra updates for actions we are initiating without a need for extra tracking */ + DomainEventDetail detail = domainEvent.getDetail(); + if (StoppedDetail.SHUTDOWN.equals(detail) || StoppedDetail.CRASHED.equals(detail)) { + s_logger.info("Triggering out of band status update due to completed self-shutdown or crash of VM"); + _agentStatusUpdater.triggerUpdate(); + } else { + s_logger.debug("Event detail: " + detail); + } + break; + default: + s_logger.debug(String.format("No handling for event %s", domainEvent)); + } + } + } catch (LibvirtException e) { + s_logger.error("Libvirt exception while processing lifecycle event", e); + } catch (Throwable e) { + s_logger.error("Error during lifecycle", e); + } + return 0; + } + public String diskUuidToSerial(String uuid) { String uuidWithoutHyphen = uuid.replace("-",""); return uuidWithoutHyphen.substring(0, Math.min(uuidWithoutHyphen.length(), 20));