Trigger out of band VM state update via libvirt event when VM stops (#7963)

* Trigger out of band VM state update via libvirt event when VM stops

* Add License headers, refactor nested try

---------

Co-authored-by: Marcus Sorensen <mls@apple.com>
This commit is contained in:
Marcus Sorensen 2023-09-28 00:42:03 -06:00 committed by GitHub
parent 348a63dc98
commit 3694667f50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 165 additions and 10 deletions

View File

@ -40,6 +40,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.naming.ConfigurationException; import javax.naming.ConfigurationException;
import com.cloud.resource.AgentStatusUpdater;
import com.cloud.resource.ResourceStatusUpdater;
import com.cloud.utils.NumbersUtil; import com.cloud.utils.NumbersUtil;
import org.apache.cloudstack.agent.lb.SetupMSListAnswer; import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
import org.apache.cloudstack.agent.lb.SetupMSListCommand; import org.apache.cloudstack.agent.lb.SetupMSListCommand;
@ -100,7 +102,7 @@ import com.cloud.utils.script.Script;
* For more configuration options, see the individual types. * For more configuration options, see the individual types.
* *
**/ **/
public class Agent implements HandlerFactory, IAgentControl { public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater {
protected static Logger s_logger = Logger.getLogger(Agent.class); protected static Logger s_logger = Logger.getLogger(Agent.class);
public enum ExitStatus { public enum ExitStatus {
@ -409,6 +411,20 @@ public class Agent implements HandlerFactory, IAgentControl {
} }
} }
public void triggerUpdate() {
PingCommand command = _resource.getCurrentStatus(getId());
command.setOutOfBand(true);
s_logger.debug("Sending out of band ping");
final Request request = new Request(_id, -1, command, false);
request.setSequence(getNextSequence());
try {
_link.send(request.toBytes());
} catch (final ClosedChannelException e) {
s_logger.warn("Unable to send ping update: " + request.toString());
}
}
protected void cancelTasks() { protected void cancelTasks() {
synchronized (_watchList) { synchronized (_watchList) {
for (final WatchTask task : _watchList) { for (final WatchTask task : _watchList) {
@ -461,6 +477,10 @@ public class Agent implements HandlerFactory, IAgentControl {
} catch (final ClosedChannelException e) { } catch (final ClosedChannelException e) {
s_logger.warn("Unable to send request: " + request.toString()); s_logger.warn("Unable to send request: " + request.toString());
} }
if (_resource instanceof ResourceStatusUpdater) {
((ResourceStatusUpdater) _resource).registerStatusUpdater(this);
}
} }
} }

View File

@ -24,6 +24,7 @@ import com.cloud.host.Host;
public class PingCommand extends Command { public class PingCommand extends Command {
Host.Type hostType; Host.Type hostType;
long hostId; long hostId;
boolean outOfBand;
protected PingCommand() { protected PingCommand() {
} }
@ -33,6 +34,12 @@ public class PingCommand extends Command {
hostId = id; hostId = id;
} }
public PingCommand(Host.Type type, long id, boolean oob) {
hostType = type;
hostId = id;
outOfBand = oob;
}
public Host.Type getHostType() { public Host.Type getHostType() {
return hostType; return hostType;
} }
@ -41,6 +48,10 @@ public class PingCommand extends Command {
return hostId; return hostId;
} }
public boolean getOutOfBand() { return outOfBand; }
public void setOutOfBand(boolean oob) { this.outOfBand = oob; }
@Override @Override
public boolean executeInSequence() { public boolean executeInSequence() {
return false; return false;

View File

@ -0,0 +1,27 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.resource;
/**
* AgentStatusUpdater is an agent with triggerable update functionality
*/
public interface AgentStatusUpdater {
/**
* Trigger the sending of an update (Ping).
*/
void triggerUpdate();
}

View File

@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.resource;
/**
* ResourceStatusUpdater is a resource that can trigger out of band status updates
*/
public interface ResourceStatusUpdater {
/**
* Register an AgentStatusUpdater to use for triggering out of band updates.
*
* @param updater The object to call triggerUpdate() on
*/
void registerStatusUpdater(AgentStatusUpdater updater);
}

View File

@ -3727,7 +3727,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
if (cmd instanceof PingRoutingCommand) { if (cmd instanceof PingRoutingCommand) {
final PingRoutingCommand ping = (PingRoutingCommand)cmd; final PingRoutingCommand ping = (PingRoutingCommand)cmd;
if (ping.getHostVmStateReport() != null) { if (ping.getHostVmStateReport() != null) {
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport()); _syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport(), ping.getOutOfBand());
} }
scanStalledVMInTransitionStateOnUpHost(agentId); scanStalledVMInTransitionStateOnUpHost(agentId);

View File

@ -27,7 +27,7 @@ public interface VirtualMachinePowerStateSync {
void processHostVmStateReport(long hostId, Map<String, HostVmStateReportEntry> report); void processHostVmStateReport(long hostId, Map<String, HostVmStateReportEntry> report);
// to adapt legacy ping report // to adapt legacy ping report
void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report); void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force);
Map<Long, VirtualMachine.PowerState> convertVmStateReport(Map<String, HostVmStateReportEntry> states); Map<Long, VirtualMachine.PowerState> convertVmStateReport(Map<String, HostVmStateReportEntry> states);
} }

View File

@ -55,19 +55,19 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat
s_logger.debug("Process host VM state report. host: " + hostId); s_logger.debug("Process host VM state report. host: " + hostId);
Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report); Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
processReport(hostId, translatedInfo); processReport(hostId, translatedInfo, false);
} }
@Override @Override
public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report) { public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force) {
if (s_logger.isDebugEnabled()) if (s_logger.isDebugEnabled())
s_logger.debug("Process host VM state report from ping process. host: " + hostId); s_logger.debug("Process host VM state report from ping process. host: " + hostId);
Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report); Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
processReport(hostId, translatedInfo); processReport(hostId, translatedInfo, force);
} }
private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo) { private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo, boolean force) {
if (s_logger.isDebugEnabled()) { if (s_logger.isDebugEnabled()) {
s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size()); s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size());
@ -117,7 +117,7 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat
// Make sure powerState is up to date for missing VMs // Make sure powerState is up to date for missing VMs
try { try {
if (!_instanceDao.isPowerStateUpToDate(instance.getId())) { if (!force && !_instanceDao.isPowerStateUpToDate(instance.getId())) {
s_logger.warn("Detected missing VM but power state is outdated, wait for another process report run for VM id: " + instance.getId()); s_logger.warn("Detected missing VM but power state is outdated, wait for another process report run for VM id: " + instance.getId());
_instanceDao.resetVmPowerStateTracking(instance.getId()); _instanceDao.resetVmPowerStateTracking(instance.getId());
continue; continue;
@ -150,7 +150,7 @@ public class VirtualMachinePowerStateSyncImpl implements VirtualMachinePowerStat
long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime(); long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime();
if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) { if (force || milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period"); s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period");
// this is were a race condition might have happened if we don't re-fetch the instance; // this is were a race condition might have happened if we don't re-fetch the instance;

View File

@ -83,6 +83,7 @@ import org.libvirt.DomainInfo;
import org.libvirt.DomainInfo.DomainState; import org.libvirt.DomainInfo.DomainState;
import org.libvirt.DomainInterfaceStats; import org.libvirt.DomainInterfaceStats;
import org.libvirt.DomainSnapshot; import org.libvirt.DomainSnapshot;
import org.libvirt.Library;
import org.libvirt.LibvirtException; import org.libvirt.LibvirtException;
import org.libvirt.MemoryStatistic; import org.libvirt.MemoryStatistic;
import org.libvirt.Network; import org.libvirt.Network;
@ -90,6 +91,9 @@ import org.libvirt.SchedParameter;
import org.libvirt.SchedUlongParameter; import org.libvirt.SchedUlongParameter;
import org.libvirt.Secret; import org.libvirt.Secret;
import org.libvirt.VcpuInfo; import org.libvirt.VcpuInfo;
import org.libvirt.event.DomainEvent;
import org.libvirt.event.DomainEventDetail;
import org.libvirt.event.StoppedDetail;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node; import org.w3c.dom.Node;
@ -97,6 +101,7 @@ import org.w3c.dom.NodeList;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import com.cloud.agent.api.Answer; import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command; import com.cloud.agent.api.Command;
import com.cloud.agent.api.HostVmStateReportEntry; import com.cloud.agent.api.HostVmStateReportEntry;
@ -175,6 +180,8 @@ import com.cloud.network.Networks.BroadcastDomainType;
import com.cloud.network.Networks.IsolationType; import com.cloud.network.Networks.IsolationType;
import com.cloud.network.Networks.RouterPrivateIpStrategy; import com.cloud.network.Networks.RouterPrivateIpStrategy;
import com.cloud.network.Networks.TrafficType; import com.cloud.network.Networks.TrafficType;
import com.cloud.resource.AgentStatusUpdater;
import com.cloud.resource.ResourceStatusUpdater;
import com.cloud.resource.RequestWrapper; import com.cloud.resource.RequestWrapper;
import com.cloud.resource.ServerResource; import com.cloud.resource.ServerResource;
import com.cloud.resource.ServerResourceBase; import com.cloud.resource.ServerResourceBase;
@ -224,11 +231,12 @@ import com.google.gson.Gson;
* private mac addresses for domrs | mac address | start + 126 || || * private mac addresses for domrs | mac address | start + 126 || ||
* pool | the parent of the storage pool hierarchy * } * pool | the parent of the storage pool hierarchy * }
**/ **/
public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer { public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer, ResourceStatusUpdater {
protected static Logger s_logger = Logger.getLogger(LibvirtComputingResource.class); protected static Logger s_logger = Logger.getLogger(LibvirtComputingResource.class);
private static final String CONFIG_VALUES_SEPARATOR = ","; private static final String CONFIG_VALUES_SEPARATOR = ",";
private static final String LEGACY = "legacy"; private static final String LEGACY = "legacy";
private static final String SECURE = "secure"; private static final String SECURE = "secure";
@ -457,6 +465,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
protected CPUStat cpuStat = new CPUStat(); protected CPUStat cpuStat = new CPUStat();
protected MemStat memStat = new MemStat(dom0MinMem, dom0OvercommitMem); protected MemStat memStat = new MemStat(dom0MinMem, dom0OvercommitMem);
private final LibvirtUtilitiesHelper libvirtUtilitiesHelper = new LibvirtUtilitiesHelper(); private final LibvirtUtilitiesHelper libvirtUtilitiesHelper = new LibvirtUtilitiesHelper();
private AgentStatusUpdater _agentStatusUpdater;
protected Boolean enableManuallySettingCpuTopologyOnKvmVm = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.ENABLE_MANUALLY_SETTING_CPU_TOPOLOGY_ON_KVM_VM); protected Boolean enableManuallySettingCpuTopologyOnKvmVm = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.ENABLE_MANUALLY_SETTING_CPU_TOPOLOGY_ON_KVM_VM);
@ -481,6 +490,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
return hypervisorQemuVersion; return hypervisorQemuVersion;
} }
@Override
public void registerStatusUpdater(AgentStatusUpdater updater) {
_agentStatusUpdater = updater;
}
@Override @Override
public ExecutionResult executeInVR(final String routerIp, final String script, final String args) { public ExecutionResult executeInVR(final String routerIp, final String script, final String args) {
return executeInVR(routerIp, script, args, timeout); return executeInVR(routerIp, script, args, timeout);
@ -3590,9 +3604,63 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
} catch (final CloudRuntimeException e) { } catch (final CloudRuntimeException e) {
s_logger.debug("Unable to initialize local storage pool: " + e); s_logger.debug("Unable to initialize local storage pool: " + e);
} }
setupLibvirtEventListener();
return sscmd; return sscmd;
} }
private void setupLibvirtEventListener() {
final Thread libvirtListenerThread = new Thread(() -> {
try {
Library.runEventLoop();
} catch (LibvirtException e) {
s_logger.error("LibvirtException was thrown in event loop: ", e);
} catch (InterruptedException e) {
s_logger.error("Libvirt event loop was interrupted: ", e);
}
});
try {
libvirtListenerThread.setDaemon(true);
libvirtListenerThread.start();
Connect conn = LibvirtConnection.getConnection();
conn.addLifecycleListener(this::onDomainLifecycleChange);
s_logger.debug("Set up the libvirt domain event lifecycle listener");
} catch (LibvirtException e) {
s_logger.error("Failed to get libvirt connection for domain event lifecycle", e);
}
}
private int onDomainLifecycleChange(Domain domain, DomainEvent domainEvent) {
try {
s_logger.debug(String.format("Got event lifecycle change on Domain %s, event %s", domain.getName(), domainEvent));
if (domainEvent != null) {
switch (domainEvent.getType()) {
case STOPPED:
/* libvirt-destroyed VMs have detail StoppedDetail.DESTROYED, self shutdown guests are StoppedDetail.SHUTDOWN
* Checking for this helps us differentiate between events where cloudstack or admin stopped the VM vs guest
* initiated, and avoid pushing extra updates for actions we are initiating without a need for extra tracking */
DomainEventDetail detail = domainEvent.getDetail();
if (StoppedDetail.SHUTDOWN.equals(detail) || StoppedDetail.CRASHED.equals(detail)) {
s_logger.info("Triggering out of band status update due to completed self-shutdown or crash of VM");
_agentStatusUpdater.triggerUpdate();
} else {
s_logger.debug("Event detail: " + detail);
}
break;
default:
s_logger.debug(String.format("No handling for event %s", domainEvent));
}
}
} catch (LibvirtException e) {
s_logger.error("Libvirt exception while processing lifecycle event", e);
} catch (Throwable e) {
s_logger.error("Error during lifecycle", e);
}
return 0;
}
public String diskUuidToSerial(String uuid) { public String diskUuidToSerial(String uuid) {
String uuidWithoutHyphen = uuid.replace("-",""); String uuidWithoutHyphen = uuid.replace("-","");
return uuidWithoutHyphen.substring(0, Math.min(uuidWithoutHyphen.length(), 20)); return uuidWithoutHyphen.substring(0, Math.min(uuidWithoutHyphen.length(), 20));