Auto Enable/Disable KVM hosts (#7170)

* Auto Enable Disable KVM hosts

* Improve health check result

* Fix corner cases

* Script path refactor

* Fix sonar cloud reports

* Fix last code smells

* Add marvin tests

* Fix new line on agent.properties to prevent host add failures

* Send alert on auto-enable-disable and add annotations when the setting is enabled

* Address reviews

* Add a reason for enabling or disabling a host when the automatic feature is enabled

* Fix comment on the marvin test description

* Fix for disabling the feature if the admin has manually updated the host resource state before any health check result
This commit is contained in:
Nicolas Vazquez 2023-04-04 08:33:37 -03:00 committed by GitHub
parent 82a6a1f6c4
commit be66eb2a35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 688 additions and 62 deletions

View File

@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false
# The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1 # The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1
# iothreads=1 # iothreads=1
# The path of an executable file/script for host health check for CloudStack to Auto Disable/Enable the host
# depending on the return value of the file/script
# agent.health.check.script.path=

View File

@ -312,6 +312,9 @@ public class AgentProperties{
*/ */
public static final Property<String> OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class); public static final Property<String> OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class);
public static final Property<String> HEALTH_CHECK_SCRIPT_PATH =
new Property<>("agent.health.check.script.path", null, String.class);
/** /**
* Sets the hypervisor type.<br> * Sets the hypervisor type.<br>
* Possible values: kvm | lxc <br> * Possible values: kvm | lxc <br>

View File

@ -49,6 +49,8 @@ public interface ResourceService {
*/ */
Host updateHost(UpdateHostCmd cmd) throws NoTransitionException; Host updateHost(UpdateHostCmd cmd) throws NoTransitionException;
Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException;
Host cancelMaintenance(CancelMaintenanceCmd cmd); Host cancelMaintenance(CancelMaintenanceCmd cmd);
Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException;

View File

@ -1020,6 +1020,7 @@ public class ApiConstants {
public static final String PUBLIC_MTU = "publicmtu"; public static final String PUBLIC_MTU = "publicmtu";
public static final String PRIVATE_MTU = "privatemtu"; public static final String PRIVATE_MTU = "privatemtu";
public static final String MTU = "mtu"; public static final String MTU = "mtu";
public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost";
public static final String LIST_APIS = "listApis"; public static final String LIST_APIS = "listApis";
/** /**

View File

@ -19,7 +19,6 @@ package org.apache.cloudstack.api.command.admin.host;
import com.cloud.host.Host; import com.cloud.host.Host;
import com.cloud.user.Account; import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType; import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode; import org.apache.cloudstack.api.ApiErrorCode;
@ -117,9 +116,6 @@ public class UpdateHostCmd extends BaseCmd {
Host result; Host result;
try { try {
result = _resourceService.updateHost(this); result = _resourceService.updateHost(this);
if(getAnnotation() != null) {
annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true);
}
HostResponse hostResponse = _responseGenerator.createHostResponse(result); HostResponse hostResponse = _responseGenerator.createHostResponse(result);
hostResponse.setResponseName(getCommandName()); hostResponse.setResponseName(getCommandName());
this.setResponseObject(hostResponse); this.setResponseObject(hostResponse);

View File

@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand {
boolean _gatewayAccessible = true; boolean _gatewayAccessible = true;
boolean _vnetAccessible = true; boolean _vnetAccessible = true;
private Boolean hostHealthCheckResult;
protected PingRoutingCommand() { protected PingRoutingCommand() {
} }
@ -57,4 +58,12 @@ public class PingRoutingCommand extends PingCommand {
public void setVnetAccessible(boolean vnetAccessible) { public void setVnetAccessible(boolean vnetAccessible) {
_vnetAccessible = vnetAccessible; _vnetAccessible = vnetAccessible;
} }
/**
 * Returns the host health check result attached to this ping.
 *
 * @return the stored result, or {@code null} when no result has been set on this command
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}
/**
 * Stores the host health check result to be carried by this ping.
 *
 * @param hostHealthCheckResult the result to store; the field stays {@code null} if this is never called
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
} }

View File

@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand {
List<String> hostTags = new ArrayList<String>(); List<String> hostTags = new ArrayList<String>();
String hypervisorVersion; String hypervisorVersion;
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>(); HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
private Boolean hostHealthCheckResult;
public StartupRoutingCommand() { public StartupRoutingCommand() {
super(Host.Type.Routing); super(Host.Type.Routing);
@ -188,4 +189,12 @@ public class StartupRoutingCommand extends StartupCommand {
public void setSupportsClonedVolumes(boolean supportsClonedVolumes) { public void setSupportsClonedVolumes(boolean supportsClonedVolumes) {
this.supportsClonedVolumes = supportsClonedVolumes; this.supportsClonedVolumes = supportsClonedVolumes;
} }
/**
 * Returns the host health check result attached to this startup command.
 *
 * @return the stored result, or {@code null} when no result has been set on this command
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}
/**
 * Stores the host health check result to be carried by this startup command.
 *
 * @param hostHealthCheckResult the result to store; the field stays {@code null} if this is never called
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
} }

View File

@ -39,6 +39,13 @@ import com.cloud.resource.ServerResource;
public interface AgentManager { public interface AgentManager {
static final ConfigKey<Integer> Wait = new ConfigKey<Integer>("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return", static final ConfigKey<Integer> Wait = new ConfigKey<Integer>("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return",
true); true);
ConfigKey<Boolean> EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class,
"enable.kvm.host.auto.enable.disable",
"Advanced",
"false",
"(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " +
"according to the hosts health check results",
true, ConfigKey.Scope.Cluster, null);
public enum TapAgentsAction { public enum TapAgentsAction {
Add, Del, Contains, Add, Del, Contains,

View File

@ -51,6 +51,7 @@ import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext;
import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.apache.log4j.MDC; import org.apache.log4j.MDC;
@ -1250,6 +1251,52 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
super(type, link, data); super(type, link, data);
} }
/**
 * Turns a health check result reported by a host into an automatic
 * enable/disable request for that host.
 * <p>
 * Nothing is done when no result was reported, when the host cannot be found,
 * or when the auto enable/disable setting is off for the host's cluster.
 *
 * @param hostHealthCheckResult {@code true} to request enabling, {@code false} to request disabling,
 *                              {@code null} when there is nothing to process
 * @param hostId                ID of the host the result belongs to
 */
private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) {
    if (hostHealthCheckResult == null) {
        return;
    }

    final HostVO host = _hostDao.findById(hostId);
    if (host == null) {
        s_logger.error(String.format("Unable to find host with ID: %s", hostId));
        return;
    }

    final boolean featureEnabled = BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()));
    if (!featureEnabled) {
        s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " +
                "received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName()));
        return;
    }

    final ResourceState.Event resourceEvent;
    final String outcome;
    final String action;
    if (hostHealthCheckResult) {
        resourceEvent = ResourceState.Event.Enable;
        outcome = "succeeds";
        action = "enabling";
    } else {
        resourceEvent = ResourceState.Event.Disable;
        outcome = "fails";
        action = "disabling";
    }

    try {
        s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", outcome, action, host.getName()));
        _resourceMgr.autoUpdateHostAllocationState(hostId, resourceEvent);
    } catch (NoTransitionException e) {
        s_logger.error(String.format("Cannot Auto %s host: %s", resourceEvent, host.getName()), e);
    }
}
/**
 * Extracts the health check result from a startup command and hands it to
 * {@code processHostHealthCheckResult}.
 *
 * @param startup the startup command received from the host; a {@code null} command is logged and ignored
 * @param hostId  ID of the host that sent the command
 */
private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) {
    if (startup != null) {
        processHostHealthCheckResult(startup.getHostHealthCheckResult(), hostId);
        return;
    }
    s_logger.error("Empty StartupRoutingCommand received");
}
/**
 * Extracts the health check result from a ping command and hands it to
 * {@code processHostHealthCheckResult}.
 *
 * @param pingRoutingCommand the ping received from the host; a {@code null} command is logged and ignored
 * @param hostId             ID of the host that sent the ping
 */
private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) {
    if (pingRoutingCommand != null) {
        processHostHealthCheckResult(pingRoutingCommand.getHostHealthCheckResult(), hostId);
        return;
    }
    s_logger.error("Empty PingRoutingCommand received");
}
protected void processRequest(final Link link, final Request request) { protected void processRequest(final Link link, final Request request) {
final AgentAttache attache = (AgentAttache)link.attachment(); final AgentAttache attache = (AgentAttache)link.attachment();
final Command[] cmds = request.getCommands(); final Command[] cmds = request.getCommands();
@ -1291,6 +1338,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
try { try {
if (cmd instanceof StartupRoutingCommand) { if (cmd instanceof StartupRoutingCommand) {
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
processStartupRoutingCommand(startup, hostId);
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval()); answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
} else if (cmd instanceof StartupProxyCommand) { } else if (cmd instanceof StartupProxyCommand) {
final StartupProxyCommand startup = (StartupProxyCommand) cmd; final StartupProxyCommand startup = (StartupProxyCommand) cmd;
@ -1322,6 +1370,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
// if the router is sending a ping, verify the // if the router is sending a ping, verify the
// gateway was pingable // gateway was pingable
if (cmd instanceof PingRoutingCommand) { if (cmd instanceof PingRoutingCommand) {
processPingRoutingCommand((PingRoutingCommand) cmd, hostId);
final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible(); final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible();
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
@ -1748,8 +1797,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Override @Override
public ConfigKey<?>[] getConfigKeys() { public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize,
DirectAgentThreadCap }; DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable };
} }
protected class SetHostParamsListener implements Listener { protected class SetHostParamsListener implements Listener {

View File

@ -322,6 +322,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private String _dcId; private String _dcId;
private String _clusterId; private String _clusterId;
private final Properties _uefiProperties = new Properties(); private final Properties _uefiProperties = new Properties();
private String hostHealthCheckScriptPath;
private long _hvVersion; private long _hvVersion;
private Duration _timeout; private Duration _timeout;
@ -717,6 +718,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
NATIVE, OPENVSWITCH, TUNGSTEN NATIVE, OPENVSWITCH, TUNGSTEN
} }
/**
 * Outcome of running the host health check script.
 * SUCCESS and FAILURE are reported to the management server as true/false;
 * IGNORE means no result is attached to the outgoing command.
 */
protected enum HealthCheckResult {
SUCCESS, FAILURE, IGNORE
}
protected BridgeType _bridgeType; protected BridgeType _bridgeType;
protected StorageSubsystemCommandHandler storageHandler; protected StorageSubsystemCommandHandler storageHandler;
@ -943,6 +948,12 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh"); throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh");
} }
hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH);
if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) {
s_logger.info(String.format("Unable to find the host health check script at: %s, " +
"discarding it", hostHealthCheckScriptPath));
}
setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh"); setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh");
if (setupTungstenVrouterPath == null) { if (setupTungstenVrouterPath == null) {
throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh"); throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh");
@ -3436,13 +3447,54 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
@Override @Override
public PingCommand getCurrentStatus(final long id) { public PingCommand getCurrentStatus(final long id) {
PingRoutingCommand pingRoutingCommand;
if (!_canBridgeFirewall) { if (!_canBridgeFirewall) {
return new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport()); pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
} else { } else {
final HashMap<String, Pair<Long, Long>> nwGrpStates = syncNetworkGroups(id); final HashMap<String, Pair<Long, Long>> nwGrpStates = syncNetworkGroups(id);
return new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates); pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
} }
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
if (healthCheckResult != HealthCheckResult.IGNORE) {
pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
}
return pingRoutingCommand;
}
/**
 * Runs the configured host health check script and maps its exit code to a {@link HealthCheckResult}.
 * <ul>
 *   <li>{@code SUCCESS}: the script was executed and its exit code is 0</li>
 *   <li>{@code FAILURE}: the script was executed and its exit code is 1</li>
 *   <li>{@code IGNORE}: the script path is not specified, the path does not exist, the path is not
 *       a regular file, the file is not executable by the agent process, or the exit code is
 *       neither 0 nor 1</li>
 * </ul>
 *
 * @return the health check outcome; never {@code null}
 */
private HealthCheckResult getHostHealthCheckResult() {
    if (StringUtils.isBlank(hostHealthCheckScriptPath)) {
        s_logger.debug("Host health check script path is not specified");
        return HealthCheckResult.IGNORE;
    }

    File script = new File(hostHealthCheckScriptPath);
    // Report the actual cause: a directory at the configured path is not a permission problem.
    String reason = null;
    if (!script.exists()) {
        reason = "file does not exist";
    } else if (!script.isFile()) {
        reason = "path is not a regular file";
    } else if (!script.canExecute()) {
        reason = "please check file permissions to execute this file";
    }
    if (reason != null) {
        s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " +
                "reason: %s", hostHealthCheckScriptPath, reason));
        return HealthCheckResult.IGNORE;
    }

    int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath);
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(String.format("Host health check script exit code: %s", exitCode));
    }
    return retrieveHealthCheckResultFromExitCode(exitCode);
}
private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) {
if (exitCode != 0 && exitCode != 1) {
return HealthCheckResult.IGNORE;
}
return exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE;
} }
@Override @Override
@ -3484,6 +3536,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
cmd.setGatewayIpAddress(_localGateway); cmd.setGatewayIpAddress(_localGateway);
cmd.setIqn(getIqn()); cmd.setIqn(getIqn());
cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption())); cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption()));
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
if (healthCheckResult != HealthCheckResult.IGNORE) {
cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
}
if (cmd.getHostDetails().containsKey("Host.OS")) { if (cmd.getHostDetails().containsKey("Host.OS")) {
_hostDistro = cmd.getHostDetails().get("Host.OS"); _hostDistro = cmd.getHostDetails().get("Host.OS");

View File

@ -36,8 +36,10 @@ import java.util.Random;
import javax.inject.Inject; import javax.inject.Inject;
import javax.naming.ConfigurationException; import javax.naming.ConfigurationException;
import com.cloud.alert.AlertManager;
import com.cloud.exception.StorageConflictException; import com.cloud.exception.StorageConflictException;
import com.cloud.exception.StorageUnavailableException; import com.cloud.exception.StorageUnavailableException;
import org.apache.cloudstack.alert.AlertService;
import org.apache.cloudstack.annotation.AnnotationService; import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.annotation.dao.AnnotationDao; import org.apache.cloudstack.annotation.dao.AnnotationDao;
import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.ApiConstants;
@ -294,6 +296,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
private UserVmDetailsDao userVmDetailsDao; private UserVmDetailsDao userVmDetailsDao;
@Inject @Inject
private AnnotationDao annotationDao; private AnnotationDao annotationDao;
@Inject
private AlertManager alertManager;
@Inject
private AnnotationService annotationService;
private final long _nodeId = ManagementServerNode.getManagementServerId(); private final long _nodeId = ManagementServerNode.getManagementServerId();
@ -1774,73 +1780,149 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return hostInMaintenance; return hostInMaintenance;
} }
/**
 * Converts an allocation state string into the matching resource state event.
 *
 * @param allocationState the requested allocation state
 * @return {@code ResourceState.Event.Enable} or {@code ResourceState.Event.Disable}
 * @throws InvalidParameterValueException if the string maps to any other event
 */
private ResourceState.Event getResourceEventFromAllocationStateString(String allocationState) {
    final ResourceState.Event event = ResourceState.Event.toEvent(allocationState);
    final boolean isEnableOrDisable = event == ResourceState.Event.Enable || event == ResourceState.Event.Disable;
    if (isEnableOrDisable) {
        return event;
    }
    throw new InvalidParameterValueException(String.format("Invalid allocation state: %s, " +
            "only Enable/Disable are allowed", allocationState));
}
/**
 * Keeps the per-host {@code ApiConstants.AUTO_ENABLE_KVM_HOST} detail in sync with the way the host
 * allocation state is being changed. Does nothing when the auto enable/disable setting is off.
 * <ul>
 *   <li>No detail stored yet: a new detail is persisted, {@code true} when the change comes from a
 *       health check and {@code false} when it is a manual change.</li>
 *   <li>Manual enable while the detail is {@code false}: the detail is switched to {@code true}.</li>
 *   <li>Manual disable while the detail is {@code true}: the detail is switched to {@code false}.</li>
 *   <li>Health check driven change with an existing detail: the detail is left untouched.</li>
 * </ul>
 *
 * @param autoEnableDisableKVMSetting whether the feature applies to this host
 * @param isUpdateFromHostHealthCheck {@code true} when triggered by a health check result,
 *                                    {@code false} for a manual update
 * @param host                        the host being updated
 * @param hostDetail                  the existing auto-enable detail of the host, or {@code null}
 * @param resourceEvent               the requested event (Enable or Disable)
 */
private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting,
                                            boolean isUpdateFromHostHealthCheck,
                                            HostVO host, DetailVO hostDetail,
                                            ResourceState.Event resourceEvent) {
    if (!autoEnableDisableKVMSetting) {
        return;
    }

    if (hostDetail == null) {
        String autoEnableValue = isUpdateFromHostHealthCheck ? Boolean.TRUE.toString() : Boolean.FALSE.toString();
        _hostDetailsDao.persist(new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, autoEnableValue));
        return;
    }

    // Only manual updates may flip an existing detail.
    if (isUpdateFromHostHealthCheck) {
        return;
    }

    boolean autoEnableAllowed = Boolean.parseBoolean(hostDetail.getValue());
    if (!autoEnableAllowed && resourceEvent == ResourceState.Event.Enable) {
        hostDetail.setValue(Boolean.TRUE.toString());
        _hostDetailsDao.update(hostDetail.getId(), hostDetail);
    } else if (autoEnableAllowed && resourceEvent == ResourceState.Event.Disable) {
        // The original concatenation dropped the space after the comma ("state,ignoring").
        s_logger.info(String.format("The setting %s is enabled but the host %s is manually set into %s state, " +
                        "ignoring future auto enabling of the host based on health check results",
                AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent));
        hostDetail.setValue(Boolean.FALSE.toString());
        _hostDetailsDao.update(hostDetail.getId(), hostDetail);
    }
}
/**
 * Applies an Enable/Disable request to a host.
 *
 * @param host                        the host to update
 * @param allocationState             the requested allocation state
 * @param isUpdateFromHostHealthCheck {@code true} when triggered by a health check result
 * @return {@code true} if a resource state transition was requested, {@code false} if the host is
 *         already in the requested state or an auto-enable was ignored
 * @throws NoTransitionException propagated from {@code resourceStateTransitTo}
 */
private boolean updateHostAllocationState(HostVO host, String allocationState,
                                          boolean isUpdateFromHostHealthCheck) throws NoTransitionException {
    final boolean autoFeatureActive = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) &&
            host.getHypervisorType() == HypervisorType.KVM;
    final ResourceState.Event requestedEvent = getResourceEventFromAllocationStateString(allocationState);
    final DetailVO autoEnableDetail = _hostDetailsDao.findDetail(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST);

    final boolean alreadyEnabled = host.getResourceState() == ResourceState.Enabled
            && requestedEvent == ResourceState.Event.Enable;
    final boolean alreadyDisabled = !alreadyEnabled
            && host.getResourceState() == ResourceState.Disabled
            && requestedEvent == ResourceState.Event.Disable;
    if (alreadyEnabled || alreadyDisabled) {
        s_logger.info(String.format("The host %s is already on the allocated state", host.getName()));
        return false;
    }

    final boolean ignoredAutoEnable = isAutoEnableAttemptForADisabledHost(autoFeatureActive,
            isUpdateFromHostHealthCheck, autoEnableDetail, requestedEvent);
    if (ignoredAutoEnable) {
        s_logger.debug(String.format("The setting '%s' is enabled and the health check succeeds on the host, " +
                        "but the host has been manually disabled previously, ignoring auto enabling",
                AgentManager.EnableKVMAutoEnableDisable.key()));
        return false;
    }

    handleAutoEnableDisableKVMHost(autoFeatureActive, isUpdateFromHostHealthCheck, host,
            autoEnableDetail, requestedEvent);
    resourceStateTransitTo(host, requestedEvent, _nodeId);
    return true;
}
/**
 * Tells whether a health check is trying to enable a host whose stored auto-enable detail is not
 * {@code true}.
 *
 * @param autoEnableDisableKVMSetting whether the feature applies to the host
 * @param isUpdateFromHostHealthCheck {@code true} when triggered by a health check result
 * @param hostDetail                  the stored auto-enable detail of the host, or {@code null}
 * @param resourceEvent               the requested event
 * @return {@code true} only when all of the above hold and the event is Enable
 */
private boolean isAutoEnableAttemptForADisabledHost(boolean autoEnableDisableKVMSetting,
                                                    boolean isUpdateFromHostHealthCheck,
                                                    DetailVO hostDetail, ResourceState.Event resourceEvent) {
    if (!autoEnableDisableKVMSetting || !isUpdateFromHostHealthCheck || hostDetail == null) {
        return false;
    }
    final boolean autoEnableBlocked = !Boolean.parseBoolean(hostDetail.getValue());
    return autoEnableBlocked && resourceEvent == ResourceState.Event.Enable;
}
/**
 * Renames the host and saves the change through the host DAO.
 *
 * @param host the host to rename
 * @param name the new host name
 */
private void updateHostName(HostVO host, String name) {
    final String logMessage = "Updating Host name to: " + name;
    s_logger.debug(logMessage);

    host.setName(name);
    _hostDao.update(host.getId(), host);
}
/**
 * Sets, replaces or clears the guest OS category preference stored in the host details.
 * <p>
 * A category whose name equals {@code GuestOSCategoryVO.CATEGORY_NONE} (ignoring case) removes any
 * stored preference; any other valid category is stored under the {@code guest.os.category.id} detail.
 *
 * @param hostId            ID of the host to update
 * @param guestOSCategoryId ID of the guest OS category, must be positive and refer to an existing category
 * @throws InvalidParameterValueException if the ID is not positive or no such category exists
 */
private void updateHostGuestOSCategory(Long hostId, Long guestOSCategoryId) {
    final String detailKey = "guest.os.category.id";

    // Verify that the guest OS Category exists (single lookup, reused below)
    final GuestOSCategoryVO guestOSCategory =
            guestOSCategoryId > 0 ? _guestOSCategoryDao.findById(guestOSCategoryId) : null;
    if (guestOSCategory == null) {
        throw new InvalidParameterValueException("Please specify a valid guest OS category.");
    }

    final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, detailKey);

    if (!GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) {
        // Create/Update an entry for guest.os.category.id
        final String categoryIdValue = String.valueOf(guestOSCategory.getId());
        if (guestOSDetail != null) {
            guestOSDetail.setValue(categoryIdValue);
            _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail);
        } else {
            final Map<String, String> detail = new HashMap<>();
            detail.put(detailKey, categoryIdValue);
            _hostDetailsDao.persist(hostId, detail);
        }
    } else if (guestOSDetail != null) {
        // Delete any existing entry for guest.os.category.id
        _hostDetailsDao.remove(guestOSDetail.getId());
    }
}
/**
 * Replaces the tags of a host with the given list, with duplicates removed.
 * <p>
 * VMs returned by {@code _vmDao.listByHostId} are not affected by the change, so a warning
 * naming them is logged, but only when there is at least one such VM.
 *
 * @param host     the host being updated (used for logging)
 * @param hostId   ID of the host being updated
 * @param hostTags the new tags; must not be {@code null}
 */
private void updateHostTags(HostVO host, Long hostId, List<String> hostTags) {
    List<VMInstanceVO> activeVMs = _vmDao.listByHostId(hostId);
    // Avoid a misleading "following active VMs []" warning when nothing runs on the host.
    if (activeVMs != null && !activeVMs.isEmpty()) {
        s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. " +
                "Updating the host tags will not affect them.", activeVMs, host));
    }
    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Updating Host Tags to :" + hostTags);
    }
    _hostTagsDao.persist(hostId, new ArrayList<>(new HashSet<>(hostTags)));
}
@Override @Override
public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException {
Long hostId = cmd.getId(); return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(),
String name = cmd.getName(); cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), cmd.getAnnotation(), false);
Long guestOSCategoryId = cmd.getOsCategoryId(); }
private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState,
String url, List<String> hostTags, String annotation, boolean isUpdateFromHostHealthCheck) throws NoTransitionException {
// Verify that the host exists // Verify that the host exists
final HostVO host = _hostDao.findById(hostId); final HostVO host = _hostDao.findById(hostId);
if (host == null) { if (host == null) {
throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist");
} }
if (cmd.getAllocationState() != null) { boolean isUpdateHostAllocation = false;
final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(cmd.getAllocationState()); if (StringUtils.isNotBlank(allocationState)) {
if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { isUpdateHostAllocation = updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck);
throw new CloudRuntimeException("Invalid allocation state:" + cmd.getAllocationState() + ", only Enable/Disable are allowed");
}
resourceStateTransitTo(host, resourceEvent, _nodeId);
} }
if (StringUtils.isNotBlank(name)) { if (StringUtils.isNotBlank(name)) {
s_logger.debug("Updating Host name to: " + name); updateHostName(host, name);
host.setName(name);
_hostDao.update(host.getId(), host);
} }
if (guestOSCategoryId != null) { if (guestOSCategoryId != null) {
// Verify that the guest OS Category exists updateHostGuestOSCategory(hostId, guestOSCategoryId);
if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) {
throw new InvalidParameterValueException("Please specify a valid guest OS category.");
}
final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId);
final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id");
if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) {
// Create/Update an entry for guest.os.category.id
if (guestOSDetail != null) {
guestOSDetail.setValue(String.valueOf(guestOSCategory.getId()));
_hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail);
} else {
final Map<String, String> detail = new HashMap<String, String>();
detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId()));
_hostDetailsDao.persist(hostId, detail);
}
} else {
// Delete any existing entry for guest.os.category.id
if (guestOSDetail != null) {
_hostDetailsDao.remove(guestOSDetail.getId());
}
}
} }
final List<String> hostTags = cmd.getHostTags();
if (hostTags != null) { if (hostTags != null) {
List<VMInstanceVO> activeVMs = _vmDao.listByHostId(hostId); updateHostTags(host, hostId, hostTags);
s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. Updating the host tags will not affect them.", activeVMs, host));
if (s_logger.isDebugEnabled()) {
s_logger.debug("Updating Host Tags to :" + hostTags);
}
_hostTagsDao.persist(hostId, new ArrayList(new HashSet<String>(hostTags)));
} }
final String url = cmd.getUrl();
if (url != null) { if (url != null) {
_storageMgr.updateSecondaryStorage(cmd.getId(), cmd.getUrl()); _storageMgr.updateSecondaryStorage(hostId, url);
} }
try { try {
_storageMgr.enableHost(hostId); _storageMgr.enableHost(hostId);
@ -1849,9 +1931,55 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
} }
final HostVO updatedHost = _hostDao.findById(hostId); final HostVO updatedHost = _hostDao.findById(hostId);
sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState,
isUpdateFromHostHealthCheck, isUpdateHostAllocation, annotation);
return updatedHost; return updatedHost;
} }
/**
 * Records the outcome of a host update as a host annotation and, for health check driven
 * changes, as an alert.
 * <ul>
 *   <li>When the auto enable/disable feature does not apply to the host, or the update did not
 *       change the allocation state, a non-blank user annotation is stored as is.</li>
 *   <li>When a health check changed the allocation state, an alert is sent and the same message
 *       is stored as an annotation.</li>
 *   <li>When the allocation state was changed manually while the feature is enabled, an annotation
 *       stating so is stored, with the user annotation appended as the reason if present.</li>
 * </ul>
 *
 * @param host                        the host that was updated
 * @param allocationState             the requested allocation state
 * @param isUpdateFromHostHealthCheck {@code true} when triggered by a health check result
 * @param isUpdateHostAllocation      {@code true} when the allocation state was actually changed
 * @param annotation                  optional user-supplied annotation
 */
private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState,
                                                                      boolean isUpdateFromHostHealthCheck,
                                                                      boolean isUpdateHostAllocation, String annotation) {
    boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM &&
            AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId());

    // Nothing feature-specific to report: keep the plain annotation behaviour so a user
    // annotation is never lost just because the setting is enabled for the cluster.
    if (!isAutoEnableDisableKVMSettingEnabled || !isUpdateHostAllocation) {
        if (StringUtils.isNotBlank(annotation)) {
            annotationService.addAnnotation(annotation, AnnotationService.EntityType.HOST, host.getUuid(), true);
        }
        return;
    }

    String msg = String.format("The host %s (%s) ", host.getName(), host.getUuid());
    ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState);
    boolean isEventEnable = resourceEvent == ResourceState.Event.Enable;

    if (isUpdateFromHostHealthCheck) {
        msg += String.format("is auto-%s after %s health check results",
                isEventEnable ? "enabled" : "disabled",
                isEventEnable ? "successful" : "failed");
        alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(),
                host.getPodId(), msg, msg);
    } else {
        msg += String.format("is %s despite the setting '%s' is enabled for the cluster %s",
                isEventEnable ? "enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(),
                host.getClusterId());
        if (StringUtils.isNotBlank(annotation)) {
            msg += String.format(", reason: %s", annotation);
        }
    }
    annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true);
}
/**
 * Requests an allocation state change for a host on behalf of the host health check.
 * Only the allocation state is passed on; every other host attribute is passed as {@code null}.
 *
 * @param hostId        ID of the host to update
 * @param resourceEvent the event to apply
 * @return the host as returned by the internal {@code updateHost} overload
 * @throws NoTransitionException propagated from the internal {@code updateHost} overload
 */
@Override
public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException {
    final String allocationState = resourceEvent.toString();
    final boolean isUpdateFromHostHealthCheck = true;
    return updateHost(hostId, null, null, allocationState, null, null, null, isUpdateFromHostHealthCheck);
}
@Override @Override
public Cluster getCluster(final Long clusterId) { public Cluster getCluster(final Long clusterId) {
return _clusterDao.findById(clusterId); return _clusterDao.findById(clusterId);

View File

@ -73,6 +73,11 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
return null; return null;
} }
/**
 * Mock implementation: performs no update and always returns {@code null}.
 */
@Override
public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException {
return null;
}
/* (non-Javadoc) /* (non-Javadoc)
* @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) * @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd)
*/ */

View File

@ -20,7 +20,7 @@
Tests for host control state Tests for host control state
""" """
from marvin.cloudstackAPI import updateHost from marvin.cloudstackAPI import (updateHost, updateConfiguration)
from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from marvin.cloudstackTestCase import cloudstackTestCase from marvin.cloudstackTestCase import cloudstackTestCase
from marvin.lib.common import (get_domain, from marvin.lib.common import (get_domain,
@ -28,13 +28,18 @@ from marvin.lib.common import (get_domain,
get_template, get_template,
list_hosts, list_hosts,
list_routers, list_routers,
list_ssvms) list_ssvms,
list_clusters,
list_hosts)
from marvin.lib.base import (Account, from marvin.lib.base import (Account,
Domain, Domain,
Host, Host,
ServiceOffering, ServiceOffering,
VirtualMachine) VirtualMachine)
from marvin.sshClient import SshClient from marvin.sshClient import SshClient
from marvin.lib.decoratorGenerators import skipTestIf
from marvin.lib.utils import wait_until
import logging
import time import time
@ -250,3 +255,220 @@ class TestHostControlState(cloudstackTestCase):
self.enable_host(host_id) self.enable_host(host_id)
self.verify_router_host_control_state(router.id, "Enabled") self.verify_router_host_control_state(router.id, "Enabled")
class TestAutoEnableDisableHost(cloudstackTestCase):
    """Tests for the automatic enabling/disabling of KVM hosts.

    The tests install a health check script on a KVM host (over SSH), point the
    agent at it through the 'agent.health.check.script.path' property and then
    observe the host resource state reported by the API.
    """

    @classmethod
    def setUpClass(cls):
        cls.testClient = super(TestAutoEnableDisableHost, cls).getClsTestClient()
        cls.apiclient = cls.testClient.getApiClient()
        cls.services = cls.testClient.getParsedTestDataConfig()

        # Get Zone, Domain and templates
        cls.zone = get_zone(cls.apiclient, cls.testClient.getZoneForTests())
        cls.hypervisor = cls.testClient.getHypervisorInfo()
        cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
        cls.logger = logging.getLogger('TestAutoEnableDisableHost')

        # Always define the flag read by @skipTestIf so it exists on every
        # hypervisor, not only on the unsupported ones.
        cls.hypervisorNotSupported = cls.hypervisor.lower() not in ['kvm']

    @classmethod
    def tearDownClass(cls):
        super(TestAutoEnableDisableHost, cls).tearDownClass()

    def tearDown(self):
        super(TestAutoEnableDisableHost, self).tearDown()

    def get_ssh_client(self, ip, username, password, retries=10):
        """ Setup ssh client connection and return connection """
        try:
            ssh_client = SshClient(ip, 22, username, password, retries)
        except Exception as e:
            # skipTest() raises the skip exception itself; the message needs a
            # '%s' placeholder, otherwise the formatting raises TypeError.
            self.skipTest("Unable to create ssh connection: %s" % e)

        self.assertIsNotNone(
            ssh_client, "Failed to setup ssh connection to ip=%s" % ip)

        return ssh_client

    def wait_until_host_is_in_state(self, hostid, resourcestate, interval=3, retries=20):
        """Poll the host until it reaches the given resource state.

        :param hostid: id of the host to poll
        :param resourcestate: expected resource state, e.g. "Enabled"
        :param interval: value passed to wait_until as its first argument
        :param retries: value passed to wait_until as its second argument
        :return: True once the state is reached
        :raises Exception: if the state is not reached
        """
        def check_resource_state():
            response = Host.list(
                self.apiclient,
                id=hostid
            )
            if isinstance(response, list):
                current_state = response[0].resourcestate
                if current_state == resourcestate:
                    self.logger.debug('Host with id %s is in resource state = %s' % (hostid, resourcestate))
                    return True, None
                self.logger.debug("Waiting for host %s to reach state %s, with current state %s"
                                  % (hostid, resourcestate, current_state))
            # Every path returns a 2-tuple, since the caller unpacks the result
            # of wait_until into two values.
            return False, None

        done, _ = wait_until(interval, retries, check_resource_state)
        if not done:
            raise Exception("Failed to wait for host %s to be on resource state %s" % (hostid, resourcestate))
        return True

    def update_config(self, enable_feature):
        """Set the 'enable.kvm.host.auto.enable.disable' configuration to the given value."""
        cmd = updateConfiguration.updateConfigurationCmd()
        cmd.name = "enable.kvm.host.auto.enable.disable"
        cmd.value = enable_feature

        response = self.apiclient.updateConfiguration(cmd)
        self.debug("updated the parameter %s with value %s" % (response.name, response.value))

    def update_health_check_script(self, ip_address, username, password, exit_code):
        """Write a health check script exiting with exit_code on the host,
        register it in agent.properties and restart the agent."""
        health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh"
        health_check_agent_property = "agent.health.check.script.path"
        agent_properties_file_path = "/etc/cloudstack/agent/agent.properties"

        ssh_client = self.get_ssh_client(ip_address, username, password)
        ssh_client.execute("echo 'exit %s' > %s" % (exit_code, health_check_script_path))
        ssh_client.execute("chmod +x %s" % health_check_script_path)
        ssh_client.execute("echo '%s=%s' >> %s" % (health_check_agent_property, health_check_script_path,
                                                   agent_properties_file_path))
        ssh_client.execute("service cloudstack-agent restart")

    def remove_host_health_check(self, ip_address, username, password):
        """Delete the health check script from the host."""
        health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh"
        ssh_client = self.get_ssh_client(ip_address, username, password)
        ssh_client.execute("rm -f %s" % health_check_script_path)

    def select_host_for_health_checks(self):
        """Return the first Enabled routing host found in the zone's clusters, or None."""
        clusters = list_clusters(
            self.apiclient,
            zoneid=self.zone.id
        )
        if not clusters:
            return None

        for cluster in clusters:
            list_hosts_response = list_hosts(
                self.apiclient,
                clusterid=cluster.id,
                type="Routing",
                resourcestate="Enabled"
            )
            # A cluster without matching hosts is skipped instead of failing the
            # test, so the remaining clusters are still considered.
            if not isinstance(list_hosts_response, list) or not list_hosts_response:
                continue
            return list_hosts_response[0]
        return None

    def update_host_allocation_state(self, id, enable):
        """Enable or disable the host through updateHost and verify the resulting resource state."""
        cmd = updateHost.updateHostCmd()
        cmd.id = id
        cmd.allocationstate = "Enable" if enable else "Disable"
        response = self.apiclient.updateHost(cmd)
        self.assertEqual(response.resourcestate, "Enabled" if enable else "Disabled")

    @attr(tags=["basic", "advanced"], required_hardware="false")
    @skipTestIf("hypervisorNotSupported")
    def test_01_auto_enable_disable_kvm_host(self):
        """Test to auto-enable and auto-disable a KVM host based on health check results

        # Validate the following:
        # 1. Enable the KVM Auto Enable/Disable Feature
        # 2. Set a health check script that fails and observe the host is Disabled
        # 3. Make the health check script succeed and observe the host is Enabled
        """

        selected_host = self.select_host_for_health_checks()
        if not selected_host:
            self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")

        username = self.hostConfig["username"]
        password = self.hostConfig["password"]

        # Enable the Auto Enable/Disable Configuration
        self.update_config("true")

        # Set health check script for failure
        self.update_health_check_script(selected_host.ipaddress, username, password, 1)
        self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200)

        # Set health check script for success
        self.update_health_check_script(selected_host.ipaddress, username, password, 0)
        self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200)

    @attr(tags=["basic", "advanced"], required_hardware="false")
    @skipTestIf("hypervisorNotSupported")
    def test_02_disable_host_overrides_auto_enable_kvm_host(self):
        """Test to override the auto-enabling of a KVM host by an administrator

        # Validate the following:
        # 1. Enable the KVM Auto Enable/Disable Feature
        # 2. Set a health check script that succeeds and observe the host is Enabled
        # 3. Make the host Disabled
        # 4. Verify the host does not get auto-enabled after the previous step
        """

        selected_host = self.select_host_for_health_checks()
        if not selected_host:
            self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")

        username = self.hostConfig["username"]
        password = self.hostConfig["password"]

        # Enable the Auto Enable/Disable Configuration
        self.update_config("true")

        # Set health check script for success
        self.update_health_check_script(selected_host.ipaddress, username, password, 0)
        self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200)

        # Manually disable the host
        self.update_host_allocation_state(selected_host.id, False)

        # Wait for more than the ping interval
        time.sleep(70)

        # Verify the host continues on Disabled state
        self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200)

        # Restore the host to Enabled state
        self.remove_host_health_check(selected_host.ipaddress, username, password)
        self.update_host_allocation_state(selected_host.id, True)

    @attr(tags=["basic", "advanced"], required_hardware="false")
    @skipTestIf("hypervisorNotSupported")
    def test_03_enable_host_does_not_override_auto_disable_kvm_host(self):
        """Test that manually enabling a KVM host does not override its auto-disabling

        # Validate the following:
        # 1. Enable the KVM Auto Enable/Disable Feature
        # 2. Set a health check script that fails and observe the host is Disabled
        # 3. Make the host Enabled
        # 4. Verify the host does get auto-disabled after the previous step
        """

        selected_host = self.select_host_for_health_checks()
        if not selected_host:
            self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")

        username = self.hostConfig["username"]
        password = self.hostConfig["password"]

        # Enable the Auto Enable/Disable Configuration
        self.update_config("true")

        # Set health check script for failure
        self.update_health_check_script(selected_host.ipaddress, username, password, 1)
        self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200)

        # Manually enable the host
        self.update_host_allocation_state(selected_host.id, True)

        # Verify the host goes back to Disabled state
        self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200)

        # Restore the host to Enabled state
        self.remove_host_health_check(selected_host.ipaddress, username, password)
        self.update_host_allocation_state(selected_host.id, True)

View File

@ -98,8 +98,9 @@ export default {
label: 'label.disable.host', label: 'label.disable.host',
message: 'message.confirm.disable.host', message: 'message.confirm.disable.host',
dataView: true, dataView: true,
defaultArgs: { allocationstate: 'Disable' }, show: (record) => { return record.resourcestate === 'Enabled' },
show: (record) => { return record.resourcestate === 'Enabled' } popup: true,
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable')))
}, },
{ {
api: 'updateHost', api: 'updateHost',
@ -107,8 +108,9 @@ export default {
label: 'label.enable.host', label: 'label.enable.host',
message: 'message.confirm.enable.host', message: 'message.confirm.enable.host',
dataView: true, dataView: true,
defaultArgs: { allocationstate: 'Enable' }, show: (record) => { return record.resourcestate === 'Disabled' },
show: (record) => { return record.resourcestate === 'Disabled' } popup: true,
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable')))
}, },
{ {
api: 'prepareHostForMaintenance', api: 'prepareHostForMaintenance',

View File

@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
<template>
  <div class="form-layout">
    <a-form
      :ref="formRef"
      :model="form"
      :rules="rules"
      @finish="handleSubmit"
      v-ctrl-enter="handleSubmit"
      class="form"
      layout="vertical"
    >
      <a-alert type="warning">
        <template #message>
          <!-- This dialog serves both actions: an Enabled host is about to be disabled, any other host enabled -->
          <span v-html="$t(resourcestate === 'Enabled' ? 'message.confirm.disable.host' : 'message.confirm.enable.host')" />
        </template>
      </a-alert>
      <!-- The optional reason is only offered when the auto enable/disable setting is on -->
      <div v-show="enableKVMAutoEnableDisableSetting" class="reason">
        <a-form-item
          class="form__item"
          name="reason"
          ref="reason"
          :label="'The setting \'enable.kvm.host.auto.enable.disable\' is enabled, ' +
            'you can specify a reason for ' + (resourcestate === 'Enabled' ? 'disabling' : 'enabling') + ' this host'">
          <a-textarea
            v-model:value="form.reason"
            :placeholder="'(Optional) Reason to ' + (resourcestate === 'Enabled' ? 'disable' : 'enable') + ' this host'"
            rows="3"
          />
        </a-form-item>
      </div>
      <div :span="24" class="action-button">
        <a-button @click="$emit('close-action')">{{ $t('label.cancel') }}</a-button>
        <a-button type="primary" @click="handleSubmit" ref="submit">{{ $t('label.ok') }}</a-button>
      </div>
    </a-form>
  </div>
</template>
<script>
import { ref, reactive, toRaw } from 'vue'
import { api } from '@/api'
/**
 * Confirmation dialog used for both enabling and disabling a host.
 * The action to perform is derived from the host's current resource state.
 * For KVM hosts it also looks up the 'enable.kvm.host.auto.enable.disable'
 * setting of the host's cluster so the user can supply a reason.
 */
export default {
  name: 'HostEnableDisable',
  props: {
    resource: {
      type: Object,
      required: true
    }
  },
  data () {
    return {
      resourcestate: '',
      allocationstate: '',
      enableKVMAutoEnableDisableSetting: false
    }
  },
  created () {
    this.initForm()
    this.resourcestate = this.resource.resourcestate
    // An Enabled host can only be disabled from here; any other state gets enabled
    this.allocationstate = this.resourcestate === 'Enabled' ? 'Disable' : 'Enable'
    this.fetchAutoEnableDisableKVMSetting()
  },
  methods: {
    initForm () {
      this.formRef = ref()
      this.form = reactive({})
      this.rules = reactive({})
    },
    /**
     * Loads the cluster-level auto enable/disable setting. Skipped for
     * non-KVM hosts, for which the flag keeps its default of false.
     */
    fetchAutoEnableDisableKVMSetting () {
      if (this.resource.hypervisor !== 'KVM') {
        return
      }
      api('listConfigurations', { name: 'enable.kvm.host.auto.enable.disable', clusterid: this.resource.clusterid }).then(json => {
        const response = json.listconfigurationsresponse || {}
        const configurations = response.configuration || []
        const setting = configurations[0]
        if (setting) {
          // Normalise to a real boolean: a non-empty string such as 'false'
          // is truthy and would otherwise keep the reason field visible.
          this.enableKVMAutoEnableDisableSetting = String(setting.value).toLowerCase() === 'true'
        }
      })
    },
    /**
     * Validates the form and calls updateHost with the derived allocation
     * state, attaching the reason as an annotation when one was entered.
     */
    handleSubmit (e) {
      // The handler is wired to several triggers; only call preventDefault
      // when the argument actually is an event.
      if (e && typeof e.preventDefault === 'function') {
        e.preventDefault()
      }
      this.formRef.value.validate().then(() => {
        const values = toRaw(this.form)
        const data = {
          allocationstate: this.allocationstate,
          id: this.resource.id
        }
        if (values.reason) {
          data.annotation = values.reason
        }
        api('updateHost', data).then(_ => {
          this.$emit('close-action')
        })
      })
    }
  }
}
</script>
<style scoped>
/* Spacing between the confirmation alert and the optional reason field */
.reason {
  padding-top: 20px;
}

/* Dialog width: relative on narrow viewports, fixed from 500px upwards */
.form-layout {
  width: 30vw;
}

/* Kept as a top-level at-rule: this block declares no preprocessor, so a
   nested @media relies on native CSS nesting support. */
@media (min-width: 500px) {
  .form-layout {
    width: 450px;
  }
}
</style>