Auto Enable/Disable KVM hosts (#7170)

* Auto Enable Disable KVM hosts

* Improve health check result

* Fix corner cases

* Script path refactor

* Fix sonar cloud reports

* Fix last code smells

* Add marvin tests

* Fix new line on agent.properties to prevent host add failures

* Send alert on auto-enable-disable and add annotations when the setting is enabled

* Address reviews

* Add a reason for enabling or disabling a host when the automatic feature is enabled

* Fix comment on the marvin test description

* Fix for disabling the feature if the admin has manually updated the host resource state before any health check result
This commit is contained in:
Nicolas Vazquez 2023-04-04 08:33:37 -03:00 committed by GitHub
parent 82a6a1f6c4
commit be66eb2a35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 688 additions and 62 deletions

View File

@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false
# The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1
# iothreads=1
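# The path of an executable file/script used as the host health check, which allows CloudStack to automatically
# Disable/Enable the host depending on the exit code of the file/script:
# exit code 0 reports a healthy host (Enable), exit code 1 reports an unhealthy host (Disable),
# any other exit code is ignored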
# agent.health.check.script.path=

View File

@ -312,6 +312,9 @@ public class AgentProperties{
*/
public static final Property<String> OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class);
/**
 * Path of the executable file/script used for the host health check.<br>
 * Property key: agent.health.check.script.path <br>
 * Data type: String.<br>
 * Default value: <code>null</code>
 */
public static final Property<String> HEALTH_CHECK_SCRIPT_PATH =
new Property<>("agent.health.check.script.path", null, String.class);
/**
* Sets the hypervisor type.<br>
* Possible values: kvm | lxc <br>

View File

@ -49,6 +49,8 @@ public interface ResourceService {
*/
Host updateHost(UpdateHostCmd cmd) throws NoTransitionException;
/**
 * Updates the allocation state of a host on behalf of the automatic (health check driven) flow,
 * as opposed to {@link #updateHost(UpdateHostCmd)} which serves explicit API requests.
 *
 * @param hostId the ID of the host to update
 * @param resourceEvent the resource state event to apply to the host
 * @return the host after the update
 * @throws NoTransitionException if the event cannot be applied to the host's resource state
 */
Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException;
Host cancelMaintenance(CancelMaintenanceCmd cmd);
Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException;

View File

@ -1020,6 +1020,7 @@ public class ApiConstants {
public static final String PUBLIC_MTU = "publicmtu";
public static final String PRIVATE_MTU = "privatemtu";
public static final String MTU = "mtu";
public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost";
public static final String LIST_APIS = "listApis";
/**

View File

@ -19,7 +19,6 @@ package org.apache.cloudstack.api.command.admin.host;
import com.cloud.host.Host;
import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
@ -117,9 +116,6 @@ public class UpdateHostCmd extends BaseCmd {
Host result;
try {
result = _resourceService.updateHost(this);
if(getAnnotation() != null) {
annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true);
}
HostResponse hostResponse = _responseGenerator.createHostResponse(result);
hostResponse.setResponseName(getCommandName());
this.setResponseObject(hostResponse);

View File

@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand {
boolean _gatewayAccessible = true;
boolean _vnetAccessible = true;
private Boolean hostHealthCheckResult;
protected PingRoutingCommand() {
}
@ -57,4 +58,12 @@ public class PingRoutingCommand extends PingCommand {
public void setVnetAccessible(boolean vnetAccessible) {
_vnetAccessible = vnetAccessible;
}
/**
 * Returns the host health check result carried by this ping.
 *
 * @return the result set on this command, or {@code null} if none has been set
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}
/**
 * Sets the host health check result to be carried by this ping.
 *
 * @param hostHealthCheckResult the result to report; left {@code null} when there is nothing to report
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
}

View File

@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand {
List<String> hostTags = new ArrayList<String>();
String hypervisorVersion;
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
private Boolean hostHealthCheckResult;
public StartupRoutingCommand() {
super(Host.Type.Routing);
@ -188,4 +189,12 @@ public class StartupRoutingCommand extends StartupCommand {
public void setSupportsClonedVolumes(boolean supportsClonedVolumes) {
this.supportsClonedVolumes = supportsClonedVolumes;
}
/**
 * Returns the host health check result carried by this startup command.
 *
 * @return the result set on this command, or {@code null} if none has been set
 */
public Boolean getHostHealthCheckResult() {
return hostHealthCheckResult;
}
/**
 * Sets the host health check result to be carried by this startup command.
 *
 * @param hostHealthCheckResult the result to report; left {@code null} when there is nothing to report
 */
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
this.hostHealthCheckResult = hostHealthCheckResult;
}
}

View File

@ -39,6 +39,13 @@ import com.cloud.resource.ServerResource;
public interface AgentManager {
static final ConfigKey<Integer> Wait = new ConfigKey<Integer>("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return",
true);
/**
 * Setting "enable.kvm.host.auto.enable.disable", declared with {@link ConfigKey.Scope#Cluster} scope
 * and the value "false": controls whether KVM hosts of a cluster are automatically
 * enabled/disabled according to the health check results they report.
 */
ConfigKey<Boolean> EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class,
"enable.kvm.host.auto.enable.disable",
"Advanced",
"false",
"(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " +
"according to the hosts health check results",
true, ConfigKey.Scope.Cluster, null);
public enum TapAgentsAction {
Add, Del, Contains,

View File

@ -51,6 +51,7 @@ import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.MDC;
@ -1250,6 +1251,52 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
super(type, link, data);
}
/**
 * Reacts to a host health check result reported by an agent by requesting the matching
 * allocation state change (Enable on success, Disable on failure) for the host.
 * Nothing happens when no result was reported, when the host cannot be found, or when
 * {@link #EnableKVMAutoEnableDisable} is not enabled for the host's cluster.
 *
 * @param hostHealthCheckResult the reported result, {@code null} when the agent reported none
 * @param hostId the ID of the reporting host
 */
private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) {
    if (hostHealthCheckResult == null) {
        // No health check information in the command: nothing to process
        return;
    }

    final HostVO reportingHost = _hostDao.findById(hostId);
    if (reportingHost == null) {
        s_logger.error(String.format("Unable to find host with ID: %s", hostId));
        return;
    }

    final boolean featureEnabled = BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(reportingHost.getClusterId()));
    if (!featureEnabled) {
        s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result received for the host %s",
                EnableKVMAutoEnableDisable.key(), reportingHost.getClusterId(), reportingHost.getName()));
        return;
    }

    final ResourceState.Event resourceEvent;
    final String outcome;
    final String action;
    if (hostHealthCheckResult) {
        resourceEvent = ResourceState.Event.Enable;
        outcome = "succeeds";
        action = "enabling";
    } else {
        resourceEvent = ResourceState.Event.Disable;
        outcome = "fails";
        action = "disabling";
    }

    s_logger.info(String.format("Host health check %s, auto %s KVM host: %s", outcome, action, reportingHost.getName()));
    try {
        _resourceMgr.autoUpdateHostAllocationState(hostId, resourceEvent);
    } catch (NoTransitionException e) {
        // The state machine rejected the event: log it and keep the agent request going
        s_logger.error(String.format("Cannot Auto %s host: %s", resourceEvent, reportingHost.getName()), e);
    }
}
/**
 * Extracts the host health check result from a startup command and processes it.
 *
 * @param startup the startup command received from the agent; an error is logged if it is {@code null}
 * @param hostId the ID of the host that sent the command
 */
private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) {
    if (startup != null) {
        processHostHealthCheckResult(startup.getHostHealthCheckResult(), hostId);
    } else {
        s_logger.error("Empty StartupRoutingCommand received");
    }
}
/**
 * Extracts the host health check result from a ping command and processes it.
 *
 * @param pingRoutingCommand the ping received from the agent; an error is logged if it is {@code null}
 * @param hostId the ID of the host that sent the ping
 */
private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) {
    if (pingRoutingCommand != null) {
        processHostHealthCheckResult(pingRoutingCommand.getHostHealthCheckResult(), hostId);
    } else {
        s_logger.error("Empty PingRoutingCommand received");
    }
}
protected void processRequest(final Link link, final Request request) {
final AgentAttache attache = (AgentAttache)link.attachment();
final Command[] cmds = request.getCommands();
@ -1291,6 +1338,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
try {
if (cmd instanceof StartupRoutingCommand) {
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
processStartupRoutingCommand(startup, hostId);
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
} else if (cmd instanceof StartupProxyCommand) {
final StartupProxyCommand startup = (StartupProxyCommand) cmd;
@ -1322,6 +1370,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
// if the router is sending a ping, verify the
// gateway was pingable
if (cmd instanceof PingRoutingCommand) {
processPingRoutingCommand((PingRoutingCommand) cmd, hostId);
final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible();
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
@ -1748,8 +1797,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize,
DirectAgentThreadCap };
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize,
DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable };
}
protected class SetHostParamsListener implements Listener {

View File

@ -322,6 +322,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private String _dcId;
private String _clusterId;
private final Properties _uefiProperties = new Properties();
private String hostHealthCheckScriptPath;
private long _hvVersion;
private Duration _timeout;
@ -717,6 +718,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
NATIVE, OPENVSWITCH, TUNGSTEN
}
/**
 * Outcome of the host health check script:
 * SUCCESS for exit code 0, FAILURE for exit code 1 and IGNORE when no usable result is available
 * (script not configured, not executable, or any other exit code). An IGNORE outcome is not
 * reported in the commands sent by this resource.
 */
protected enum HealthCheckResult {
SUCCESS, FAILURE, IGNORE
}
protected BridgeType _bridgeType;
protected StorageSubsystemCommandHandler storageHandler;
@ -943,6 +948,12 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh");
}
hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH);
if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) {
s_logger.info(String.format("Unable to find the host health check script at: %s, " +
"discarding it", hostHealthCheckScriptPath));
}
setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh");
if (setupTungstenVrouterPath == null) {
throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh");
@ -3436,13 +3447,54 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
@Override
public PingCommand getCurrentStatus(final long id) {
PingRoutingCommand pingRoutingCommand;
if (!_canBridgeFirewall) {
return new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
} else {
final HashMap<String, Pair<Long, Long>> nwGrpStates = syncNetworkGroups(id);
return new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
}
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
if (healthCheckResult != HealthCheckResult.IGNORE) {
pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
}
return pingRoutingCommand;
}
/**
 * Runs the configured host health check script and maps its outcome to a {@link HealthCheckResult}.
 * <ul>
 *   <li>{@link HealthCheckResult#SUCCESS}: the script was executed and its exit code is 0</li>
 *   <li>{@link HealthCheckResult#FAILURE}: the script was executed and its exit code is 1</li>
 *   <li>{@link HealthCheckResult#IGNORE}: the script path is not specified, the script does not exist,
 *       is not a regular file, is not executable by the user of the cloudstack-agent process,
 *       or the script ends with an exit code other than 0 or 1</li>
 * </ul>
 *
 * @return the health check outcome, never {@code null}
 */
private HealthCheckResult getHostHealthCheckResult() {
    if (StringUtils.isBlank(hostHealthCheckScriptPath)) {
        s_logger.debug("Host health check script path is not specified");
        return HealthCheckResult.IGNORE;
    }

    File script = new File(hostHealthCheckScriptPath);
    // Report the actual cause: a directory at the configured path is not a permissions problem
    String notExecutableReason = null;
    if (!script.exists()) {
        notExecutableReason = "file does not exist";
    } else if (!script.isFile()) {
        notExecutableReason = "path is not a regular file";
    } else if (!script.canExecute()) {
        notExecutableReason = "please check file permissions to execute this file";
    }
    if (notExecutableReason != null) {
        s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " +
                "reason: %s", hostHealthCheckScriptPath, notExecutableReason));
        return HealthCheckResult.IGNORE;
    }

    int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath);
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(String.format("Host health check script exit code: %s", exitCode));
    }
    return retrieveHealthCheckResultFromExitCode(exitCode);
}
/**
 * Translates the exit code of the health check script into a {@link HealthCheckResult}.
 *
 * @param exitCode the exit code returned by the script
 * @return SUCCESS for 0, FAILURE for 1, IGNORE for any other value
 */
private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) {
    switch (exitCode) {
        case 0:
            return HealthCheckResult.SUCCESS;
        case 1:
            return HealthCheckResult.FAILURE;
        default:
            return HealthCheckResult.IGNORE;
    }
}
@Override
@ -3484,6 +3536,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
cmd.setGatewayIpAddress(_localGateway);
cmd.setIqn(getIqn());
cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption()));
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
if (healthCheckResult != HealthCheckResult.IGNORE) {
cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
}
if (cmd.getHostDetails().containsKey("Host.OS")) {
_hostDistro = cmd.getHostDetails().get("Host.OS");

View File

@ -36,8 +36,10 @@ import java.util.Random;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.alert.AlertManager;
import com.cloud.exception.StorageConflictException;
import com.cloud.exception.StorageUnavailableException;
import org.apache.cloudstack.alert.AlertService;
import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.annotation.dao.AnnotationDao;
import org.apache.cloudstack.api.ApiConstants;
@ -294,6 +296,10 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
private UserVmDetailsDao userVmDetailsDao;
@Inject
private AnnotationDao annotationDao;
@Inject
private AlertManager alertManager;
@Inject
private AnnotationService annotationService;
private final long _nodeId = ManagementServerNode.getManagementServerId();
@ -1774,73 +1780,149 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
return hostInMaintenance;
}
/**
 * Converts an allocation state string into the matching resource state event.
 *
 * @param allocationState the requested allocation state
 * @return {@code ResourceState.Event.Enable} or {@code ResourceState.Event.Disable}
 * @throws InvalidParameterValueException if the string maps to any other event
 */
private ResourceState.Event getResourceEventFromAllocationStateString(String allocationState) {
    final ResourceState.Event parsedEvent = ResourceState.Event.toEvent(allocationState);
    final boolean isEnableOrDisable = parsedEvent == ResourceState.Event.Enable
            || parsedEvent == ResourceState.Event.Disable;
    if (isEnableOrDisable) {
        return parsedEvent;
    }
    throw new InvalidParameterValueException(
            String.format("Invalid allocation state: %s, only Enable/Disable are allowed", allocationState));
}
/**
 * Keeps the {@code ApiConstants.AUTO_ENABLE_KVM_HOST} host detail in sync with the way the host
 * allocation state is being changed. The detail records whether the host may still be
 * auto-enabled from health check results. Does nothing when the auto enable/disable setting is off.
 * <ul>
 *   <li>No detail yet: one is created, "true" for a health check update and "false" for a manual one</li>
 *   <li>Manual Enable while the detail is "false": the detail is set back to "true"</li>
 *   <li>Manual Disable while the detail is "true": the detail is set to "false"</li>
 *   <li>Health check updates never modify an existing detail</li>
 * </ul>
 *
 * @param autoEnableDisableKVMSetting whether the auto enable/disable feature applies to the host
 * @param isUpdateFromHostHealthCheck true if the change comes from a health check result, false if manual
 * @param host the host being updated
 * @param hostDetail the existing detail of the host, or {@code null} if there is none
 * @param resourceEvent the event (Enable/Disable) being applied to the host
 */
private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting,
                                            boolean isUpdateFromHostHealthCheck,
                                            HostVO host, DetailVO hostDetail,
                                            ResourceState.Event resourceEvent) {
    if (!autoEnableDisableKVMSetting) {
        return;
    }

    if (hostDetail == null) {
        String autoEnableValue = isUpdateFromHostHealthCheck ? Boolean.TRUE.toString() : Boolean.FALSE.toString();
        DetailVO newDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, autoEnableValue);
        _hostDetailsDao.persist(newDetail);
        return;
    }

    if (isUpdateFromHostHealthCheck) {
        // Health check driven changes must not alter a choice recorded for a manual change
        return;
    }

    boolean autoEnableAllowed = Boolean.parseBoolean(hostDetail.getValue());
    if (!autoEnableAllowed && resourceEvent == ResourceState.Event.Enable) {
        hostDetail.setValue(Boolean.TRUE.toString());
        _hostDetailsDao.update(hostDetail.getId(), hostDetail);
    } else if (autoEnableAllowed && resourceEvent == ResourceState.Event.Disable) {
        // Fixed: the two message fragments were joined without a space ("state,ignoring")
        s_logger.info(String.format("The setting %s is enabled but the host %s is manually set into %s state, " +
                        "ignoring future auto enabling of the host based on health check results",
                AgentManager.EnableKVMAutoEnableDisable.key(), host.getName(), resourceEvent));
        hostDetail.setValue(Boolean.FALSE.toString());
        _hostDetailsDao.update(hostDetail.getId(), hostDetail);
    }
}
/**
 * Applies an Enable/Disable allocation state change to a host.
 * The change is skipped when the host is already in the requested state, or when a health check
 * tries to enable a host whose auto-enable detail is "false".
 *
 * @param host the host to update
 * @param allocationState the requested allocation state (Enable/Disable)
 * @param isUpdateFromHostHealthCheck true if the request comes from a health check result
 * @return true if the resource state transition was requested, false if the change was skipped
 * @throws NoTransitionException if the event cannot be applied to the host's resource state
 */
private boolean updateHostAllocationState(HostVO host, String allocationState,
                                          boolean isUpdateFromHostHealthCheck) throws NoTransitionException {
    final boolean autoEnableDisableKVMSetting = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) &&
            host.getHypervisorType() == HypervisorType.KVM;
    final ResourceState.Event requestedEvent = getResourceEventFromAllocationStateString(allocationState);
    final DetailVO autoEnableDetail = _hostDetailsDao.findDetail(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST);

    final boolean alreadyEnabled = host.getResourceState() == ResourceState.Enabled
            && requestedEvent == ResourceState.Event.Enable;
    final boolean alreadyDisabled = host.getResourceState() == ResourceState.Disabled
            && requestedEvent == ResourceState.Event.Disable;
    if (alreadyEnabled || alreadyDisabled) {
        s_logger.info(String.format("The host %s is already on the allocated state", host.getName()));
        return false;
    }

    final boolean blockedAutoEnable = isAutoEnableAttemptForADisabledHost(autoEnableDisableKVMSetting,
            isUpdateFromHostHealthCheck, autoEnableDetail, requestedEvent);
    if (blockedAutoEnable) {
        s_logger.debug(String.format("The setting '%s' is enabled and the health check succeeds on the host, but the host has been manually disabled previously, ignoring auto enabling",
                AgentManager.EnableKVMAutoEnableDisable.key()));
        return false;
    }

    // Record the manual/automatic origin of the change before transitioning the state
    handleAutoEnableDisableKVMHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, host,
            autoEnableDetail, requestedEvent);
    resourceStateTransitTo(host, requestedEvent, _nodeId);
    return true;
}
/**
 * Tells whether a health check is trying to enable a host whose auto-enable detail is "false".
 *
 * @param autoEnableDisableKVMSetting whether the auto enable/disable feature applies to the host
 * @param isUpdateFromHostHealthCheck true if the request comes from a health check result
 * @param hostDetail the auto-enable detail of the host, or {@code null} if there is none
 * @param resourceEvent the event being applied
 * @return true only for an Enable event from a health check, with the feature on and the detail
 *         present with a value that does not parse to true
 */
private boolean isAutoEnableAttemptForADisabledHost(boolean autoEnableDisableKVMSetting,
                                                    boolean isUpdateFromHostHealthCheck,
                                                    DetailVO hostDetail, ResourceState.Event resourceEvent) {
    if (!autoEnableDisableKVMSetting || !isUpdateFromHostHealthCheck) {
        return false;
    }
    if (hostDetail == null) {
        return false;
    }
    if (Boolean.parseBoolean(hostDetail.getValue())) {
        return false;
    }
    return resourceEvent == ResourceState.Event.Enable;
}
/**
 * Renames a host and persists the change.
 *
 * @param host the host to rename
 * @param name the new name of the host
 */
private void updateHostName(HostVO host, String name) {
s_logger.debug("Updating Host name to: " + name);
host.setName(name);
_hostDao.update(host.getId(), host);
}
/**
 * Sets, updates or clears the "guest.os.category.id" detail of a host.
 * The detail is created/updated with the category ID, unless the category is
 * {@code GuestOSCategoryVO.CATEGORY_NONE}, in which case any existing detail is removed.
 *
 * @param hostId the ID of the host to update
 * @param guestOSCategoryId the ID of the guest OS category, must not be {@code null}
 * @throws InvalidParameterValueException if the ID is not positive or no such category exists
 */
private void updateHostGuestOSCategory(Long hostId, Long guestOSCategoryId) {
    // Verify that the guest OS Category exists (single lookup, reused below)
    final GuestOSCategoryVO guestOSCategory = guestOSCategoryId > 0 ? _guestOSCategoryDao.findById(guestOSCategoryId) : null;
    if (guestOSCategory == null) {
        throw new InvalidParameterValueException("Please specify a valid guest OS category.");
    }

    final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id");

    if (GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) {
        // Delete any existing entry for guest.os.category.id
        if (guestOSDetail != null) {
            _hostDetailsDao.remove(guestOSDetail.getId());
        }
        return;
    }

    // Create/Update an entry for guest.os.category.id
    if (guestOSDetail != null) {
        guestOSDetail.setValue(String.valueOf(guestOSCategory.getId()));
        _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail);
    } else {
        final Map<String, String> detail = new HashMap<>();
        detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId()));
        _hostDetailsDao.persist(hostId, detail);
    }
}
/**
 * Replaces the tags of a host with the given ones, dropping duplicates.
 * A warning listing the VMs found on the host is logged first.
 *
 * @param host the host being updated (used for logging)
 * @param hostId the ID of the host being updated
 * @param hostTags the new tags of the host
 */
private void updateHostTags(HostVO host, Long hostId, List<String> hostTags) {
    final List<VMInstanceVO> vmsOnHost = _vmDao.listByHostId(hostId);
    final String warning = String.format(
            "The following active VMs [%s] are using the host [%s]. Updating the host tags will not affect them.",
            vmsOnHost, host);
    s_logger.warn(warning);

    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Updating Host Tags to :" + hostTags);
    }

    // Going through a set removes duplicated tags before persisting
    final HashSet<String> uniqueTags = new HashSet<>(hostTags);
    final List<String> tagsToPersist = new ArrayList<>(uniqueTags);
    _hostTagsDao.persist(hostId, tagsToPersist);
}
@Override
public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException {
Long hostId = cmd.getId();
String name = cmd.getName();
Long guestOSCategoryId = cmd.getOsCategoryId();
return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(),
cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), cmd.getAnnotation(), false);
}
private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState,
String url, List<String> hostTags, String annotation, boolean isUpdateFromHostHealthCheck) throws NoTransitionException {
// Verify that the host exists
final HostVO host = _hostDao.findById(hostId);
if (host == null) {
throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist");
}
if (cmd.getAllocationState() != null) {
final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(cmd.getAllocationState());
if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) {
throw new CloudRuntimeException("Invalid allocation state:" + cmd.getAllocationState() + ", only Enable/Disable are allowed");
}
resourceStateTransitTo(host, resourceEvent, _nodeId);
boolean isUpdateHostAllocation = false;
if (StringUtils.isNotBlank(allocationState)) {
isUpdateHostAllocation = updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck);
}
if (StringUtils.isNotBlank(name)) {
s_logger.debug("Updating Host name to: " + name);
host.setName(name);
_hostDao.update(host.getId(), host);
updateHostName(host, name);
}
if (guestOSCategoryId != null) {
// Verify that the guest OS Category exists
if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) {
throw new InvalidParameterValueException("Please specify a valid guest OS category.");
}
final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId);
final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id");
if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) {
// Create/Update an entry for guest.os.category.id
if (guestOSDetail != null) {
guestOSDetail.setValue(String.valueOf(guestOSCategory.getId()));
_hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail);
} else {
final Map<String, String> detail = new HashMap<String, String>();
detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId()));
_hostDetailsDao.persist(hostId, detail);
}
} else {
// Delete any existing entry for guest.os.category.id
if (guestOSDetail != null) {
_hostDetailsDao.remove(guestOSDetail.getId());
}
}
updateHostGuestOSCategory(hostId, guestOSCategoryId);
}
final List<String> hostTags = cmd.getHostTags();
if (hostTags != null) {
List<VMInstanceVO> activeVMs = _vmDao.listByHostId(hostId);
s_logger.warn(String.format("The following active VMs [%s] are using the host [%s]. Updating the host tags will not affect them.", activeVMs, host));
if (s_logger.isDebugEnabled()) {
s_logger.debug("Updating Host Tags to :" + hostTags);
}
_hostTagsDao.persist(hostId, new ArrayList(new HashSet<String>(hostTags)));
updateHostTags(host, hostId, hostTags);
}
final String url = cmd.getUrl();
if (url != null) {
_storageMgr.updateSecondaryStorage(cmd.getId(), cmd.getUrl());
_storageMgr.updateSecondaryStorage(hostId, url);
}
try {
_storageMgr.enableHost(hostId);
@ -1849,9 +1931,55 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
final HostVO updatedHost = _hostDao.findById(hostId);
sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState,
isUpdateFromHostHealthCheck, isUpdateHostAllocation, annotation);
return updatedHost;
}
/**
 * Records the outcome of a host update as a host annotation and, for changes driven by a
 * health check, as an alert.
 * <ul>
 *   <li>If the auto enable/disable setting does not apply to the host (not KVM or setting off), or the
 *       allocation state was not changed, the annotation given by the caller (if any) is stored as is</li>
 *   <li>If the allocation state was changed by a health check, an alert is sent and the same message
 *       is stored as annotation</li>
 *   <li>If the allocation state was changed manually while the setting is on, an annotation stating
 *       so is stored, including the caller's annotation as the reason</li>
 * </ul>
 *
 * @param host the updated host
 * @param allocationState the requested allocation state (Enable/Disable), only read when the state changed
 * @param isUpdateFromHostHealthCheck true if the update comes from a health check result
 * @param isUpdateHostAllocation true if the allocation state of the host was actually changed
 * @param annotation the annotation given by the caller, may be blank
 */
private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState,
                                                                      boolean isUpdateFromHostHealthCheck,
                                                                      boolean isUpdateHostAllocation, String annotation) {
    boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM &&
            AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId());
    if (!isAutoEnableDisableKVMSettingEnabled || !isUpdateHostAllocation) {
        // Fixed: the caller's annotation used to be dropped when the setting was enabled but the
        // update did not change the allocation state (e.g. only the name or the tags were updated)
        if (StringUtils.isNotBlank(annotation)) {
            annotationService.addAnnotation(annotation, AnnotationService.EntityType.HOST, host.getUuid(), true);
        }
        return;
    }

    String msg = String.format("The host %s (%s) ", host.getName(), host.getUuid());
    ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState);
    boolean isEventEnable = resourceEvent == ResourceState.Event.Enable;

    if (isUpdateFromHostHealthCheck) {
        msg += String.format("is auto-%s after %s health check results",
                isEventEnable ? "enabled" : "disabled",
                isEventEnable ? "successful" : "failed");
        alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(),
                host.getPodId(), msg, msg);
    } else {
        msg += String.format("is %s despite the setting '%s' is enabled for the cluster %s",
                isEventEnable ? "enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(),
                host.getClusterId());
        if (StringUtils.isNotBlank(annotation)) {
            msg += String.format(", reason: %s", annotation);
        }
    }
    annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true);
}
/**
 * Updates the allocation state of a host as a result of a host health check.
 * Delegates to the private {@code updateHost} with only the allocation state set
 * (the string form of the event) and the health check flag set to true.
 *
 * @param hostId the ID of the host to update
 * @param resourceEvent the event to apply to the host
 * @return the host as returned by {@code updateHost}
 * @throws NoTransitionException if the event cannot be applied to the host's resource state
 */
@Override
public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException {
return updateHost(hostId, null, null, resourceEvent.toString(), null, null, null, true);
}
@Override
public Cluster getCluster(final Long clusterId) {
return _clusterDao.findById(clusterId);

View File

@ -73,6 +73,11 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
return null;
}
// Mock implementation: performs no update and always returns null
@Override
public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException {
return null;
}
/* (non-Javadoc)
* @see com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd)
*/

View File

@ -20,7 +20,7 @@
Tests for host control state
"""
from marvin.cloudstackAPI import updateHost
from marvin.cloudstackAPI import (updateHost, updateConfiguration)
from nose.plugins.attrib import attr
from marvin.cloudstackTestCase import cloudstackTestCase
from marvin.lib.common import (get_domain,
@ -28,13 +28,18 @@ from marvin.lib.common import (get_domain,
get_template,
list_hosts,
list_routers,
list_ssvms)
list_ssvms,
list_clusters,
list_hosts)
from marvin.lib.base import (Account,
Domain,
Host,
ServiceOffering,
VirtualMachine)
from marvin.sshClient import SshClient
from marvin.lib.decoratorGenerators import skipTestIf
from marvin.lib.utils import wait_until
import logging
import time
@ -250,3 +255,220 @@ class TestHostControlState(cloudstackTestCase):
self.enable_host(host_id)
self.verify_router_host_control_state(router.id, "Enabled")
class TestAutoEnableDisableHost(cloudstackTestCase):
@classmethod
def setUpClass(cls):
    """Prepare the API client, zone, hypervisor information and host credentials for the tests.

    Sets cls.hypervisorNotSupported, which the tests read through @skipTestIf, to True
    when the hypervisor under test is not KVM and to False otherwise.
    """
    cls.testClient = super(TestAutoEnableDisableHost, cls).getClsTestClient()
    cls.apiclient = cls.testClient.getApiClient()
    cls.services = cls.testClient.getParsedTestDataConfig()
    cls.logger = logging.getLogger('TestAutoEnableDisableHost')

    # Get Zone, Domain and templates
    cls.zone = get_zone(cls.apiclient, cls.testClient.getZoneForTests())
    cls.hypervisor = cls.testClient.getHypervisorInfo()
    cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__

    # Always define the flag (it used to be set only for unsupported hypervisors), so that
    # @skipTestIf("hypervisorNotSupported") finds the attribute on KVM runs as well
    cls.hypervisorNotSupported = cls.hypervisor.lower() not in ['kvm']
# Class level cleanup: nothing specific to this class, delegates to the parent test case
@classmethod
def tearDownClass(cls):
super(TestAutoEnableDisableHost, cls).tearDownClass()
# Per-test cleanup: nothing specific to this class, delegates to the parent test case
def tearDown(self):
super(TestAutoEnableDisableHost, self).tearDown()
def get_ssh_client(self, ip, username, password, retries=10):
    """Set up an SSH connection to the given IP on port 22 and return the client.

    The running test is skipped if the connection cannot be created.
    """
    try:
        ssh_client = SshClient(ip, 22, username, password, retries)
    except Exception as e:
        # Fixed: the message had no '%s' placeholder, so formatting it raised a TypeError
        # instead of skipping the test. self.skipTest raises the skip exception itself.
        self.skipTest("Unable to create ssh connection: %s" % e)

    self.assertIsNotNone(
        ssh_client, "Failed to setup ssh connection to ip=%s" % ip)

    return ssh_client
def wait_until_host_is_in_state(self, hostid, resourcestate, interval=3, retries=20):
    """Poll the host until it reaches the given resource state.

    :param hostid: ID of the host to poll
    :param resourcestate: expected resource state, e.g. "Enabled" or "Disabled"
    :param interval: polling interval passed to wait_until
    :param retries: number of attempts passed to wait_until
    :return: True once the host is in the expected state
    :raises Exception: if the state is not reached after all the attempts
    """
    def check_resource_state():
        response = Host.list(
            self.apiclient,
            id=hostid
        )
        # Fixed: always return a (done, value) pair, as the caller unpacks two values from
        # wait_until; an empty or non-list response used to fall through and return None
        if not isinstance(response, list) or not response:
            self.logger.debug("Host with id %s was not returned by the list call, retrying" % hostid)
            return False, None

        current_state = response[0].resourcestate
        if current_state == resourcestate:
            self.logger.debug('Host with id %s is in resource state = %s' % (hostid, resourcestate))
            return True, None

        self.logger.debug("Waiting for host %s to reach state %s, with current state %s"
                          % (hostid, resourcestate, current_state))
        return False, None

    done, _ = wait_until(interval, retries, check_resource_state)
    if not done:
        raise Exception("Failed to wait for host %s to be on resource state %s" % (hostid, resourcestate))
    return True
# Sets the "enable.kvm.host.auto.enable.disable" configuration to the given value ("true"/"false")
# through the updateConfiguration API.
# NOTE(review): no cluster id is passed, so this presumably updates the global value of the
# setting rather than a cluster level one - confirm
def update_config(self, enable_feature):
cmd = updateConfiguration.updateConfigurationCmd()
cmd.name = "enable.kvm.host.auto.enable.disable"
cmd.value = enable_feature
response = self.apiclient.updateConfiguration(cmd)
self.debug("updated the parameter %s with value %s" % (response.name, response.value))
def update_health_check_script(self, ip_address, username, password, exit_code):
    """Install a health check script on the host that exits with the given code.

    The script path is registered in agent.properties and the agent is restarted
    so that the new configuration is picked up.
    """
    health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh"
    health_check_agent_property = "agent.health.check.script.path"
    agent_properties_file_path = "/etc/cloudstack/agent/agent.properties"

    ssh_client = self.get_ssh_client(ip_address, username, password)
    ssh_client.execute("echo 'exit %s' > %s" % (exit_code, health_check_script_path))
    ssh_client.execute("chmod +x %s" % health_check_script_path)
    # Drop any previous definition of the property first, so that repeated calls
    # do not keep appending duplicated entries to agent.properties
    ssh_client.execute("sed -i '/^%s=/d' %s" % (health_check_agent_property, agent_properties_file_path))
    ssh_client.execute("echo '%s=%s' >> %s" % (health_check_agent_property, health_check_script_path,
                                               agent_properties_file_path))
    ssh_client.execute("service cloudstack-agent restart")
# Removes the health check script file from the host over SSH.
# Only the script file is deleted here: the agent.properties entry is not touched
# and the agent is not restarted.
def remove_host_health_check(self, ip_address, username, password):
health_check_script_path = "/etc/cloudstack/agent/healthcheck.sh"
ssh_client = self.get_ssh_client(ip_address, username, password)
ssh_client.execute("rm -f %s" % health_check_script_path)
def select_host_for_health_checks(self):
    """Return the first Enabled routing host found in the clusters of the zone.

    :return: the selected host, or None if no cluster of the zone has such a host
    """
    clusters = list_clusters(
        self.apiclient,
        zoneid=self.zone.id
    )
    if not clusters:
        return None

    for cluster in clusters:
        list_hosts_response = list_hosts(
            self.apiclient,
            clusterid=cluster.id,
            type="Routing",
            resourcestate="Enabled"
        )
        # Fixed: a cluster without matching hosts used to trip an assert on the response type
        # before reaching the emptiness check; now the next cluster is tried instead
        if isinstance(list_hosts_response, list) and list_hosts_response:
            return list_hosts_response[0]
    return None
# Manually enables (enable=True) or disables (enable=False) the host through the updateHost API
# and asserts that the returned resource state is "Enabled" or "Disabled" accordingly
def update_host_allocation_state(self, id, enable):
cmd = updateHost.updateHostCmd()
cmd.id = id
cmd.allocationstate = "Enable" if enable else "Disable"
response = self.apiclient.updateHost(cmd)
self.assertEqual(response.resourcestate, "Enabled" if enable else "Disabled")
@attr(tags=["basic", "advanced"], required_hardware="false")
@skipTestIf("hypervisorNotSupported")
def test_01_auto_enable_disable_kvm_host(self):
"""Test to auto-enable and auto-disable a KVM host based on health check results
# Validate the following:
# 1. Enable the KVM Auto Enable/Disable Feature
# 2. Set a health check script that fails and observe the host is Disabled
# 3. Make the health check script succeed and observe the host is Enabled
"""
selected_host = self.select_host_for_health_checks()
if not selected_host:
self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")
# Credentials come from the first host declared in the test configuration (see setUpClass)
username = self.hostConfig["username"]
password = self.hostConfig["password"]
# Enable the Auto Enable/Disable Configuration
self.update_config("true")
# Set health check script for failure
self.update_health_check_script(selected_host.ipaddress, username, password, 1)
# Poll with an interval of 5 and up to 200 retries
self.wait_until_host_is_in_state(selected_host.id, "Disabled", 5, 200)
# Set health check script for success
self.update_health_check_script(selected_host.ipaddress, username, password, 0)
self.wait_until_host_is_in_state(selected_host.id, "Enabled", 5, 200)
@attr(tags=["basic", "advanced"], required_hardware="false")
@skipTestIf("hypervisorNotSupported")
def test_02_disable_host_overrides_auto_enable_kvm_host(self):
    """Test that a host disabled by an administrator is not auto-enabled again

    # Steps:
    # 1. Turn on the KVM Auto Enable/Disable feature
    # 2. Install a succeeding health check script and wait for the host to be Enabled
    # 3. Disable the host manually
    # 4. Check the host is still Disabled after waiting
    """
    host = self.select_host_for_health_checks()
    if not host:
        self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")

    user = self.hostConfig["username"]
    passwd = self.hostConfig["password"]

    # Turn the Auto Enable/Disable configuration on
    self.update_config("true")

    # Health check script exits with 0, i.e. it succeeds
    self.update_health_check_script(host.ipaddress, user, passwd, 0)
    self.wait_until_host_is_in_state(host.id, "Enabled", 5, 200)

    # The administrator disables the host by hand
    self.update_host_allocation_state(host.id, False)

    # Let more than one ping interval go by
    time.sleep(70)

    # The manual Disabled state must not have been reverted
    self.wait_until_host_is_in_state(host.id, "Disabled", 5, 200)

    # Cleanup: drop the health check script and re-enable the host
    self.remove_host_health_check(host.ipaddress, user, passwd)
    self.update_host_allocation_state(host.id, True)
@attr(tags=["basic", "advanced"], required_hardware="false")
@skipTestIf("hypervisorNotSupported")
def test_03_enable_host_does_not_override_auto_disable_kvm_host(self):
    """Test that manually enabling a host does not override its auto-disabling

    # Steps:
    # 1. Turn on the KVM Auto Enable/Disable feature
    # 2. Install a failing health check script and wait for the host to be Disabled
    # 3. Enable the host manually
    # 4. Check the host gets auto-disabled again
    """
    host = self.select_host_for_health_checks()
    if not host:
        self.skipTest("Cannot find a KVM host to test the auto-enable-disable feature")

    user = self.hostConfig["username"]
    passwd = self.hostConfig["password"]

    # Turn the Auto Enable/Disable configuration on
    self.update_config("true")

    # Health check script exits with 1, i.e. it fails
    self.update_health_check_script(host.ipaddress, user, passwd, 1)
    self.wait_until_host_is_in_state(host.id, "Disabled", 5, 200)

    # The administrator enables the host by hand
    self.update_host_allocation_state(host.id, True)

    # The still-failing health check must bring the host back to Disabled
    self.wait_until_host_is_in_state(host.id, "Disabled", 5, 200)

    # Cleanup: drop the health check script and re-enable the host
    self.remove_host_health_check(host.ipaddress, user, passwd)
    self.update_host_allocation_state(host.id, True)

View File

@ -98,8 +98,9 @@ export default {
label: 'label.disable.host',
message: 'message.confirm.disable.host',
dataView: true,
defaultArgs: { allocationstate: 'Disable' },
show: (record) => { return record.resourcestate === 'Enabled' }
show: (record) => { return record.resourcestate === 'Enabled' },
popup: true,
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable')))
},
{
api: 'updateHost',
@ -107,8 +108,9 @@ export default {
label: 'label.enable.host',
message: 'message.confirm.enable.host',
dataView: true,
defaultArgs: { allocationstate: 'Enable' },
show: (record) => { return record.resourcestate === 'Disabled' }
show: (record) => { return record.resourcestate === 'Disabled' },
popup: true,
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/HostEnableDisable')))
},
{
api: 'prepareHostForMaintenance',

View File

@ -0,0 +1,133 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
<template>
  <div class="form-layout">
    <!-- Confirmation form shown when enabling or disabling a host -->
    <a-form
      :ref="formRef"
      :model="form"
      :rules="rules"
      @finish="handleSubmit"
      v-ctrl-enter="handleSubmit"
      class="form"
      layout="vertical"
     >
      <a-alert type="warning">
        <template #message>
          <!-- An Enabled host is about to be disabled; otherwise the host is about to be enabled -->
          <span v-html="$t(resourcestate === 'Enabled' ? 'message.confirm.disable.host' : 'message.confirm.enable.host')" />
        </template>
      </a-alert>
      <!-- Optional reason, only offered when the auto enable/disable setting is on -->
      <div v-show="enableKVMAutoEnableDisableSetting" class="reason">
        <a-form-item
          class="form__item"
          name="reason"
          ref="reason"
          :label="'The setting \'enable.kvm.host.auto.enable.disable\' is enabled, ' +
            'you can specify a reason for ' + (resourcestate === 'Enabled' ? 'disabling' : 'enabling') + ' this host'">
          <a-textarea
            v-model:value="form.reason"
            :placeholder="'(Optional) Reason to ' + (resourcestate === 'Enabled' ? 'disable' : 'enable') + ' this host'"
            rows="3"
          />
        </a-form-item>
      </div>
      <div :span="24" class="action-button">
        <a-button @click="$emit('close-action')">{{ $t('label.cancel') }}</a-button>
        <a-button type="primary" @click="handleSubmit" ref="submit">{{ $t('label.ok') }}</a-button>
      </div>
    </a-form>
  </div>
</template>
<script>
import { ref, reactive, toRaw } from 'vue'
import { api } from '@/api'
// Dialog component that enables or disables a host through the updateHost API,
// optionally attaching a reason as an annotation.
export default {
  name: 'HostEnableDisable',
  props: {
    // Host record the action was triggered on
    resource: {
      type: Object,
      required: true
    }
  },
  data () {
    return {
      // Resource state of the host, copied from the resource prop on creation
      resourcestate: '',
      // Value sent as 'allocationstate' to updateHost ('Enable' or 'Disable')
      allocationstate: '',
      // Controls whether the reason input is displayed
      enableKVMAutoEnableDisableSetting: false
    }
  },
  created () {
    this.initForm()
    this.fetchAutoEnableDisableKVMSetting()
    this.resourcestate = this.resource.resourcestate
    // An Enabled host can only be disabled; any other state leads to enabling
    this.allocationstate = this.resourcestate === 'Enabled' ? 'Disable' : 'Enable'
  },
  methods: {
    // Creates the form reference, the form model and its (empty) validation rules
    initForm () {
      this.formRef = ref()
      this.form = reactive({})
      this.rules = reactive({})
    },
    // Reads the 'enable.kvm.host.auto.enable.disable' setting for the host's
    // cluster; does nothing for hosts that are not KVM
    fetchAutoEnableDisableKVMSetting () {
      if (this.resource.hypervisor !== 'KVM') {
        return
      }
      api('listConfigurations', { name: 'enable.kvm.host.auto.enable.disable', clusterid: this.resource.clusterid }).then(json => {
        const response = json.listconfigurationsresponse
        const setting = response && response.configuration && response.configuration[0]
        // Compare against 'true' explicitly: a value delivered as the string
        // 'false' is truthy and would wrongly display the reason field
        this.enableKVMAutoEnableDisableSetting = !!setting && String(setting.value) === 'true'
      })
    },
    // Validates the form, then calls updateHost with the target allocation
    // state and, when provided, the reason as annotation
    handleSubmit (e) {
      // The handler is bound to several triggers; only call preventDefault
      // when an event object was actually passed in
      if (e && typeof e.preventDefault === 'function') {
        e.preventDefault()
      }
      this.formRef.value.validate().then(() => {
        const values = toRaw(this.form)
        const data = {
          allocationstate: this.allocationstate,
          id: this.resource.id
        }
        if (values.reason) {
          data.annotation = values.reason
        }
        api('updateHost', data).then(_ => {
          this.$emit('close-action')
        })
      })
    }
  }
}
</script>
<style scoped>
/* Spacing between the warning alert and the reason field */
.reason {
  padding-top: 20px;
}

.form-layout {
  width: 30vw;
}

/* Kept at top level: this block has no preprocessor, so a media query nested
   inside a rule would depend on native CSS nesting support */
@media (min-width: 500px) {
  .form-layout {
    width: 450px;
  }
}
</style>