Management Server - Prepare for Maintenance and Cancel Maintenance improvements (#10995)

* Management Server - Prepare for Maintenance and Cancel Maintenance improvements:
- Added new setting 'management.server.maintenance.ignore.maintenance.hosts' to ignore hosts in maintenance states while preparing management server for maintenance. This skips agent transfer and agents count check for hosts in maintenance.
- Rebalance indirect agents after cancel maintenance, using rebalance parameter in cancelMaintenance API
- Force maintenance after maintenance window timeout, using forced parameter in prepareForMaintenance API.
- Propagate 'indirect.agent.lb.check.interval' setting change to the host agents.

* rebase fixes

* code improvements, cleanup

* [UI] Set rebalance true by default in cancel maintenance dialog

* Update MS state after executing cluster cmd in the target MS, and some code improvements

* code improvements

* Ensure the host lb algorithm 'shuffle' is applied once before disabling the indirect agent lb check background task
This commit is contained in:
Suresh Kumar Anaparti 2025-07-03 12:17:04 +05:30 committed by GitHub
parent 16c60c7528
commit be22bfe2c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 411 additions and 145 deletions

View File

@ -453,22 +453,30 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
certExecutor.schedule(new PostCertificateRenewalTask(this), 5, TimeUnit.SECONDS); certExecutor.schedule(new PostCertificateRenewalTask(this), 5, TimeUnit.SECONDS);
} }
private void scheduleHostLBCheckerTask(final long checkInterval) { private void scheduleHostLBCheckerTask(final String lbAlgorithm, final long checkInterval) {
String name = "HostLBCheckerTask"; String name = "HostLBCheckerTask";
if (hostLbCheckExecutor != null && !hostLbCheckExecutor.isShutdown()) { if (hostLbCheckExecutor != null && !hostLbCheckExecutor.isShutdown()) {
logger.info("Shutting down the preferred host checker task {}", name);
hostLbCheckExecutor.shutdown(); hostLbCheckExecutor.shutdown();
try { try {
if (!hostLbCheckExecutor.awaitTermination(1, TimeUnit.SECONDS)) { if (!hostLbCheckExecutor.awaitTermination(1, TimeUnit.SECONDS)) {
hostLbCheckExecutor.shutdownNow(); hostLbCheckExecutor.shutdownNow();
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
logger.debug("Forcing {} shutdown as it did not shutdown in the desired time due to: {}", logger.debug("Forcing the preferred host checker task {} shutdown as it did not shutdown in the desired time due to: {}",
name, e.getMessage()); name, e.getMessage());
hostLbCheckExecutor.shutdownNow(); hostLbCheckExecutor.shutdownNow();
} }
} }
if (checkInterval > 0L) { if (checkInterval > 0L) {
logger.info("Scheduling preferred host task with host.lb.interval={}ms", checkInterval); if ("shuffle".equalsIgnoreCase(lbAlgorithm)) {
logger.info("Scheduling the preferred host checker task to trigger once (to apply lb algorithm '{}') after host.lb.interval={} ms", lbAlgorithm, checkInterval);
hostLbCheckExecutor = Executors.newSingleThreadScheduledExecutor((new NamedThreadFactory(name)));
hostLbCheckExecutor.schedule(new PreferredHostCheckerTask(), checkInterval, TimeUnit.MILLISECONDS);
return;
}
logger.info("Scheduling a recurring preferred host checker task with lb algorithm '{}' and host.lb.interval={} ms", lbAlgorithm, checkInterval);
hostLbCheckExecutor = Executors.newSingleThreadScheduledExecutor((new NamedThreadFactory(name))); hostLbCheckExecutor = Executors.newSingleThreadScheduledExecutor((new NamedThreadFactory(name)));
hostLbCheckExecutor.scheduleAtFixedRate(new PreferredHostCheckerTask(), checkInterval, checkInterval, hostLbCheckExecutor.scheduleAtFixedRate(new PreferredHostCheckerTask(), checkInterval, checkInterval,
TimeUnit.MILLISECONDS); TimeUnit.MILLISECONDS);
@ -928,7 +936,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
return new SetupCertificateAnswer(true); return new SetupCertificateAnswer(true);
} }
private void processManagementServerList(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) { private void processManagementServerList(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval, final boolean triggerHostLB) {
if (CollectionUtils.isNotEmpty(msList) && StringUtils.isNotEmpty(lbAlgorithm)) { if (CollectionUtils.isNotEmpty(msList) && StringUtils.isNotEmpty(lbAlgorithm)) {
try { try {
final String newMSHosts = String.format("%s%s%s", com.cloud.utils.StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm); final String newMSHosts = String.format("%s%s%s", com.cloud.utils.StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm);
@ -941,22 +949,24 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
} }
} }
shell.setAvoidHosts(avoidMsList); shell.setAvoidHosts(avoidMsList);
if ("shuffle".equals(lbAlgorithm)) { if (triggerHostLB) {
scheduleHostLBCheckerTask(0); logger.info("Triggering the preferred host checker task now");
} else { ScheduledExecutorService hostLbExecutor = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("HostLB-Executor"));
scheduleHostLBCheckerTask(shell.getLbCheckerInterval(lbCheckInterval)); hostLbExecutor.schedule(new PreferredHostCheckerTask(), 0, TimeUnit.MILLISECONDS);
hostLbExecutor.shutdown();
} }
scheduleHostLBCheckerTask(lbAlgorithm, shell.getLbCheckerInterval(lbCheckInterval));
} }
private Answer setupManagementServerList(final SetupMSListCommand cmd) { private Answer setupManagementServerList(final SetupMSListCommand cmd) {
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval()); processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval(), cmd.getTriggerHostLb());
return new SetupMSListAnswer(true); return new SetupMSListAnswer(true);
} }
private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) { private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) {
try { try {
if (CollectionUtils.isNotEmpty(cmd.getMsList())) { if (CollectionUtils.isNotEmpty(cmd.getMsList())) {
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval()); processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval(), false);
} }
Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("MigrateAgentConnection-Job")).schedule(() -> { Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("MigrateAgentConnection-Job")).schedule(() -> {
migrateAgentConnection(cmd.getAvoidMsList()); migrateAgentConnection(cmd.getAvoidMsList());
@ -1046,7 +1056,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
} }
verifyAgentArch(ready.getArch()); verifyAgentArch(ready.getArch());
processManagementServerList(ready.getMsHostList(), ready.getAvoidMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval()); processManagementServerList(ready.getMsHostList(), ready.getAvoidMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval(), false);
logger.info("Ready command is processed for agent [id: {}, uuid: {}, name: {}]", getId(), getUuid(), getName()); logger.info("Ready command is processed for agent [id: {}, uuid: {}, name: {}]", getId(), getUuid(), getName());
} }

View File

@ -40,7 +40,7 @@ public class OperationTimedoutException extends CloudException {
boolean _isActive; boolean _isActive;
public OperationTimedoutException(Command[] cmds, long agentId, long seqId, int time, boolean isActive) { public OperationTimedoutException(Command[] cmds, long agentId, long seqId, int time, boolean isActive) {
super("Commands " + seqId + " to Host " + agentId + " timed out after " + time); super("Commands " + seqId + " to Host " + agentId + " timed out after " + time + " secs");
_agentId = agentId; _agentId = agentId;
_seqId = seqId; _seqId = seqId;
_time = time; _time = time;

View File

@ -76,6 +76,10 @@ public enum ResourceState {
} }
} }
/**
 * Resource states that count as "maintenance" states; this is the list consulted by
 * {@code isMaintenanceState}. The list is created with {@code List.of} (unmodifiable) and the
 * reference is {@code final} so the shared constant can neither be mutated nor reassigned.
 */
public static final List<ResourceState> s_maintenanceStates = List.of(ResourceState.Maintenance,
        ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance,
        ResourceState.ErrorInPrepareForMaintenance);
public ResourceState getNextState(Event a) { public ResourceState getNextState(Event a) {
return s_fsm.getNextState(this, a); return s_fsm.getNextState(this, a);
} }
@ -98,8 +102,7 @@ public enum ResourceState {
} }
public static boolean isMaintenanceState(ResourceState state) { public static boolean isMaintenanceState(ResourceState state) {
return Arrays.asList(ResourceState.Maintenance, ResourceState.ErrorInMaintenance, return s_maintenanceStates.contains(state);
ResourceState.PrepareForMaintenance, ResourceState.ErrorInPrepareForMaintenance).contains(state);
} }
public static boolean canAttemptMaintenance(ResourceState state) { public static boolean canAttemptMaintenance(ResourceState state) {

View File

@ -441,6 +441,7 @@ public class ApiConstants {
public static final String PUBLIC_END_PORT = "publicendport"; public static final String PUBLIC_END_PORT = "publicendport";
public static final String PUBLIC_ZONE = "publiczone"; public static final String PUBLIC_ZONE = "publiczone";
public static final String PURGE_RESOURCES = "purgeresources"; public static final String PURGE_RESOURCES = "purgeresources";
public static final String REBALANCE = "rebalance";
public static final String RECEIVED_BYTES = "receivedbytes"; public static final String RECEIVED_BYTES = "receivedbytes";
public static final String RECONNECT = "reconnect"; public static final String RECONNECT = "reconnect";
public static final String RECOVER = "recover"; public static final String RECOVER = "recover";

View File

@ -46,7 +46,7 @@ public class PatchSystemVMCmd extends BaseAsyncCmd {
@Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN, @Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN,
description = "If true, initiates copy of scripts and restart of the agent, even if the scripts version matches." + description = "If true, initiates copy of scripts and restart of the agent, even if the scripts version matches." +
"To be used with ID parameter only") "To be used with ID parameter only")
private Boolean force; private Boolean forced;
///////////////////////////////////////////////////// /////////////////////////////////////////////////////
/////////////////// Accessors /////////////////////// /////////////////// Accessors ///////////////////////
@ -58,7 +58,7 @@ public class PatchSystemVMCmd extends BaseAsyncCmd {
} }
public boolean isForced() { public boolean isForced() {
return force != null && force; return forced != null && forced;
} }
///////////////////////////////////////////////////// /////////////////////////////////////////////////////

View File

@ -29,13 +29,15 @@ public class SetupMSListCommand extends Command {
private List<String> avoidMsList; private List<String> avoidMsList;
private String lbAlgorithm; private String lbAlgorithm;
private Long lbCheckInterval; private Long lbCheckInterval;
private Boolean triggerHostLb;
public SetupMSListCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) { public SetupMSListCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval, final Boolean triggerHostLb) {
super(); super();
this.msList = msList; this.msList = msList;
this.avoidMsList = avoidMsList; this.avoidMsList = avoidMsList;
this.lbAlgorithm = lbAlgorithm; this.lbAlgorithm = lbAlgorithm;
this.lbCheckInterval = lbCheckInterval; this.lbCheckInterval = lbCheckInterval;
this.triggerHostLb = triggerHostLb;
} }
public List<String> getMsList() { public List<String> getMsList() {
@ -54,9 +56,12 @@ public class SetupMSListCommand extends Command {
return lbCheckInterval; return lbCheckInterval;
} }
/**
 * Reports whether the host LB trigger flag carried by this command is set.
 *
 * @return {@code true} only when the flag was explicitly set to {@code true}; an unset
 *         ({@code null}) flag is reported as {@code false} instead of throwing a
 *         NullPointerException on auto-unboxing
 */
public boolean getTriggerHostLb() {
    // The backing field is a nullable Boolean; compare against TRUE to stay null-safe.
    return Boolean.TRUE.equals(triggerHostLb);
}
@Override @Override
public boolean executeInSequence() { public boolean executeInSequence() {
return false; return false;
} }
} }

View File

@ -171,5 +171,5 @@ public interface AgentManager {
void propagateChangeToAgents(Map<String, String> params); void propagateChangeToAgents(Map<String, String> params);
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs); boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance);
} }

View File

@ -273,8 +273,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
_executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentTaskPool")); _executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentTaskPool"));
initConnectExecutor();
maxConcurrentNewAgentConnections = RemoteAgentMaxConcurrentNewConnections.value(); maxConcurrentNewAgentConnections = RemoteAgentMaxConcurrentNewConnections.value();
_connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10, _connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10,
@ -828,6 +826,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
return true; return true;
} }
initConnectExecutor();
startDirectlyConnectedHosts(false); startDirectlyConnectedHosts(false);
if (_connection != null) { if (_connection != null) {
@ -2193,7 +2192,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
@Override @Override
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
return true; return true;
} }

View File

@ -42,6 +42,7 @@ import javax.naming.ConfigurationException;
import javax.net.ssl.SSLContext; import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLEngine; import javax.net.ssl.SSLEngine;
import com.cloud.resource.ResourceState;
import org.apache.cloudstack.ca.CAManager; import org.apache.cloudstack.ca.CAManager;
import org.apache.cloudstack.framework.config.ConfigDepot; import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.ConfigKey;
@ -431,10 +432,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
ch = connectToPeer(peer, ch); ch = connectToPeer(peer, ch);
if (ch == null) { if (ch == null) {
try { try {
logD(bytes, "Unable to route to peer: " + Request.parse(bytes)); logD(bytes, "Unable to establish connection to route to peer: " + Request.parse(bytes));
} catch (ClassNotFoundException | UnsupportedVersionException e) { } catch (ClassNotFoundException | UnsupportedVersionException e) {
// Request.parse thrown exception when we try to log it, log as much as we can // Request.parse thrown exception when we try to log it, log as much as we can
logD(bytes, "Unable to route to peer, and Request.parse further caught exception" + e.getMessage()); logD(bytes, "Unable to establish connection to route to peer, and Request.parse further caught exception" + e.getMessage());
} }
return false; return false;
} }
@ -643,7 +644,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
final Link link = task.getLink(); final Link link = task.getLink();
if (Request.fromServer(data)) { if (Request.fromServer(data)) {
final AgentAttache agent = findAttache(hostId); final AgentAttache agent = findAttache(hostId);
if (Request.isControl(data)) { if (Request.isControl(data)) {
@ -691,7 +691,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage()); cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage());
} }
} else { } else {
final long mgmtId = Request.getManagementServerId(data); final long mgmtId = Request.getManagementServerId(data);
if (mgmtId != -1 && mgmtId != _nodeId) { if (mgmtId != -1 && mgmtId != _nodeId) {
routeToPeer(Long.toString(mgmtId), data); routeToPeer(Long.toString(mgmtId), data);
@ -1352,7 +1351,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) { if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) {
logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance"); logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance");
try { try {
managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm()); managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm(), ((PrepareForMaintenanceManagementServerHostCommand) cmd).isForced());
return "Successfully prepared for maintenance"; return "Successfully prepared for maintenance";
} catch(CloudRuntimeException e) { } catch(CloudRuntimeException e) {
return e.getMessage(); return e.getMessage();
@ -1399,14 +1398,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
} }
@Override @Override
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
if (timeoutDurationInMs <= 0) { if (timeoutDurationInMs <= 0) {
logger.debug("Not transferring direct agents from management server node {} (id: {}) to other nodes, invalid timeout duration", fromMsId, fromMsUuid); logger.debug("Not transferring direct agents from management server node {} (id: {}) to other nodes, invalid timeout duration", fromMsId, fromMsUuid);
return false; return false;
} }
long transferStartTimeInMs = System.currentTimeMillis(); long transferStartTimeInMs = System.currentTimeMillis();
if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) { if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId, excludeHostsInMaintenance))) {
logger.info("No direct agent hosts available on management server node {} (id: {}), to transfer", fromMsId, fromMsUuid); logger.info("No direct agent hosts available on management server node {} (id: {}), to transfer", fromMsId, fromMsUuid);
return true; return true;
} }
@ -1421,7 +1420,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
int agentTransferFailedCount = 0; int agentTransferFailedCount = 0;
List<DataCenterVO> dataCenterList = dcDao.listAll(); List<DataCenterVO> dataCenterList = dcDao.listAll();
for (DataCenterVO dc : dataCenterList) { for (DataCenterVO dc : dataCenterList) {
List<HostVO> directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId()); List<HostVO> directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId(), excludeHostsInMaintenance);
if (CollectionUtils.isEmpty(directAgentHostsInDc)) { if (CollectionUtils.isEmpty(directAgentHostsInDc)) {
continue; continue;
} }
@ -1455,9 +1454,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return (agentTransferFailedCount == 0); return (agentTransferFailedCount == 0);
} }
private List<HostVO> getDirectAgentHosts(long msId) { private List<HostVO> getDirectAgentHosts(long msId, boolean excludeHostsInMaintenance) {
List<HostVO> directAgentHosts = new ArrayList<>(); List<HostVO> directAgentHosts = new ArrayList<>();
List<HostVO> hosts = _hostDao.listHostsByMs(msId); List<ResourceState> statesToExclude = excludeHostsInMaintenance ? ResourceState.s_maintenanceStates : List.of();
List<HostVO> hosts = _hostDao.listHostsByMsResourceState(msId, statesToExclude);
for (HostVO host : hosts) { for (HostVO host : hosts) {
AgentAttache agent = findAttache(host.getId()); AgentAttache agent = findAttache(host.getId());
if (agent instanceof DirectAgentAttache) { if (agent instanceof DirectAgentAttache) {
@ -1468,9 +1468,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return directAgentHosts; return directAgentHosts;
} }
private List<HostVO> getDirectAgentHostsInDc(long msId, long dcId) { private List<HostVO> getDirectAgentHostsInDc(long msId, long dcId, boolean excludeHostsInMaintenance) {
List<HostVO> directAgentHosts = new ArrayList<>(); List<HostVO> directAgentHosts = new ArrayList<>();
List<HostVO> hosts = _hostDao.listHostsByMsAndDc(msId, dcId); // To exclude maintenance states use values from ResourceState as source of truth
List<ResourceState> statesToExclude = excludeHostsInMaintenance ? ResourceState.s_maintenanceStates : List.of();
List<HostVO> hosts = _hostDao.listHostsByMsDcResourceState(msId, dcId, statesToExclude);
for (HostVO host : hosts) { for (HostVO host : hosts) {
AgentAttache agent = findAttache(host.getId()); AgentAttache agent = findAttache(host.getId());
if (agent instanceof DirectAgentAttache) { if (agent instanceof DirectAgentAttache) {
@ -1506,6 +1508,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
public void onManagementServerCancelPreparingForMaintenance() { public void onManagementServerCancelPreparingForMaintenance() {
logger.debug("Management server cancel preparing for maintenance"); logger.debug("Management server cancel preparing for maintenance");
super.onManagementServerPreparingForMaintenance(); super.onManagementServerPreparingForMaintenance();
// needed for the case when Management Server in Preparing For Maintenance but didn't go to Maintenance state
// (where this variable will be reset)
_agentLbHappened = false;
} }
@Override @Override

View File

@ -177,14 +177,24 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
List<HostVO> listHostsByMsAndDc(long msId, long dcId); List<HostVO> listHostsByMsAndDc(long msId, long dcId);
/**
 * Lists the hosts of the given management server in the given data center,
 * excluding hosts whose resource state is one of the given states.
 *
 * @param msId Management Server Id
 * @param dcId Data Center Id
 * @param excludedResourceStates Resource States to be excluded; no resource state filter is applied when empty
 * @return matching hosts
 */
List<HostVO> listHostsByMsDcResourceState(long msId, long dcId, List<ResourceState> excludedResourceStates);
List<HostVO> listHostsByMs(long msId); List<HostVO> listHostsByMs(long msId);
/**
 * Lists the hosts of the given management server, excluding hosts whose
 * resource state is one of the given states.
 *
 * @param msId Management Server Id
 * @param excludedResourceStates Resource States to be excluded; no resource state filter is applied when empty
 * @return matching hosts
 */
List<HostVO> listHostsByMsResourceState(long msId, List<ResourceState> excludedResourceStates);
/** /**
* Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over. * Count Hosts by given Management Server, Host and Hypervisor Types,
* @param msId the id of the {@see ManagementServer} * and exclude Hosts with given Resource States.
* @return the number of hosts/agents this {@see ManagementServer} has responsibility over *
* @param msId Management Server Id
* @param excludedResourceStates Resource States to be excluded
* @param hostTypes Host Types
* @param hypervisorTypes Hypervisor Types
* @return Hosts count
*/ */
int countByMs(long msId); int countHostsByMsResourceStateTypeAndHypervisorType(long msId, List<ResourceState> excludedResourceStates,
List<Type> hostTypes, List<HypervisorType> hypervisorTypes);
/** /**
* Retrieves the host ids/agents this {@see ManagementServer} has responsibility over. * Retrieves the host ids/agents this {@see ManagementServer} has responsibility over.

View File

@ -72,6 +72,7 @@ import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.GenericSearchBuilder; import com.cloud.utils.db.GenericSearchBuilder;
import com.cloud.utils.db.JoinBuilder; import com.cloud.utils.db.JoinBuilder;
import com.cloud.utils.db.JoinBuilder.JoinType; import com.cloud.utils.db.JoinBuilder.JoinType;
import com.cloud.utils.db.QueryBuilder;
import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.SearchCriteria.Func; import com.cloud.utils.db.SearchCriteria.Func;
@ -1600,6 +1601,17 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return listBy(sc); return listBy(sc);
} }
/**
 * Lists the hosts that belong to the given management server and data center.
 * When a non-empty list of resource states is supplied, hosts in any of those
 * states are left out of the result; otherwise no resource state filter is applied.
 */
@Override
public List<HostVO> listHostsByMsDcResourceState(long msId, long dcId, List<ResourceState> excludedResourceStates) {
    final QueryBuilder<HostVO> hostQuery = QueryBuilder.create(HostVO.class);
    hostQuery.and(hostQuery.entity().getManagementServerId(), Op.EQ, msId);
    hostQuery.and(hostQuery.entity().getDataCenterId(), Op.EQ, dcId);

    // Only add the NOT IN clause when there is something to exclude.
    final boolean filterByResourceState = CollectionUtils.isNotEmpty(excludedResourceStates);
    if (filterByResourceState) {
        hostQuery.and(hostQuery.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
    }

    return listBy(hostQuery.create());
}
@Override @Override
public List<HostVO> listHostsByMs(long msId) { public List<HostVO> listHostsByMs(long msId) {
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create(); SearchCriteria<HostVO> sc = ResponsibleMsSearch.create();
@ -1608,10 +1620,32 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
} }
@Override @Override
public int countByMs(long msId) { public List<HostVO> listHostsByMsResourceState(long msId, List<ResourceState> excludedResourceStates) {
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create(); QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
sc.setParameters("managementServerId", msId); sc.and(sc.entity().getManagementServerId(), Op.EQ, msId);
return getCount(sc); if (CollectionUtils.isNotEmpty(excludedResourceStates)) {
sc.and(sc.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
}
return listBy(sc.create());
}
@Override
public int countHostsByMsResourceStateTypeAndHypervisorType(long msId,
List<ResourceState> excludedResourceStates,
List<Type> hostTypes,
List<HypervisorType> hypervisorTypes) {
QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
sc.and(sc.entity().getManagementServerId(), Op.EQ, msId);
if (CollectionUtils.isNotEmpty(excludedResourceStates)) {
sc.and(sc.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
}
if (CollectionUtils.isNotEmpty(hostTypes)) {
sc.and(sc.entity().getType(), Op.IN, hostTypes.toArray());
}
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
sc.and(sc.entity().getHypervisorType(), Op.IN, hypervisorTypes.toArray());
}
return getCount(sc.create());
} }
@Override @Override

View File

@ -70,9 +70,11 @@ public interface IndirectAgentLB {
*/ */
Long getLBPreferredHostCheckInterval(Long clusterId); Long getLBPreferredHostCheckInterval(Long clusterId);
void propagateMSListToAgents(); void propagateMSListToAgents(boolean triggerHostLB);
boolean haveAgentBasedHosts(long msId); void propagateMSListToAgentsInCluster(Long clusterId);
boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs); boolean haveAgentBasedHosts(long msId, boolean excludeHostsInMaintenance);
boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs, boolean excludeHostsInMaintenance);
} }

View File

@ -18,12 +18,15 @@
package org.apache.cloudstack.api.command; package org.apache.cloudstack.api.command;
import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd; import org.apache.cloudstack.api.BaseCmd;
import com.cloud.user.Account; import com.cloud.user.Account;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType; import org.apache.cloudstack.acl.RoleType;
import org.apache.commons.lang3.BooleanUtils;
@APICommand(name = CancelMaintenanceCmd.APINAME, @APICommand(name = CancelMaintenanceCmd.APINAME,
description = "Cancels maintenance of the management server", description = "Cancels maintenance of the management server",
@ -36,6 +39,13 @@ public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd {
public static final String APINAME = "cancelMaintenance"; public static final String APINAME = "cancelMaintenance";
@Parameter(name = ApiConstants.REBALANCE, type = CommandType.BOOLEAN, description = "Rebalance agents (applicable for indirect agents, ensure the settings 'host' and 'indirect.agent.lb.algorithm' are properly configured) after cancelling maintenance, default is true")
private Boolean rebalance;
/**
 * @return whether agents should be rebalanced after cancelling maintenance;
 *         {@code true} unless the parameter was explicitly set to {@code false}
 */
public boolean getRebalance() {
    // A null (unspecified) parameter counts as true, i.e. rebalance by default.
    return BooleanUtils.isNotFalse(rebalance);
}
@Override @Override
public String getCommandName() { public String getCommandName() {
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;

View File

@ -26,6 +26,7 @@ import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType; import org.apache.cloudstack.acl.RoleType;
import org.apache.commons.lang3.BooleanUtils;
@APICommand(name = PrepareForMaintenanceCmd.APINAME, @APICommand(name = PrepareForMaintenanceCmd.APINAME,
description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents", description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents",
@ -40,6 +41,9 @@ public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd {
" when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered") " when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered")
private String algorithm; private String algorithm;
@Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN, description = "Force management server to maintenance after the maintenance window timeout, default is false")
private Boolean forced;
public String getAlgorithm() { public String getAlgorithm() {
return algorithm; return algorithm;
} }
@ -48,6 +52,10 @@ public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd {
this.algorithm = algorithm; this.algorithm = algorithm;
} }
/**
 * @return {@code true} only when the forced parameter was explicitly set to {@code true};
 *         an unspecified (null) parameter counts as {@code false}
 */
public boolean isForced() {
    return BooleanUtils.isTrue(forced);
}
@Override @Override
public String getCommandName() { public String getCommandName() {
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;

View File

@ -40,6 +40,15 @@ public interface ManagementServerMaintenanceManager {
ConfigKey.Scope.Global, ConfigKey.Scope.Global,
null); null);
// Global-scope boolean setting 'management.server.maintenance.ignore.maintenance.hosts' in the
// "Advanced" category, defaulting to false. Per its description, enabling it skips hosts in
// Maintenance state during Management Server maintenance.
ConfigKey<Boolean> ManagementServerMaintenanceIgnoreMaintenanceHosts = new ConfigKey<>(Boolean.class,
"management.server.maintenance.ignore.maintenance.hosts",
"Advanced",
String.valueOf(Boolean.FALSE),
"Host in Maintenance state can sometimes block Management Server to go to Maintenance; this setting skips Host(s) in Maintenance state during Management Server Maintenance, default: false.",
true,
ConfigKey.Scope.Global,
null);
void registerListener(ManagementServerMaintenanceListener listener); void registerListener(ManagementServerMaintenanceListener listener);
void unregisterListener(ManagementServerMaintenanceListener listener); void unregisterListener(ManagementServerMaintenanceListener listener);
@ -76,14 +85,14 @@ public interface ManagementServerMaintenanceManager {
// Indicates whether the current management server is preparing to maintenance // Indicates whether the current management server is preparing to maintenance
boolean isPreparingForMaintenance(); boolean isPreparingForMaintenance();
void resetPreparingForMaintenance(); void resetMaintenanceParams();
long getMaintenanceStartTime(); long getMaintenanceStartTime();
String getLbAlgorithm(); String getLbAlgorithm();
// Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs // Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs
void prepareForMaintenance(String lbAlorithm); void prepareForMaintenance(String lbAlorithm, boolean forced);
// Cancels maintenance of the current management server // Cancels maintenance of the current management server
void cancelMaintenance(); void cancelMaintenance();

View File

@ -26,7 +26,9 @@ import java.util.concurrent.TimeUnit;
import javax.inject.Inject; import javax.inject.Inject;
import com.cloud.resource.ResourceState;
import org.apache.cloudstack.agent.lb.IndirectAgentLB; import org.apache.cloudstack.agent.lb.IndirectAgentLB;
import org.apache.cloudstack.agent.lb.IndirectAgentLBServiceImpl;
import org.apache.cloudstack.api.command.CancelMaintenanceCmd; import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.CancelShutdownCmd; import org.apache.cloudstack.api.command.CancelShutdownCmd;
import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd; import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
@ -39,6 +41,7 @@ import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.management.ManagementServerHost.State; import org.apache.cloudstack.management.ManagementServerHost.State;
import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand;
@ -196,13 +199,20 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
return preparingForShutdown; return preparingForShutdown;
} }
/**
 * Clears the in-memory shutdown flags of this management server:
 * both {@code preparingForShutdown} and {@code shutdownTriggered} are set back to false.
 */
private void resetShutdownParams() {
    logger.debug("Resetting shutdown params");
    this.preparingForShutdown = false;
    this.shutdownTriggered = false;
}
@Override @Override
public boolean isPreparingForMaintenance() { public boolean isPreparingForMaintenance() {
return preparingForMaintenance; return preparingForMaintenance;
} }
@Override @Override
public void resetPreparingForMaintenance() { public void resetMaintenanceParams() {
logger.debug("Resetting maintenance params");
preparingForMaintenance = false; preparingForMaintenance = false;
maintenanceStartTime = 0; maintenanceStartTime = 0;
lbAlgorithm = null; lbAlgorithm = null;
@ -235,6 +245,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
} }
this.shutdownTriggered = true; this.shutdownTriggered = true;
prepareForShutdown(true); prepareForShutdown(true);
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
} }
private void prepareForShutdown(boolean postTrigger) { private void prepareForShutdown(boolean postTrigger) {
@ -251,29 +266,38 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
this.preparingForShutdown = true; this.preparingForShutdown = true;
jobManager.disableAsyncJobs(); jobManager.disableAsyncJobs();
waitForPendingJobs(); waitForPendingJobs(false);
} }
@Override @Override
public void prepareForShutdown() { public void prepareForShutdown() {
prepareForShutdown(false); prepareForShutdown(false);
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
} }
@Override @Override
public void cancelShutdown() { public void cancelShutdown() {
if (!this.preparingForShutdown) { ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
if (!this.preparingForShutdown && !(State.PreparingForShutDown.equals(msHost.getState()) || State.ReadyToShutDown.equals(msHost.getState()))) {
throw new CloudRuntimeException("Shutdown has not been triggered"); throw new CloudRuntimeException("Shutdown has not been triggered");
} }
this.preparingForShutdown = false; resetShutdownParams();
this.shutdownTriggered = false; resetMaintenanceParams();
resetPreparingForMaintenance();
jobManager.enableAsyncJobs(); jobManager.enableAsyncJobs();
cancelWaitForPendingJobs(); cancelWaitForPendingJobs();
msHostDao.updateState(msHost.getId(), State.Up);
} }
@Override @Override
public void prepareForMaintenance(String lbAlorithm) { public void prepareForMaintenance(String lbAlorithm, boolean forced) {
if (this.preparingForShutdown) { if (this.preparingForShutdown) {
throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again"); throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again");
} }
@ -281,41 +305,57 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
if (this.preparingForMaintenance) { if (this.preparingForMaintenance) {
throw new CloudRuntimeException("Maintenance has already been initiated"); throw new CloudRuntimeException("Maintenance has already been initiated");
} }
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
this.preparingForMaintenance = true; this.preparingForMaintenance = true;
this.maintenanceStartTime = System.currentTimeMillis(); this.maintenanceStartTime = System.currentTimeMillis();
this.lbAlgorithm = lbAlorithm; this.lbAlgorithm = lbAlorithm;
jobManager.disableAsyncJobs(); jobManager.disableAsyncJobs();
onPreparingForMaintenance(); onPreparingForMaintenance();
waitForPendingJobs(); waitForPendingJobs(forced);
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
} }
@Override @Override
public void cancelMaintenance() { public void cancelMaintenance() {
if (!this.preparingForMaintenance) { ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
if (!this.preparingForMaintenance && !(State.Maintenance.equals(msHost.getState()) || State.PreparingForMaintenance.equals(msHost.getState()))) {
throw new CloudRuntimeException("Maintenance has not been initiated"); throw new CloudRuntimeException("Maintenance has not been initiated");
} }
resetPreparingForMaintenance(); resetMaintenanceParams();
this.preparingForShutdown = false; resetShutdownParams();
this.shutdownTriggered = false;
jobManager.enableAsyncJobs(); jobManager.enableAsyncJobs();
cancelWaitForPendingJobs(); cancelWaitForPendingJobs();
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); msHostDao.updateState(msHost.getId(), State.Up);
if (msHost != null) { ScheduledExecutorService cancelMaintenanceService = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("CancelMaintenance-Job"));
if (State.PreparingForMaintenance.equals(msHost.getState())) { cancelMaintenanceService.schedule(() -> {
onCancelPreparingForMaintenance(); cancelMaintenanceTask(msHost.getState());
} }, 0, TimeUnit.SECONDS);
if (State.Maintenance.equals(msHost.getState())) { cancelMaintenanceService.shutdown();
onCancelMaintenance(); }
}
private void cancelMaintenanceTask(ManagementServerHost.State msState) {
if (State.PreparingForMaintenance.equals(msState)) {
onCancelPreparingForMaintenance();
}
if (State.Maintenance.equals(msState)) {
onCancelMaintenance();
} }
} }
private void waitForPendingJobs() { private void waitForPendingJobs(boolean forceMaintenance) {
cancelWaitForPendingJobs(); cancelWaitForPendingJobs();
pendingJobsCheckTask = Executors.newScheduledThreadPool(1, new NamedThreadFactory("PendingJobsCheck")); pendingJobsCheckTask = Executors.newScheduledThreadPool(1, new NamedThreadFactory("PendingJobsCheck"));
long pendingJobsCheckDelayInSecs = 1L; // 1 sec long pendingJobsCheckDelayInSecs = 1L; // 1 sec
long pendingJobsCheckPeriodInSecs = 3L; // every 3 secs, check more frequently for pending jobs long pendingJobsCheckPeriodInSecs = 3L; // every 3 secs, check more frequently for pending jobs
pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS); boolean ignoreMaintenanceHosts = ManagementServerMaintenanceIgnoreMaintenanceHosts.value();
pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this, ignoreMaintenanceHosts, forceMaintenance), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS);
} }
@Override @Override
@ -349,7 +389,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid());
executeCmd(msHost, cmds); executeCmd(msHost, cmds);
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
return prepareMaintenanceResponse(cmd.getManagementServerId()); return prepareMaintenanceResponse(cmd.getManagementServerId());
} }
@ -375,7 +414,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid());
executeCmd(msHost, cmds); executeCmd(msHost, cmds);
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
return prepareMaintenanceResponse(cmd.getManagementServerId()); return prepareMaintenanceResponse(cmd.getManagementServerId());
} }
@ -395,7 +433,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid());
executeCmd(msHost, cmds); executeCmd(msHost, cmds);
msHostDao.updateState(msHost.getId(), State.Up);
return prepareMaintenanceResponse(cmd.getManagementServerId()); return prepareMaintenanceResponse(cmd.getManagementServerId());
} }
@ -426,7 +463,8 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
checkAnyMsInPreparingStates("prepare for maintenance"); checkAnyMsInPreparingStates("prepare for maintenance");
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) { boolean ignoreMaintenanceHosts = ManagementServerMaintenanceIgnoreMaintenanceHosts.value();
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid(), ignoreMaintenanceHosts)) {
List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList(); List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
indirectAgentMsList.remove(msHost.getServiceIP()); indirectAgentMsList.remove(msHost.getServiceIP());
List<String> nonUpMsList = msHostDao.listNonUpStateMsIPs(); List<String> nonUpMsList = msHostDao.listNonUpStateMsIPs();
@ -437,10 +475,9 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
} }
final Command[] cmds = new Command[1]; final Command[] cmds = new Command[1];
cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm()); cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm(), cmd.isForced());
executeCmd(msHost, cmds); executeCmd(msHost, cmds);
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
return prepareMaintenanceResponse(cmd.getManagementServerId()); return prepareMaintenanceResponse(cmd.getManagementServerId());
} }
@ -460,7 +497,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid()); cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid());
executeCmd(msHost, cmds); executeCmd(msHost, cmds);
msHostDao.updateState(msHost.getId(), State.Up); if (cmd.getRebalance()) {
logger.info("Propagate MS list and rebalance indirect agents");
indirectAgentLB.propagateMSListToAgents(true);
}
return prepareMaintenanceResponse(cmd.getManagementServerId()); return prepareMaintenanceResponse(cmd.getManagementServerId());
} }
@ -485,12 +526,14 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
@Override @Override
public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) { public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) {
resetPreparingForMaintenance(); resetMaintenanceParams();
this.preparingForShutdown = false; resetShutdownParams();
this.shutdownTriggered = false;
jobManager.enableAsyncJobs(); jobManager.enableAsyncJobs();
if (msHost == null) { if (msHost == null) {
msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
throw new CloudRuntimeException("Invalid node id for the management server");
}
} }
onCancelPreparingForMaintenance(); onCancelPreparingForMaintenance();
msHostDao.updateState(msHost.getId(), State.Up); msHostDao.updateState(msHost.getId(), State.Up);
@ -546,17 +589,21 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
@Override @Override
public ConfigKey<?>[] getConfigKeys() { public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[]{ return new ConfigKey<?>[]{
ManagementServerMaintenanceTimeoutInMins ManagementServerMaintenanceTimeoutInMins, ManagementServerMaintenanceIgnoreMaintenanceHosts
}; };
} }
private final class CheckPendingJobsTask extends ManagedContextRunnable { private final class CheckPendingJobsTask extends ManagedContextRunnable {
private ManagementServerMaintenanceManager managementServerMaintenanceManager; private ManagementServerMaintenanceManager managementServerMaintenanceManager;
private boolean ignoreMaintenanceHosts = false;
private boolean agentsTransferTriggered = false; private boolean agentsTransferTriggered = false;
private boolean forceMaintenance = false;
public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) { public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager, boolean ignoreMaintenanceHosts, boolean forceMaintenance) {
this.managementServerMaintenanceManager = managementServerMaintenanceManager; this.managementServerMaintenanceManager = managementServerMaintenanceManager;
this.ignoreMaintenanceHosts = ignoreMaintenanceHosts;
this.forceMaintenance = forceMaintenance;
} }
@Override @Override
@ -570,6 +617,19 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
} }
if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) { if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) {
if (forceMaintenance) {
logger.debug("Maintenance window timeout, MS is forced to Maintenance Mode");
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
logger.warn("Unable to find the management server, invalid node id");
return;
}
msHostDao.updateState(msHost.getId(), State.Maintenance);
managementServerMaintenanceManager.onMaintenance();
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
logger.debug("Maintenance window timeout, terminating the pending jobs check timer task"); logger.debug("Maintenance window timeout, terminating the pending jobs check timer task");
managementServerMaintenanceManager.cancelPreparingForMaintenance(null); managementServerMaintenanceManager.cancelPreparingForMaintenance(null);
managementServerMaintenanceManager.cancelWaitForPendingJobs(); managementServerMaintenanceManager.cancelWaitForPendingJobs();
@ -577,9 +637,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
} }
long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId()); long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId());
int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId());
String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]", long totalAgents = totalAgentsInMs();
managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] preparingForShutdown[%b] preparingForMaintenance[%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]",
managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isPreparingForShutdown(), managementServerMaintenanceManager.isPreparingForMaintenance(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
logger.debug(msg); logger.debug(msg);
if (totalPendingJobs > 0) { if (totalPendingJobs > 0) {
@ -594,6 +656,10 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
} }
if (managementServerMaintenanceManager.isPreparingForMaintenance()) { if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
logger.warn("Unable to find the management server, invalid node id");
return;
}
if (totalAgents == 0) { if (totalAgents == 0) {
logger.info("MS is in Maintenance Mode"); logger.info("MS is in Maintenance Mode");
msHostDao.updateState(msHost.getId(), State.Maintenance); msHostDao.updateState(msHost.getId(), State.Maintenance);
@ -609,7 +675,7 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
agentsTransferTriggered = true; agentsTransferTriggered = true;
logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs()); boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs(), ignoreMaintenanceHosts);
if (!agentsMigrated) { if (!agentsMigrated) {
logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
@ -617,18 +683,20 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
return; return;
} }
if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) { if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs(), ignoreMaintenanceHosts)) {
logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid())); logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost); managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
managementServerMaintenanceManager.cancelWaitForPendingJobs(); managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
} }
} else if (managementServerMaintenanceManager.isPreparingForShutdown()) { } else if (managementServerMaintenanceManager.isPreparingForShutdown()) {
logger.info("MS is Ready To Shutdown"); logger.info("MS is Ready To Shutdown");
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost == null) {
logger.warn("Unable to find the management server, invalid node id");
return;
}
msHostDao.updateState(msHost.getId(), State.ReadyToShutDown); msHostDao.updateState(msHost.getId(), State.ReadyToShutDown);
managementServerMaintenanceManager.cancelWaitForPendingJobs(); managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
} }
} catch (final Exception e) { } catch (final Exception e) {
logger.error("Error trying to check/run pending jobs task", e); logger.error("Error trying to check/run pending jobs task", e);
@ -648,5 +716,14 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
long remainingMaintenanceWindowTimeInMs = (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000) - maintenanceElapsedTimeInMs; long remainingMaintenanceWindowTimeInMs = (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000) - maintenanceElapsedTimeInMs;
return (remainingMaintenanceWindowTimeInMs > 0) ? remainingMaintenanceWindowTimeInMs : 0; return (remainingMaintenanceWindowTimeInMs > 0) ? remainingMaintenanceWindowTimeInMs : 0;
} }
/**
 * Counts the agent hosts currently attached to this management server.
 *
 * A host in a maintenance state could block moving the management server to Maintenance, so when
 * {@code ignoreMaintenanceHosts} is set those hosts are left out of the count. The states to
 * exclude come from {@code ResourceState.s_maintenanceStates}, which is the source of truth.
 *
 * @return number of hosts of the agent-valid host types on this management server,
 *         not counting hosts in the excluded resource states
 */
private long totalAgentsInMs() {
    final List<ResourceState> excludedStates;
    if (ignoreMaintenanceHosts) {
        excludedStates = ResourceState.s_maintenanceStates;
    } else {
        // Nothing to exclude: count every host regardless of its resource state.
        excludedStates = List.of();
    }
    return hostDao.countHostsByMsResourceStateTypeAndHypervisorType(
            ManagementServerNode.getManagementServerId(),
            excludedStates,
            IndirectAgentLBServiceImpl.agentValidHostTypes,
            null);
}
} }
} }

View File

@ -20,17 +20,23 @@ package org.apache.cloudstack.maintenance.command;
public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {
String lbAlgorithm; String lbAlgorithm;
boolean forced;
public PrepareForMaintenanceManagementServerHostCommand(long msId) { public PrepareForMaintenanceManagementServerHostCommand(long msId) {
super(msId); super(msId);
} }
public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) { public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm, boolean forced) {
super(msId); super(msId);
this.lbAlgorithm = lbAlgorithm; this.lbAlgorithm = lbAlgorithm;
this.forced = forced;
} }
public String getLbAlgorithm() { public String getLbAlgorithm() {
return lbAlgorithm; return lbAlgorithm;
} }
/**
 * @return the {@code forced} flag carried by this command, as passed to the three-argument constructor
 */
public boolean isForced() {
return forced;
}
} }

View File

@ -92,6 +92,8 @@ public class ManagementServerMaintenanceManagerImplTest {
@Test @Test
public void prepareForShutdown() { public void prepareForShutdown() {
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
spy.prepareForShutdown(); spy.prepareForShutdown();
Mockito.verify(jobManagerMock).disableAsyncJobs(); Mockito.verify(jobManagerMock).disableAsyncJobs();
@ -106,6 +108,9 @@ public class ManagementServerMaintenanceManagerImplTest {
@Test @Test
public void cancelShutdown() { public void cancelShutdown() {
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
spy.cancelShutdown(); spy.cancelShutdown();
}); });
@ -115,6 +120,8 @@ public class ManagementServerMaintenanceManagerImplTest {
public void triggerShutdown() { public void triggerShutdown() {
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
Mockito.lenient().when(spy.isShutdownTriggered()).thenReturn(false); Mockito.lenient().when(spy.isShutdownTriggered()).thenReturn(false);
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
spy.triggerShutdown(); spy.triggerShutdown();
Mockito.verify(jobManagerMock).disableAsyncJobs(); Mockito.verify(jobManagerMock).disableAsyncJobs();
@ -305,43 +312,44 @@ public class ManagementServerMaintenanceManagerImplTest {
@Test @Test
public void prepareForMaintenanceAndCancelFromMaintenanceState() { public void prepareForMaintenanceAndCancelFromMaintenanceState() {
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
spy.prepareForMaintenance("static"); ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
spy.prepareForMaintenance("static", false);
Mockito.verify(jobManagerMock).disableAsyncJobs(); Mockito.verify(jobManagerMock).disableAsyncJobs();
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
spy.prepareForMaintenance("static"); spy.prepareForMaintenance("static", false);
}); });
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance); Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
spy.cancelMaintenance(); spy.cancelMaintenance();
Mockito.verify(jobManagerMock).enableAsyncJobs(); Mockito.verify(jobManagerMock).enableAsyncJobs();
Mockito.verify(spy, Mockito.times(1)).onCancelMaintenance();
} }
@Test @Test
public void prepareForMaintenanceAndCancelFromPreparingForMaintenanceState() { public void prepareForMaintenanceAndCancelFromPreparingForMaintenanceState() {
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
spy.prepareForMaintenance("static"); ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
spy.prepareForMaintenance("static", false);
Mockito.verify(jobManagerMock).disableAsyncJobs(); Mockito.verify(jobManagerMock).disableAsyncJobs();
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
spy.prepareForMaintenance("static"); spy.prepareForMaintenance("static", false);
}); });
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.PreparingForMaintenance); Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.PreparingForMaintenance);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs(); Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
spy.cancelMaintenance(); spy.cancelMaintenance();
Mockito.verify(jobManagerMock).enableAsyncJobs(); Mockito.verify(jobManagerMock).enableAsyncJobs();
Mockito.verify(spy, Mockito.times(1)).onCancelPreparingForMaintenance();
} }
@Test @Test
public void cancelMaintenance() { public void cancelMaintenance() {
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
spy.cancelMaintenance(); spy.cancelMaintenance();
}); });
@ -455,7 +463,7 @@ public class ManagementServerMaintenanceManagerImplTest {
Mockito.when(msHostDao.listNonUpStateMsIPs()).thenReturn(new ArrayList<>()); Mockito.when(msHostDao.listNonUpStateMsIPs()).thenReturn(new ArrayList<>());
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class); PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
Mockito.when(cmd.getManagementServerId()).thenReturn(1L); Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(true); Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(true);
Mockito.when(indirectAgentLBMock.getManagementServerList()).thenReturn(new ArrayList<>()); Mockito.when(indirectAgentLBMock.getManagementServerList()).thenReturn(new ArrayList<>());
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
@ -476,7 +484,7 @@ public class ManagementServerMaintenanceManagerImplTest {
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1); Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class); PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
Mockito.when(cmd.getManagementServerId()).thenReturn(1L); Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false); Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn(null); Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn(null);
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
@ -497,7 +505,7 @@ public class ManagementServerMaintenanceManagerImplTest {
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1); Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class); PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
Mockito.when(cmd.getManagementServerId()).thenReturn(1L); Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false); Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Failed"); Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Failed");
Assert.assertThrows(CloudRuntimeException.class, () -> { Assert.assertThrows(CloudRuntimeException.class, () -> {
@ -518,7 +526,7 @@ public class ManagementServerMaintenanceManagerImplTest {
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1); Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class); PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
Mockito.when(cmd.getManagementServerId()).thenReturn(1L); Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false); Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
Mockito.when(hostDao.listByMs(anyLong())).thenReturn(new ArrayList<>()); Mockito.when(hostDao.listByMs(anyLong())).thenReturn(new ArrayList<>());
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success"); Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");

View File

@ -287,6 +287,7 @@ import com.cloud.user.dao.AccountDao;
import com.cloud.user.dao.UserDao; import com.cloud.user.dao.UserDao;
import com.cloud.utils.NumbersUtil; import com.cloud.utils.NumbersUtil;
import com.cloud.utils.Pair; import com.cloud.utils.Pair;
import com.cloud.utils.Ternary;
import com.cloud.utils.UriUtils; import com.cloud.utils.UriUtils;
import com.cloud.utils.component.ManagerBase; import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.crypt.DBEncryptionUtil; import com.cloud.utils.crypt.DBEncryptionUtil;
@ -631,21 +632,30 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
private void initMessageBusListener() { private void initMessageBusListener() {
messageBus.subscribe(EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, new MessageSubscriber() { messageBus.subscribe(EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, new MessageSubscriber() {
@Override @Override
public void onPublishMessage(String serderAddress, String subject, Object args) { public void onPublishMessage(String senderAddress, String subject, Object args) {
String globalSettingUpdated = (String) args; Ternary<String, ConfigKey.Scope, Long> settingUpdated = (Ternary<String, ConfigKey.Scope, Long>) args;
if (StringUtils.isEmpty(globalSettingUpdated)) { String settingNameUpdated = settingUpdated.first();
if (StringUtils.isEmpty(settingNameUpdated)) {
return; return;
} }
if (globalSettingUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) || if (settingNameUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
globalSettingUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) { settingNameUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
_indirectAgentLB.propagateMSListToAgents(); _indirectAgentLB.propagateMSListToAgents(false);
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString()) } else if (settingNameUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
|| globalSettingUpdated.equals(Config.MigrateWait.toString())) { || settingNameUpdated.equals(Config.MigrateWait.toString())) {
Map<String, String> params = new HashMap<String, String>(); Map<String, String> params = new HashMap<String, String>();
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
_agentManager.propagateChangeToAgents(params); _agentManager.propagateChangeToAgents(params);
} else if (VMLeaseManager.InstanceLeaseEnabled.key().equals(globalSettingUpdated)) { } else if (settingNameUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBCheckInterval.key())) {
ConfigKey.Scope scope = settingUpdated.second();
if (scope == ConfigKey.Scope.Global) {
_indirectAgentLB.propagateMSListToAgents(false);
} else if (scope == ConfigKey.Scope.Cluster) {
Long clusterId = settingUpdated.third();
_indirectAgentLB.propagateMSListToAgentsInCluster(clusterId);
}
} else if (VMLeaseManager.InstanceLeaseEnabled.key().equals(settingNameUpdated)) {
vmLeaseManager.onLeaseFeatureToggle(); vmLeaseManager.onLeaseFeatureToggle();
} }
} }
@ -845,6 +855,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
CallContext.current().setEventDetails(String.format(" Name: %s, New Value: %s, Scope: %s", name, value, scope.name())); CallContext.current().setEventDetails(String.format(" Name: %s, New Value: %s, Scope: %s", name, value, scope.name()));
_configDepot.invalidateConfigCache(name, scope, resourceId); _configDepot.invalidateConfigCache(name, scope, resourceId);
messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, new Ternary<>(name, scope, resourceId));
return valueEncrypted ? DBEncryptionUtil.decrypt(value) : value; return valueEncrypted ? DBEncryptionUtil.decrypt(value) : value;
} }
@ -939,7 +950,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
} }
txn.commit(); txn.commit();
messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, name); messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, new Ternary<>(name, ConfigKey.Scope.Global, resourceId));
return _configDao.getValue(name); return _configDao.getValue(name);
} }

View File

@ -30,6 +30,7 @@ import java.util.concurrent.TimeUnit;
import javax.inject.Inject; import javax.inject.Inject;
import javax.naming.ConfigurationException; import javax.naming.ConfigurationException;
import com.cloud.dc.ClusterVO;
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBRoundRobinAlgorithm; import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBRoundRobinAlgorithm;
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBShuffleAlgorithm; import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBShuffleAlgorithm;
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBStaticAlgorithm; import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBStaticAlgorithm;
@ -62,7 +63,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
public static final ConfigKey<String> IndirectAgentLBAlgorithm = new ConfigKey<>(String.class, public static final ConfigKey<String> IndirectAgentLBAlgorithm = new ConfigKey<>(String.class,
"indirect.agent.lb.algorithm", "Advanced", "static", "indirect.agent.lb.algorithm", "Advanced", "static",
"The algorithm to be applied on the provided management server list in the 'host' config that that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle.", "The algorithm to be applied on the provided management server list in the 'host' config that that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle. " +
"Note: The lb algorithm 'shuffle' disables the indirect agent lb check background task once the algorithm is applied on the agent.",
true, ConfigKey.Scope.Global, null, null, null, null, null, ConfigKey.Kind.Select, "static,roundrobin,shuffle"); true, ConfigKey.Scope.Global, null, null, null, null, null, ConfigKey.Kind.Select, "static,roundrobin,shuffle");
public static final ConfigKey<Long> IndirectAgentLBCheckInterval = new ConfigKey<>("Advanced", Long.class, public static final ConfigKey<Long> IndirectAgentLBCheckInterval = new ConfigKey<>("Advanced", Long.class,
@ -89,7 +91,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
private static final List<ResourceState> agentValidResourceStates = List.of( private static final List<ResourceState> agentValidResourceStates = List.of(
ResourceState.Enabled, ResourceState.Maintenance, ResourceState.Disabled, ResourceState.Enabled, ResourceState.Maintenance, ResourceState.Disabled,
ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance); ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance);
private static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy, private static final List<ResourceState> agentNonMaintenanceResourceStates = List.of(
ResourceState.Enabled, ResourceState.Disabled);
public static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy,
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM); Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
private static final List<Host.Type> agentNonRoutingHostTypes = List.of(Host.Type.ConsoleProxy, private static final List<Host.Type> agentNonRoutingHostTypes = List.of(Host.Type.ConsoleProxy,
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM); Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
@ -132,7 +136,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm); final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm);
List<Long> hostIdList = orderedHostIdList; List<Long> hostIdList = orderedHostIdList;
if (hostIdList == null) { if (hostIdList == null) {
hostIdList = algorithm.isHostListNeeded() ? getOrderedHostIdList(dcId) : new ArrayList<>(); hostIdList = algorithm.isHostListNeeded() ? getOrderedHostIdList(dcId, false) : new ArrayList<>();
} }
// just in case we have a host in creating state make sure it is in the list: // just in case we have a host in creating state make sure it is in the list:
@ -167,8 +171,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
return IndirectAgentLBCheckInterval.valueIn(clusterId); return IndirectAgentLBCheckInterval.valueIn(clusterId);
} }
List<Long> getOrderedHostIdList(final Long dcId) { List<Long> getOrderedHostIdList(final Long dcId, boolean excludeHostsInMaintenance) {
final List<Long> hostIdList = getAllAgentBasedHostsFromDB(dcId, null); final List<Long> hostIdList = getAllAgentBasedHostsFromDB(dcId, null, null, excludeHostsInMaintenance);
hostIdList.sort(Comparator.comparingLong(x -> x)); hostIdList.sort(Comparator.comparingLong(x -> x));
return hostIdList; return hostIdList;
} }
@ -259,19 +263,25 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
agentValidResourceStates, agentNonRoutingHostTypes, agentValidHypervisorTypes); agentValidResourceStates, agentNonRoutingHostTypes, agentValidHypervisorTypes);
} }
private List<Long> getAllAgentBasedRoutingHostsFromDB(final Long zoneId, final Long clusterId, final Long msId) { private List<Long> getAllAgentBasedRoutingHostsFromDB(final Long zoneId, final Long clusterId, final Long msId, boolean excludeHostsInMaintenance) {
List<ResourceState> validResourceStates = excludeHostsInMaintenance ? agentNonMaintenanceResourceStates : agentValidResourceStates;
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, msId, return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, msId,
agentValidResourceStates, List.of(Host.Type.Routing), agentValidHypervisorTypes); validResourceStates, List.of(Host.Type.Routing), agentValidHypervisorTypes);
} }
private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId) { private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId, final Long msId, boolean excludeHostsInMaintenance) {
List<ResourceState> validResourceStates = excludeHostsInMaintenance ? agentNonMaintenanceResourceStates : agentValidResourceStates;
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, null, return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, null,
agentValidResourceStates, agentValidHostTypes, agentValidHypervisorTypes); validResourceStates, agentValidHostTypes, agentValidHypervisorTypes);
}
private List<Long> getAllAgentBasedHosts(long msId, boolean excludeHostsInMaintenance) {
return getAllAgentBasedHostsFromDB(null, null, msId, excludeHostsInMaintenance);
} }
@Override @Override
public boolean haveAgentBasedHosts(long msId) { public boolean haveAgentBasedHosts(long msId, boolean excludeHostsInMaintenance) {
return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId)); return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId, excludeHostsInMaintenance));
} }
private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() { private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() {
@ -303,8 +313,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@Override @Override
public void propagateMSListToAgents() { public void propagateMSListToAgents(boolean triggerHostLB) {
logger.debug("Propagating management server list update to agents"); logger.debug("Propagating management server list update to the agents");
ExecutorService setupMSListExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSList-Worker")); ExecutorService setupMSListExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSList-Worker"));
final String lbAlgorithm = getLBAlgorithmName(); final String lbAlgorithm = getLBAlgorithmName();
final Long globalLbCheckInterval = getLBPreferredHostCheckInterval(null); final Long globalLbCheckInterval = getLBPreferredHostCheckInterval(null);
@ -316,20 +326,20 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
Map<Long, List<Long>> clusterHostIdsMap = new HashMap<>(); Map<Long, List<Long>> clusterHostIdsMap = new HashMap<>();
List<Long> clusterIds = clusterDao.listAllClusterIds(zone.getId()); List<Long> clusterIds = clusterDao.listAllClusterIds(zone.getId());
for (Long clusterId : clusterIds) { for (Long clusterId : clusterIds) {
List<Long> hostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null); List<Long> hostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null, false);
clusterHostIdsMap.put(clusterId, hostIds); clusterHostIdsMap.put(clusterId, hostIds);
zoneHostIds.addAll(hostIds); zoneHostIds.addAll(hostIds);
} }
zoneHostIds.sort(Comparator.comparingLong(x -> x)); zoneHostIds.sort(Comparator.comparingLong(x -> x));
final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs(); final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
for (Long nonRoutingHostId : nonRoutingHostIds) { for (Long nonRoutingHostId : nonRoutingHostIds) {
setupMSListExecutorService.submit(new SetupMSListTask(nonRoutingHostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, globalLbCheckInterval)); setupMSListExecutorService.submit(new SetupMSListTask(nonRoutingHostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, globalLbCheckInterval, triggerHostLB));
} }
for (Long clusterId : clusterIds) { for (Long clusterId : clusterIds) {
final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId); final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
List<Long> hostIds = clusterHostIdsMap.get(clusterId); List<Long> hostIds = clusterHostIdsMap.get(clusterId);
for (Long hostId : hostIds) { for (Long hostId : hostIds) {
setupMSListExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval)); setupMSListExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval, triggerHostLB));
} }
} }
} }
@ -345,6 +355,45 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
} }
} }
/**
 * Propagates the management server list update to the agent based routing hosts of one cluster.
 *
 * <p>Looks up the cluster and its zone, collects the routing host ids of the cluster (hosts in
 * maintenance states are included), and submits one {@code SetupMSListTask} per host to a
 * short-lived worker pool. Each task is created with the cluster-scoped lb check interval and
 * with the host lb trigger flag set to {@code false}. The call blocks until all tasks finish
 * or 300 seconds elapse, whichever comes first.</p>
 *
 * <p>Nothing is done when {@code clusterId} is {@code null}, when the cluster or its zone
 * cannot be found, or when the cluster has no matching hosts.</p>
 *
 * @param clusterId id of the cluster whose host agents should receive the update; may be {@code null}
 */
@Override
public void propagateMSListToAgentsInCluster(Long clusterId) {
    if (clusterId == null) {
        return;
    }

    logger.debug("Propagating management server list update to the agents in cluster " + clusterId);
    ClusterVO cluster = clusterDao.findById(clusterId);
    if (cluster == null) {
        logger.warn("Unable to propagate management server list, couldn't find cluster " + clusterId);
        return;
    }

    DataCenterVO zone = dataCenterDao.findById(cluster.getDataCenterId());
    if (zone == null) {
        logger.warn("Unable to propagate management server list, couldn't find zone of the cluster " + clusterId);
        return;
    }

    List<Long> clusterHostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null, false);
    if (CollectionUtils.isEmpty(clusterHostIds)) {
        // No host to notify: avoid spinning up a worker pool (and guard against a null list).
        return;
    }
    clusterHostIds.sort(Comparator.comparingLong(x -> x));

    final String lbAlgorithm = getLBAlgorithmName();
    final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
    final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId);

    ExecutorService setupMSListInClusterExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSListInCluster-Worker"));
    try {
        for (Long hostId : clusterHostIds) {
            setupMSListInClusterExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), clusterHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval, false));
        }
    } finally {
        // Always stop accepting work so the pool threads can exit, even if a submission failed.
        setupMSListInClusterExecutorService.shutdown();
    }

    try {
        if (!setupMSListInClusterExecutorService.awaitTermination(300, TimeUnit.SECONDS)) {
            setupMSListInClusterExecutorService.shutdownNow();
        }
    } catch (InterruptedException e) {
        setupMSListInClusterExecutorService.shutdownNow();
        logger.debug(String.format("Force shutdown setup ms list in cluster service as it did not shutdown in the desired time due to: %s", e.getMessage()));
        // Restore the interrupt flag so the caller can still observe the interruption.
        Thread.currentThread().interrupt();
    }
}
private final class SetupMSListTask extends ManagedContextRunnable { private final class SetupMSListTask extends ManagedContextRunnable {
private Long hostId; private Long hostId;
private Long dcId; private Long dcId;
@ -352,21 +401,23 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
private List<String> avoidMsList; private List<String> avoidMsList;
private String lbAlgorithm; private String lbAlgorithm;
private Long lbCheckInterval; private Long lbCheckInterval;
private Boolean triggerHostLb;
public SetupMSListTask(Long hostId, Long dcId, List<Long> orderedHostIdList, List<String> avoidMsList, public SetupMSListTask(Long hostId, Long dcId, List<Long> orderedHostIdList, List<String> avoidMsList,
String lbAlgorithm, Long lbCheckInterval) { String lbAlgorithm, Long lbCheckInterval, Boolean triggerHostLb) {
this.hostId = hostId; this.hostId = hostId;
this.dcId = dcId; this.dcId = dcId;
this.orderedHostIdList = orderedHostIdList; this.orderedHostIdList = orderedHostIdList;
this.avoidMsList = avoidMsList; this.avoidMsList = avoidMsList;
this.lbAlgorithm = lbAlgorithm; this.lbAlgorithm = lbAlgorithm;
this.lbCheckInterval = lbCheckInterval; this.lbCheckInterval = lbCheckInterval;
this.triggerHostLb = triggerHostLb;
} }
@Override @Override
protected void runInContext() { protected void runInContext() {
final List<String> msList = getManagementServerList(hostId, dcId, orderedHostIdList); final List<String> msList = getManagementServerList(hostId, dcId, orderedHostIdList);
final SetupMSListCommand cmd = new SetupMSListCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval); final SetupMSListCommand cmd = new SetupMSListCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval, triggerHostLb);
cmd.setWait(60); cmd.setWait(60);
final Answer answer = agentManager.easySend(hostId, cmd); final Answer answer = agentManager.easySend(hostId, cmd);
if (answer == null || !answer.getResult()) { if (answer == null || !answer.getResult()) {
@ -419,9 +470,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
protected boolean migrateRoutingHostAgentsInCluster(long clusterId, String fromMsUuid, long fromMsId, DataCenter dc, protected boolean migrateRoutingHostAgentsInCluster(long clusterId, String fromMsUuid, long fromMsId, DataCenter dc,
long migrationStartTimeInMs, long timeoutDurationInMs, final List<String> avoidMsList, String lbAlgorithm, long migrationStartTimeInMs, long timeoutDurationInMs, final List<String> avoidMsList, String lbAlgorithm,
boolean lbAlgorithmChanged, List<Long> orderedHostIdList) { boolean lbAlgorithmChanged, List<Long> orderedHostIdList, boolean excludeHostsInMaintenance) {
List<Long> agentBasedHostsOfMsInDcAndCluster = getAllAgentBasedRoutingHostsFromDB(dc.getId(), clusterId, fromMsId); List<Long> agentBasedHostsOfMsInDcAndCluster = getAllAgentBasedRoutingHostsFromDB(dc.getId(), clusterId, fromMsId, excludeHostsInMaintenance);
if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDcAndCluster)) { if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDcAndCluster)) {
return true; return true;
} }
@ -461,7 +512,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
} }
@Override @Override
public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) { public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
if (timeoutDurationInMs <= 0) { if (timeoutDurationInMs <= 0) {
logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid));
return false; return false;
@ -469,7 +520,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
long migrationStartTimeInMs = System.currentTimeMillis(); long migrationStartTimeInMs = System.currentTimeMillis();
if (!haveAgentBasedHosts(fromMsId)) { if (!haveAgentBasedHosts(fromMsId, excludeHostsInMaintenance)) {
logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid)); logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid));
return true; return true;
} }
@ -489,7 +540,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
List<DataCenterVO> dataCenterList = dcDao.listAll(); List<DataCenterVO> dataCenterList = dcDao.listAll();
for (DataCenterVO dc : dataCenterList) { for (DataCenterVO dc : dataCenterList) {
if (!migrateAgentsInZone(dc, fromMsUuid, fromMsId, avoidMsList, lbAlgorithm, lbAlgorithmChanged, if (!migrateAgentsInZone(dc, fromMsUuid, fromMsId, avoidMsList, lbAlgorithm, lbAlgorithmChanged,
migrationStartTimeInMs, timeoutDurationInMs)) { migrationStartTimeInMs, timeoutDurationInMs, excludeHostsInMaintenance)) {
return false; return false;
} }
} }
@ -498,8 +549,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
} }
private boolean migrateAgentsInZone(DataCenterVO dc, String fromMsUuid, long fromMsId, List<String> avoidMsList, private boolean migrateAgentsInZone(DataCenterVO dc, String fromMsUuid, long fromMsId, List<String> avoidMsList,
String lbAlgorithm, boolean lbAlgorithmChanged, long migrationStartTimeInMs, long timeoutDurationInMs) { String lbAlgorithm, boolean lbAlgorithmChanged, long migrationStartTimeInMs, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId()); List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId(), excludeHostsInMaintenance);
if (!migrateNonRoutingHostAgentsInZone(fromMsUuid, fromMsId, dc, migrationStartTimeInMs, if (!migrateNonRoutingHostAgentsInZone(fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) { timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) {
return false; return false;
@ -507,7 +558,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
List<Long> clusterIds = clusterDao.listAllClusterIds(dc.getId()); List<Long> clusterIds = clusterDao.listAllClusterIds(dc.getId());
for (Long clusterId : clusterIds) { for (Long clusterId : clusterIds) {
if (!migrateRoutingHostAgentsInCluster(clusterId, fromMsUuid, fromMsId, dc, migrationStartTimeInMs, if (!migrateRoutingHostAgentsInCluster(clusterId, fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) { timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList, excludeHostsInMaintenance)) {
return false; return false;
} }
} }
@ -547,7 +598,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval); final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
cmd.setWait(60); cmd.setWait(60);
final Answer answer = agentManager.easySend(hostId, cmd); //may not receive answer when the agent disconnects immediately and try reconnecting to other ms host final Answer answer = agentManager.easySend(hostId, cmd); //may not receive answer when the agent disconnects immediately and try reconnecting to other ms host
if (answer != null && !answer.getResult()) { if (answer == null) {
logger.warn(String.format("Got empty answer while initiating migration of agent connection for host agent ID: %d", hostId));
} else if (!answer.getResult()) {
logger.warn(String.format("Error while initiating migration of agent connection for host agent ID: %d - %s", hostId, answer.getDetails())); logger.warn(String.format("Error while initiating migration of agent connection for host agent ID: %d - %s", hostId, answer.getDetails()));
} }
updateLastManagementServer(hostId, fromMsId); updateLastManagementServer(hostId, fromMsId);

View File

@ -204,7 +204,7 @@ public class IndirectAgentLBServiceImplTest {
public void testGetOrderedRunningHostIdsEmptyList() { public void testGetOrderedRunningHostIdsEmptyList() {
doReturn(Collections.emptyList()).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType( doReturn(Collections.emptyList()).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList()); Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID).isEmpty()); Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID, false).isEmpty());
} }
@Test @Test
@ -213,6 +213,6 @@ public class IndirectAgentLBServiceImplTest {
.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null), .findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null),
Mockito.anyList(), Mockito.anyList(), Mockito.anyList()); Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
Assert.assertEquals(Arrays.asList(host1.getId(), host2.getId(), host3.getId(), host4.getId()), Assert.assertEquals(Arrays.asList(host1.getId(), host2.getId(), host3.getId(), host4.getId()),
agentMSLB.getOrderedHostIdList(DC_1_ID)); agentMSLB.getOrderedHostIdList(DC_1_ID, false));
} }
} }

View File

@ -1900,6 +1900,7 @@
"label.read.io": "Read (IO)", "label.read.io": "Read (IO)",
"label.readonly": "Read-Only", "label.readonly": "Read-Only",
"label.reason": "Reason", "label.reason": "Reason",
"label.rebalance": "Rebalance",
"label.reboot": "Reboot", "label.reboot": "Reboot",
"label.recent.deliveries": "Recent deliveries", "label.recent.deliveries": "Recent deliveries",
"label.receivedbytes": "Bytes received", "label.receivedbytes": "Bytes received",

View File

@ -75,6 +75,7 @@ export default {
message: 'message.cancel.maintenance', message: 'message.cancel.maintenance',
dataView: true, dataView: true,
popup: true, popup: true,
args: ['rebalance'],
show: (record, store) => { return ['PreparingForMaintenance', 'Maintenance'].includes(record.state) }, show: (record, store) => { return ['PreparingForMaintenance', 'Maintenance'].includes(record.state) },
mapping: { mapping: {
managementserverid: { managementserverid: {
@ -109,7 +110,6 @@ export default {
icon: 'close-circle-outlined', icon: 'close-circle-outlined',
label: 'label.cancel.shutdown', label: 'label.cancel.shutdown',
message: 'message.cancel.shutdown', message: 'message.cancel.shutdown',
docHelp: 'installguide/configuration.html#adding-a-zone',
dataView: true, dataView: true,
popup: true, popup: true,
show: (record, store) => { return ['PreparingForShutDown', 'ReadyToShutDown', 'ShuttingDown'].includes(record.state) }, show: (record, store) => { return ['PreparingForShutDown', 'ReadyToShutDown', 'ShuttingDown'].includes(record.state) },

View File

@ -1193,7 +1193,7 @@ export default {
this.getFirstIndexFocus() this.getFirstIndexFocus()
this.showAction = true this.showAction = true
const listIconForFillValues = ['copy-outlined', 'CopyOutlined', 'edit-outlined', 'EditOutlined', 'share-alt-outlined', 'ShareAltOutlined'] const listIconForFillValues = ['copy-outlined', 'CopyOutlined', 'edit-outlined', 'EditOutlined', 'share-alt-outlined', 'ShareAltOutlined', 'minus-square-outlined']
for (const param of this.currentAction.paramFields) { for (const param of this.currentAction.paramFields) {
if (param.type === 'list' && ['tags', 'hosttags', 'storagetags', 'storageaccessgroups', 'files'].includes(param.name)) { if (param.type === 'list' && ['tags', 'hosttags', 'storagetags', 'storageaccessgroups', 'files'].includes(param.name)) {
param.type = 'string' param.type = 'string'
@ -1422,6 +1422,8 @@ export default {
fieldValue = this.resource[fieldName] ? this.resource[fieldName] : null fieldValue = this.resource[fieldName] ? this.resource[fieldName] : null
if (fieldValue) { if (fieldValue) {
this.form[field.name] = fieldValue this.form[field.name] = fieldValue
} else if (field.type === 'boolean' && field.name === 'rebalance' && this.currentAction.api === 'cancelMaintenance') {
this.form[field.name] = true
} }
}) })
}, },
@ -1578,6 +1580,10 @@ export default {
} }
} }
if (['cancelMaintenance'].includes(action.api) && (params.rebalance === undefined || params.rebalance === null || params.rebalance === '')) {
params.rebalance = true
}
for (const key in values) { for (const key in values) {
const input = values[key] const input = values[key]
for (const param of action.params) { for (const param of action.params) {

View File

@ -45,6 +45,12 @@
</a-select-option> </a-select-option>
</a-select> </a-select>
</a-form-item> </a-form-item>
<a-form-item name="forced" ref="forced">
<template #label>
<tooltip-label :title="$t('label.forced')" :tooltip="prepareForMaintenanceApiParams.forced.description"/>
</template>
<a-switch v-model:checked="form.forced" />
</a-form-item>
<a-divider/> <a-divider/>
<a-alert type="error"> <a-alert type="error">
<template #message> <template #message>
@ -135,6 +141,7 @@ export default {
if (this.isPrepareForMaintenance && this.form.algorithm !== '') { if (this.isPrepareForMaintenance && this.form.algorithm !== '') {
params.algorithm = this.form.algorithm params.algorithm = this.form.algorithm
} }
params.forced = this.form.forced
api(this.action.currentAction.api, params).then(() => { api(this.action.currentAction.api, params).then(() => {
this.$message.success(this.$t(this.action.currentAction.label) + ' : ' + this.resource.name) this.$message.success(this.$t(this.action.currentAction.label) + ' : ' + this.resource.name)
this.closeAction() this.closeAction()

View File

@ -323,7 +323,7 @@ public abstract class NioConnection implements Callable<Boolean> {
logger.trace("Reading from: {}", socketChannel.socket().toString()); logger.trace("Reading from: {}", socketChannel.socket().toString());
final byte[] data = link.read(socketChannel); final byte[] data = link.read(socketChannel);
if (data == null) { if (data == null) {
logger.trace("Packet is incomplete. Waiting for more."); logger.trace("Packet is incomplete. Waiting for more.");
return; return;
} }
final Task task = _factory.create(Task.Type.DATA, link, data); final Task task = _factory.create(Task.Type.DATA, link, data);