mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Management Server - Prepare for Maintenance and Cancel Maintenance improvements (#10995)
* Management Server - Prepare for Maintenance and Cancel Maintenance improvements: - Added new setting 'management.server.maintenance.ignore.maintenance.hosts' to ignore hosts in maintenance states while preparing management server for maintenance. This skips agent transfer and agents count check for hosts in maintenance. - Rebalance indirect agents after cancel maintenance, using rebalance parameter in cancelMaintenance API - Force maintenance after maintenance window timeout, using forced parameter in prepareForMaintenance API. - Propagate 'indirect.agent.lb.check.interval' setting change to the host agents. * rebases fixes * code improvements, cleanup * [UI] Set rebalance true by default in cancel maintenance dialog * Update MS state after executing cluster cmd in the target MS, and some code improvements * code improvements * Ensure the host lb algorithm 'shuffle' is applied once before disabling the indirect agent lb check background task
This commit is contained in:
parent
16c60c7528
commit
be22bfe2c9
@ -453,22 +453,30 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||
certExecutor.schedule(new PostCertificateRenewalTask(this), 5, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
private void scheduleHostLBCheckerTask(final long checkInterval) {
|
||||
private void scheduleHostLBCheckerTask(final String lbAlgorithm, final long checkInterval) {
|
||||
String name = "HostLBCheckerTask";
|
||||
if (hostLbCheckExecutor != null && !hostLbCheckExecutor.isShutdown()) {
|
||||
logger.info("Shutting down the preferred host checker task {}", name);
|
||||
hostLbCheckExecutor.shutdown();
|
||||
try {
|
||||
if (!hostLbCheckExecutor.awaitTermination(1, TimeUnit.SECONDS)) {
|
||||
hostLbCheckExecutor.shutdownNow();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
logger.debug("Forcing {} shutdown as it did not shutdown in the desired time due to: {}",
|
||||
logger.debug("Forcing the preferred host checker task {} shutdown as it did not shutdown in the desired time due to: {}",
|
||||
name, e.getMessage());
|
||||
hostLbCheckExecutor.shutdownNow();
|
||||
}
|
||||
}
|
||||
if (checkInterval > 0L) {
|
||||
logger.info("Scheduling preferred host task with host.lb.interval={}ms", checkInterval);
|
||||
if ("shuffle".equalsIgnoreCase(lbAlgorithm)) {
|
||||
logger.info("Scheduling the preferred host checker task to trigger once (to apply lb algorithm '{}') after host.lb.interval={} ms", lbAlgorithm, checkInterval);
|
||||
hostLbCheckExecutor = Executors.newSingleThreadScheduledExecutor((new NamedThreadFactory(name)));
|
||||
hostLbCheckExecutor.schedule(new PreferredHostCheckerTask(), checkInterval, TimeUnit.MILLISECONDS);
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info("Scheduling a recurring preferred host checker task with lb algorithm '{}' and host.lb.interval={} ms", lbAlgorithm, checkInterval);
|
||||
hostLbCheckExecutor = Executors.newSingleThreadScheduledExecutor((new NamedThreadFactory(name)));
|
||||
hostLbCheckExecutor.scheduleAtFixedRate(new PreferredHostCheckerTask(), checkInterval, checkInterval,
|
||||
TimeUnit.MILLISECONDS);
|
||||
@ -928,7 +936,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||
return new SetupCertificateAnswer(true);
|
||||
}
|
||||
|
||||
private void processManagementServerList(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) {
|
||||
private void processManagementServerList(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval, final boolean triggerHostLB) {
|
||||
if (CollectionUtils.isNotEmpty(msList) && StringUtils.isNotEmpty(lbAlgorithm)) {
|
||||
try {
|
||||
final String newMSHosts = String.format("%s%s%s", com.cloud.utils.StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm);
|
||||
@ -941,22 +949,24 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||
}
|
||||
}
|
||||
shell.setAvoidHosts(avoidMsList);
|
||||
if ("shuffle".equals(lbAlgorithm)) {
|
||||
scheduleHostLBCheckerTask(0);
|
||||
} else {
|
||||
scheduleHostLBCheckerTask(shell.getLbCheckerInterval(lbCheckInterval));
|
||||
if (triggerHostLB) {
|
||||
logger.info("Triggering the preferred host checker task now");
|
||||
ScheduledExecutorService hostLbExecutor = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("HostLB-Executor"));
|
||||
hostLbExecutor.schedule(new PreferredHostCheckerTask(), 0, TimeUnit.MILLISECONDS);
|
||||
hostLbExecutor.shutdown();
|
||||
}
|
||||
scheduleHostLBCheckerTask(lbAlgorithm, shell.getLbCheckerInterval(lbCheckInterval));
|
||||
}
|
||||
|
||||
private Answer setupManagementServerList(final SetupMSListCommand cmd) {
|
||||
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
||||
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval(), cmd.getTriggerHostLb());
|
||||
return new SetupMSListAnswer(true);
|
||||
}
|
||||
|
||||
private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) {
|
||||
try {
|
||||
if (CollectionUtils.isNotEmpty(cmd.getMsList())) {
|
||||
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
||||
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval(), false);
|
||||
}
|
||||
Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("MigrateAgentConnection-Job")).schedule(() -> {
|
||||
migrateAgentConnection(cmd.getAvoidMsList());
|
||||
@ -1046,7 +1056,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||
}
|
||||
|
||||
verifyAgentArch(ready.getArch());
|
||||
processManagementServerList(ready.getMsHostList(), ready.getAvoidMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval());
|
||||
processManagementServerList(ready.getMsHostList(), ready.getAvoidMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval(), false);
|
||||
|
||||
logger.info("Ready command is processed for agent [id: {}, uuid: {}, name: {}]", getId(), getUuid(), getName());
|
||||
}
|
||||
|
||||
@ -40,7 +40,7 @@ public class OperationTimedoutException extends CloudException {
|
||||
boolean _isActive;
|
||||
|
||||
public OperationTimedoutException(Command[] cmds, long agentId, long seqId, int time, boolean isActive) {
|
||||
super("Commands " + seqId + " to Host " + agentId + " timed out after " + time);
|
||||
super("Commands " + seqId + " to Host " + agentId + " timed out after " + time + " secs");
|
||||
_agentId = agentId;
|
||||
_seqId = seqId;
|
||||
_time = time;
|
||||
|
||||
@ -76,6 +76,10 @@ public enum ResourceState {
|
||||
}
|
||||
}
|
||||
|
||||
public static List<ResourceState> s_maintenanceStates = List.of(ResourceState.Maintenance,
|
||||
ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance,
|
||||
ResourceState.ErrorInPrepareForMaintenance);
|
||||
|
||||
/**
 * Looks up the state that follows this one when the given event occurs,
 * by delegating to the shared state machine {@code s_fsm}.
 *
 * @param a the event to apply to the current state
 * @return whatever {@code s_fsm.getNextState(this, a)} yields
 */
public ResourceState getNextState(Event a) {
|
||||
return s_fsm.getNextState(this, a);
|
||||
}
|
||||
@ -98,8 +102,7 @@ public enum ResourceState {
|
||||
}
|
||||
|
||||
public static boolean isMaintenanceState(ResourceState state) {
|
||||
return Arrays.asList(ResourceState.Maintenance, ResourceState.ErrorInMaintenance,
|
||||
ResourceState.PrepareForMaintenance, ResourceState.ErrorInPrepareForMaintenance).contains(state);
|
||||
return s_maintenanceStates.contains(state);
|
||||
}
|
||||
|
||||
public static boolean canAttemptMaintenance(ResourceState state) {
|
||||
|
||||
@ -441,6 +441,7 @@ public class ApiConstants {
|
||||
public static final String PUBLIC_END_PORT = "publicendport";
|
||||
public static final String PUBLIC_ZONE = "publiczone";
|
||||
public static final String PURGE_RESOURCES = "purgeresources";
|
||||
public static final String REBALANCE = "rebalance";
|
||||
public static final String RECEIVED_BYTES = "receivedbytes";
|
||||
public static final String RECONNECT = "reconnect";
|
||||
public static final String RECOVER = "recover";
|
||||
|
||||
@ -46,7 +46,7 @@ public class PatchSystemVMCmd extends BaseAsyncCmd {
|
||||
@Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN,
|
||||
description = "If true, initiates copy of scripts and restart of the agent, even if the scripts version matches." +
|
||||
"To be used with ID parameter only")
|
||||
private Boolean force;
|
||||
private Boolean forced;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
/////////////////// Accessors ///////////////////////
|
||||
@ -58,7 +58,7 @@ public class PatchSystemVMCmd extends BaseAsyncCmd {
|
||||
}
|
||||
|
||||
public boolean isForced() {
|
||||
return force != null && force;
|
||||
return forced != null && forced;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
|
||||
@ -29,13 +29,15 @@ public class SetupMSListCommand extends Command {
|
||||
private List<String> avoidMsList;
|
||||
private String lbAlgorithm;
|
||||
private Long lbCheckInterval;
|
||||
private Boolean triggerHostLb;
|
||||
|
||||
public SetupMSListCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) {
|
||||
public SetupMSListCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval, final Boolean triggerHostLb) {
|
||||
super();
|
||||
this.msList = msList;
|
||||
this.avoidMsList = avoidMsList;
|
||||
this.lbAlgorithm = lbAlgorithm;
|
||||
this.lbCheckInterval = lbCheckInterval;
|
||||
this.triggerHostLb = triggerHostLb;
|
||||
}
|
||||
|
||||
public List<String> getMsList() {
|
||||
@ -54,9 +56,12 @@ public class SetupMSListCommand extends Command {
|
||||
return lbCheckInterval;
|
||||
}
|
||||
|
||||
public boolean getTriggerHostLb() {
|
||||
return triggerHostLb;
|
||||
}
|
||||
|
||||
/**
 * Always returns {@code false} for this command.
 * NOTE(review): presumably this means the command does not need to be serialized with
 * other commands sent to the same agent; confirm against the base Command contract.
 */
@Override
|
||||
public boolean executeInSequence() {
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -171,5 +171,5 @@ public interface AgentManager {
|
||||
|
||||
void propagateChangeToAgents(Map<String, String> params);
|
||||
|
||||
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs);
|
||||
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance);
|
||||
}
|
||||
|
||||
@ -273,8 +273,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||
|
||||
_executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentTaskPool"));
|
||||
|
||||
initConnectExecutor();
|
||||
|
||||
maxConcurrentNewAgentConnections = RemoteAgentMaxConcurrentNewConnections.value();
|
||||
|
||||
_connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10,
|
||||
@ -828,6 +826,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||
return true;
|
||||
}
|
||||
|
||||
initConnectExecutor();
|
||||
startDirectlyConnectedHosts(false);
|
||||
|
||||
if (_connection != null) {
|
||||
@ -2193,7 +2192,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) {
|
||||
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@ -42,6 +42,7 @@ import javax.naming.ConfigurationException;
|
||||
import javax.net.ssl.SSLContext;
|
||||
import javax.net.ssl.SSLEngine;
|
||||
|
||||
import com.cloud.resource.ResourceState;
|
||||
import org.apache.cloudstack.ca.CAManager;
|
||||
import org.apache.cloudstack.framework.config.ConfigDepot;
|
||||
import org.apache.cloudstack.framework.config.ConfigKey;
|
||||
@ -431,10 +432,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
ch = connectToPeer(peer, ch);
|
||||
if (ch == null) {
|
||||
try {
|
||||
logD(bytes, "Unable to route to peer: " + Request.parse(bytes));
|
||||
logD(bytes, "Unable to establish connection to route to peer: " + Request.parse(bytes));
|
||||
} catch (ClassNotFoundException | UnsupportedVersionException e) {
|
||||
// Request.parse thrown exception when we try to log it, log as much as we can
|
||||
logD(bytes, "Unable to route to peer, and Request.parse further caught exception" + e.getMessage());
|
||||
logD(bytes, "Unable to establish connection to route to peer, and Request.parse further caught exception" + e.getMessage());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -643,7 +644,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
final Link link = task.getLink();
|
||||
|
||||
if (Request.fromServer(data)) {
|
||||
|
||||
final AgentAttache agent = findAttache(hostId);
|
||||
|
||||
if (Request.isControl(data)) {
|
||||
@ -691,7 +691,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage());
|
||||
}
|
||||
} else {
|
||||
|
||||
final long mgmtId = Request.getManagementServerId(data);
|
||||
if (mgmtId != -1 && mgmtId != _nodeId) {
|
||||
routeToPeer(Long.toString(mgmtId), data);
|
||||
@ -1352,7 +1351,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) {
|
||||
logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance");
|
||||
try {
|
||||
managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm());
|
||||
managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm(), ((PrepareForMaintenanceManagementServerHostCommand) cmd).isForced());
|
||||
return "Successfully prepared for maintenance";
|
||||
} catch(CloudRuntimeException e) {
|
||||
return e.getMessage();
|
||||
@ -1399,14 +1398,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) {
|
||||
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
|
||||
if (timeoutDurationInMs <= 0) {
|
||||
logger.debug("Not transferring direct agents from management server node {} (id: {}) to other nodes, invalid timeout duration", fromMsId, fromMsUuid);
|
||||
return false;
|
||||
}
|
||||
|
||||
long transferStartTimeInMs = System.currentTimeMillis();
|
||||
if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) {
|
||||
if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId, excludeHostsInMaintenance))) {
|
||||
logger.info("No direct agent hosts available on management server node {} (id: {}), to transfer", fromMsId, fromMsUuid);
|
||||
return true;
|
||||
}
|
||||
@ -1421,7 +1420,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
int agentTransferFailedCount = 0;
|
||||
List<DataCenterVO> dataCenterList = dcDao.listAll();
|
||||
for (DataCenterVO dc : dataCenterList) {
|
||||
List<HostVO> directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId());
|
||||
List<HostVO> directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId(), excludeHostsInMaintenance);
|
||||
if (CollectionUtils.isEmpty(directAgentHostsInDc)) {
|
||||
continue;
|
||||
}
|
||||
@ -1455,9 +1454,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
return (agentTransferFailedCount == 0);
|
||||
}
|
||||
|
||||
private List<HostVO> getDirectAgentHosts(long msId) {
|
||||
private List<HostVO> getDirectAgentHosts(long msId, boolean excludeHostsInMaintenance) {
|
||||
List<HostVO> directAgentHosts = new ArrayList<>();
|
||||
List<HostVO> hosts = _hostDao.listHostsByMs(msId);
|
||||
List<ResourceState> statesToExclude = excludeHostsInMaintenance ? ResourceState.s_maintenanceStates : List.of();
|
||||
List<HostVO> hosts = _hostDao.listHostsByMsResourceState(msId, statesToExclude);
|
||||
for (HostVO host : hosts) {
|
||||
AgentAttache agent = findAttache(host.getId());
|
||||
if (agent instanceof DirectAgentAttache) {
|
||||
@ -1468,9 +1468,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
return directAgentHosts;
|
||||
}
|
||||
|
||||
private List<HostVO> getDirectAgentHostsInDc(long msId, long dcId) {
|
||||
private List<HostVO> getDirectAgentHostsInDc(long msId, long dcId, boolean excludeHostsInMaintenance) {
|
||||
List<HostVO> directAgentHosts = new ArrayList<>();
|
||||
List<HostVO> hosts = _hostDao.listHostsByMsAndDc(msId, dcId);
|
||||
// To exclude maintenance states use values from ResourceState as source of truth
|
||||
List<ResourceState> statesToExclude = excludeHostsInMaintenance ? ResourceState.s_maintenanceStates : List.of();
|
||||
List<HostVO> hosts = _hostDao.listHostsByMsDcResourceState(msId, dcId, statesToExclude);
|
||||
for (HostVO host : hosts) {
|
||||
AgentAttache agent = findAttache(host.getId());
|
||||
if (agent instanceof DirectAgentAttache) {
|
||||
@ -1506,6 +1508,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
/**
 * Listener callback for a cancelled "preparing for maintenance" request:
 * logs the event, delegates to the superclass and clears the agent LB flag.
 */
public void onManagementServerCancelPreparingForMaintenance() {
|
||||
logger.debug("Management server cancel preparing for maintenance");
|
||||
// NOTE(review): this invokes the superclass *PreparingForMaintenance* hook, not a
// *CancelPreparingForMaintenance* one; the superclass is not visible here, so confirm
// whether this is intentional or should call the cancel variant.
super.onManagementServerPreparingForMaintenance();
|
||||
|
||||
// Needed for the case when the Management Server was in Preparing For Maintenance but didn't go to Maintenance state
|
||||
// (where this variable would otherwise be reset).
|
||||
_agentLbHappened = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -177,14 +177,24 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
|
||||
|
||||
List<HostVO> listHostsByMsAndDc(long msId, long dcId);
|
||||
|
||||
List<HostVO> listHostsByMsDcResourceState(long msId, long dcId, List<ResourceState> excludedResourceStates);
|
||||
|
||||
List<HostVO> listHostsByMs(long msId);
|
||||
|
||||
List<HostVO> listHostsByMsResourceState(long msId, List<ResourceState> excludedResourceStates);
|
||||
|
||||
/**
|
||||
* Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over.
|
||||
* @param msId the id of the {@see ManagementServer}
|
||||
* @return the number of hosts/agents this {@see ManagementServer} has responsibility over
|
||||
* Count Hosts by given Management Server, Host and Hypervisor Types,
|
||||
* and exclude Hosts with given Resource States.
|
||||
*
|
||||
* @param msId Management Server Id
|
||||
* @param excludedResourceStates Resource States to be excluded
|
||||
* @param hostTypes Host Types
|
||||
* @param hypervisorTypes Hypervisor Types
|
||||
* @return Hosts count
|
||||
*/
|
||||
int countByMs(long msId);
|
||||
int countHostsByMsResourceStateTypeAndHypervisorType(long msId, List<ResourceState> excludedResourceStates,
|
||||
List<Type> hostTypes, List<HypervisorType> hypervisorTypes);
|
||||
|
||||
/**
|
||||
* Retrieves the host ids/agents this {@see ManagementServer} has responsibility over.
|
||||
|
||||
@ -72,6 +72,7 @@ import com.cloud.utils.db.GenericDaoBase;
|
||||
import com.cloud.utils.db.GenericSearchBuilder;
|
||||
import com.cloud.utils.db.JoinBuilder;
|
||||
import com.cloud.utils.db.JoinBuilder.JoinType;
|
||||
import com.cloud.utils.db.QueryBuilder;
|
||||
import com.cloud.utils.db.SearchBuilder;
|
||||
import com.cloud.utils.db.SearchCriteria;
|
||||
import com.cloud.utils.db.SearchCriteria.Func;
|
||||
@ -1600,6 +1601,17 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||
return listBy(sc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HostVO> listHostsByMsDcResourceState(long msId, long dcId, List<ResourceState> excludedResourceStates) {
|
||||
QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
|
||||
sc.and(sc.entity().getManagementServerId(), Op.EQ, msId);
|
||||
sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
|
||||
if (CollectionUtils.isNotEmpty(excludedResourceStates)) {
|
||||
sc.and(sc.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
|
||||
}
|
||||
return listBy(sc.create());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HostVO> listHostsByMs(long msId) {
|
||||
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create();
|
||||
@ -1608,10 +1620,32 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countByMs(long msId) {
|
||||
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create();
|
||||
sc.setParameters("managementServerId", msId);
|
||||
return getCount(sc);
|
||||
public List<HostVO> listHostsByMsResourceState(long msId, List<ResourceState> excludedResourceStates) {
|
||||
QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
|
||||
sc.and(sc.entity().getManagementServerId(), Op.EQ, msId);
|
||||
if (CollectionUtils.isNotEmpty(excludedResourceStates)) {
|
||||
sc.and(sc.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
|
||||
}
|
||||
return listBy(sc.create());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int countHostsByMsResourceStateTypeAndHypervisorType(long msId,
|
||||
List<ResourceState> excludedResourceStates,
|
||||
List<Type> hostTypes,
|
||||
List<HypervisorType> hypervisorTypes) {
|
||||
QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
|
||||
sc.and(sc.entity().getManagementServerId(), Op.EQ, msId);
|
||||
if (CollectionUtils.isNotEmpty(excludedResourceStates)) {
|
||||
sc.and(sc.entity().getResourceState(), Op.NIN, excludedResourceStates.toArray());
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(hostTypes)) {
|
||||
sc.and(sc.entity().getType(), Op.IN, hostTypes.toArray());
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
|
||||
sc.and(sc.entity().getHypervisorType(), Op.IN, hypervisorTypes.toArray());
|
||||
}
|
||||
return getCount(sc.create());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -70,9 +70,11 @@ public interface IndirectAgentLB {
|
||||
*/
|
||||
Long getLBPreferredHostCheckInterval(Long clusterId);
|
||||
|
||||
void propagateMSListToAgents();
|
||||
void propagateMSListToAgents(boolean triggerHostLB);
|
||||
|
||||
boolean haveAgentBasedHosts(long msId);
|
||||
void propagateMSListToAgentsInCluster(Long clusterId);
|
||||
|
||||
boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs);
|
||||
boolean haveAgentBasedHosts(long msId, boolean excludeHostsInMaintenance);
|
||||
|
||||
boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs, boolean excludeHostsInMaintenance);
|
||||
}
|
||||
|
||||
@ -18,12 +18,15 @@
|
||||
package org.apache.cloudstack.api.command;
|
||||
|
||||
import org.apache.cloudstack.api.APICommand;
|
||||
import org.apache.cloudstack.api.ApiConstants;
|
||||
import org.apache.cloudstack.api.BaseCmd;
|
||||
|
||||
import com.cloud.user.Account;
|
||||
|
||||
import org.apache.cloudstack.api.Parameter;
|
||||
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
|
||||
import org.apache.cloudstack.acl.RoleType;
|
||||
import org.apache.commons.lang3.BooleanUtils;
|
||||
|
||||
@APICommand(name = CancelMaintenanceCmd.APINAME,
|
||||
description = "Cancels maintenance of the management server",
|
||||
@ -36,6 +39,13 @@ public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd {
|
||||
|
||||
public static final String APINAME = "cancelMaintenance";
|
||||
|
||||
@Parameter(name = ApiConstants.REBALANCE, type = CommandType.BOOLEAN, description = "Rebalance agents (applicable for indirect agents, ensure the settings 'host' and 'indirect.agent.lb.algorithm' are properly configured) after cancelling maintenance, default is true")
|
||||
private Boolean rebalance;
|
||||
|
||||
public boolean getRebalance() {
|
||||
return BooleanUtils.toBooleanDefaultIfNull(rebalance, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCommandName() {
|
||||
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
|
||||
|
||||
@ -26,6 +26,7 @@ import com.cloud.user.Account;
|
||||
|
||||
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
|
||||
import org.apache.cloudstack.acl.RoleType;
|
||||
import org.apache.commons.lang3.BooleanUtils;
|
||||
|
||||
@APICommand(name = PrepareForMaintenanceCmd.APINAME,
|
||||
description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents",
|
||||
@ -40,6 +41,9 @@ public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd {
|
||||
" when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered")
|
||||
private String algorithm;
|
||||
|
||||
@Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN, description = "Force management server to maintenance after the maintenance window timeout, default is false")
|
||||
private Boolean forced;
|
||||
|
||||
public String getAlgorithm() {
|
||||
return algorithm;
|
||||
}
|
||||
@ -48,6 +52,10 @@ public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd {
|
||||
this.algorithm = algorithm;
|
||||
}
|
||||
|
||||
public boolean isForced() {
|
||||
return BooleanUtils.toBooleanDefaultIfNull(forced, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCommandName() {
|
||||
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
|
||||
|
||||
@ -40,6 +40,15 @@ public interface ManagementServerMaintenanceManager {
|
||||
ConfigKey.Scope.Global,
|
||||
null);
|
||||
|
||||
// Global boolean setting 'management.server.maintenance.ignore.maintenance.hosts' in the
// "Advanced" category, defaulting to false.
// NOTE(review): the positional 'true' argument is presumably the "dynamic" flag and the
// trailing null the parent key; confirm against the ConfigKey constructor.
ConfigKey<Boolean> ManagementServerMaintenanceIgnoreMaintenanceHosts = new ConfigKey<>(Boolean.class,
|
||||
"management.server.maintenance.ignore.maintenance.hosts",
|
||||
"Advanced",
|
||||
String.valueOf(Boolean.FALSE),
|
||||
"Host in Maintenance state can sometimes block Management Server to go to Maintenance; this setting skips Host(s) in Maintenance state during Management Server Maintenance, default: false.",
|
||||
true,
|
||||
ConfigKey.Scope.Global,
|
||||
null);
|
||||
|
||||
void registerListener(ManagementServerMaintenanceListener listener);
|
||||
|
||||
void unregisterListener(ManagementServerMaintenanceListener listener);
|
||||
@ -76,14 +85,14 @@ public interface ManagementServerMaintenanceManager {
|
||||
// Indicates whether the current management server is preparing to maintenance
|
||||
boolean isPreparingForMaintenance();
|
||||
|
||||
void resetPreparingForMaintenance();
|
||||
void resetMaintenanceParams();
|
||||
|
||||
long getMaintenanceStartTime();
|
||||
|
||||
String getLbAlgorithm();
|
||||
|
||||
// Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs
|
||||
void prepareForMaintenance(String lbAlorithm);
|
||||
void prepareForMaintenance(String lbAlorithm, boolean forced);
|
||||
|
||||
// Cancels maintenance of the current management server
|
||||
void cancelMaintenance();
|
||||
|
||||
@ -26,7 +26,9 @@ import java.util.concurrent.TimeUnit;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
import com.cloud.resource.ResourceState;
|
||||
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
|
||||
import org.apache.cloudstack.agent.lb.IndirectAgentLBServiceImpl;
|
||||
import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
|
||||
import org.apache.cloudstack.api.command.CancelShutdownCmd;
|
||||
import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
|
||||
@ -39,6 +41,7 @@ import org.apache.cloudstack.framework.config.ConfigKey;
|
||||
import org.apache.cloudstack.framework.config.Configurable;
|
||||
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
|
||||
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
|
||||
import org.apache.cloudstack.management.ManagementServerHost;
|
||||
import org.apache.cloudstack.management.ManagementServerHost.State;
|
||||
import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
|
||||
import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand;
|
||||
@ -196,13 +199,20 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
return preparingForShutdown;
|
||||
}
|
||||
|
||||
private void resetShutdownParams() {
|
||||
logger.debug("Resetting shutdown params");
|
||||
preparingForShutdown = false;
|
||||
shutdownTriggered = false;
|
||||
}
|
||||
|
||||
/**
 * Reports the current value of the {@code preparingForMaintenance} flag of this manager.
 */
@Override
|
||||
public boolean isPreparingForMaintenance() {
|
||||
return preparingForMaintenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resetPreparingForMaintenance() {
|
||||
public void resetMaintenanceParams() {
|
||||
logger.debug("Resetting maintenance params");
|
||||
preparingForMaintenance = false;
|
||||
maintenanceStartTime = 0;
|
||||
lbAlgorithm = null;
|
||||
@ -235,6 +245,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
}
|
||||
this.shutdownTriggered = true;
|
||||
prepareForShutdown(true);
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
|
||||
}
|
||||
|
||||
private void prepareForShutdown(boolean postTrigger) {
|
||||
@ -251,29 +266,38 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
|
||||
this.preparingForShutdown = true;
|
||||
jobManager.disableAsyncJobs();
|
||||
waitForPendingJobs();
|
||||
waitForPendingJobs(false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepareForShutdown() {
|
||||
prepareForShutdown(false);
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cancelShutdown() {
|
||||
if (!this.preparingForShutdown) {
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
if (!this.preparingForShutdown && !(State.PreparingForShutDown.equals(msHost.getState()) || State.ReadyToShutDown.equals(msHost.getState()))) {
|
||||
throw new CloudRuntimeException("Shutdown has not been triggered");
|
||||
}
|
||||
|
||||
this.preparingForShutdown = false;
|
||||
this.shutdownTriggered = false;
|
||||
resetPreparingForMaintenance();
|
||||
resetShutdownParams();
|
||||
resetMaintenanceParams();
|
||||
jobManager.enableAsyncJobs();
|
||||
cancelWaitForPendingJobs();
|
||||
msHostDao.updateState(msHost.getId(), State.Up);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prepareForMaintenance(String lbAlorithm) {
|
||||
public void prepareForMaintenance(String lbAlorithm, boolean forced) {
|
||||
if (this.preparingForShutdown) {
|
||||
throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again");
|
||||
}
|
||||
@ -281,41 +305,57 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
if (this.preparingForMaintenance) {
|
||||
throw new CloudRuntimeException("Maintenance has already been initiated");
|
||||
}
|
||||
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
this.preparingForMaintenance = true;
|
||||
this.maintenanceStartTime = System.currentTimeMillis();
|
||||
this.lbAlgorithm = lbAlorithm;
|
||||
jobManager.disableAsyncJobs();
|
||||
onPreparingForMaintenance();
|
||||
waitForPendingJobs();
|
||||
waitForPendingJobs(forced);
|
||||
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cancelMaintenance() {
|
||||
if (!this.preparingForMaintenance) {
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
if (!this.preparingForMaintenance && !(State.Maintenance.equals(msHost.getState()) || State.PreparingForMaintenance.equals(msHost.getState()))) {
|
||||
throw new CloudRuntimeException("Maintenance has not been initiated");
|
||||
}
|
||||
resetPreparingForMaintenance();
|
||||
this.preparingForShutdown = false;
|
||||
this.shutdownTriggered = false;
|
||||
resetMaintenanceParams();
|
||||
resetShutdownParams();
|
||||
jobManager.enableAsyncJobs();
|
||||
cancelWaitForPendingJobs();
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost != null) {
|
||||
if (State.PreparingForMaintenance.equals(msHost.getState())) {
|
||||
onCancelPreparingForMaintenance();
|
||||
}
|
||||
if (State.Maintenance.equals(msHost.getState())) {
|
||||
onCancelMaintenance();
|
||||
}
|
||||
msHostDao.updateState(msHost.getId(), State.Up);
|
||||
ScheduledExecutorService cancelMaintenanceService = Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("CancelMaintenance-Job"));
|
||||
cancelMaintenanceService.schedule(() -> {
|
||||
cancelMaintenanceTask(msHost.getState());
|
||||
}, 0, TimeUnit.SECONDS);
|
||||
cancelMaintenanceService.shutdown();
|
||||
}
|
||||
|
||||
private void cancelMaintenanceTask(ManagementServerHost.State msState) {
|
||||
if (State.PreparingForMaintenance.equals(msState)) {
|
||||
onCancelPreparingForMaintenance();
|
||||
}
|
||||
if (State.Maintenance.equals(msState)) {
|
||||
onCancelMaintenance();
|
||||
}
|
||||
}
|
||||
|
||||
private void waitForPendingJobs() {
|
||||
private void waitForPendingJobs(boolean forceMaintenance) {
|
||||
cancelWaitForPendingJobs();
|
||||
pendingJobsCheckTask = Executors.newScheduledThreadPool(1, new NamedThreadFactory("PendingJobsCheck"));
|
||||
long pendingJobsCheckDelayInSecs = 1L; // 1 sec
|
||||
long pendingJobsCheckPeriodInSecs = 3L; // every 3 secs, check more frequently for pending jobs
|
||||
pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS);
|
||||
boolean ignoreMaintenanceHosts = ManagementServerMaintenanceIgnoreMaintenanceHosts.value();
|
||||
pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this, ignoreMaintenanceHosts, forceMaintenance), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -349,7 +389,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid());
|
||||
executeCmd(msHost, cmds);
|
||||
|
||||
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||
}
|
||||
|
||||
@ -375,7 +414,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid());
|
||||
executeCmd(msHost, cmds);
|
||||
|
||||
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
|
||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||
}
|
||||
|
||||
@ -395,7 +433,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid());
|
||||
executeCmd(msHost, cmds);
|
||||
|
||||
msHostDao.updateState(msHost.getId(), State.Up);
|
||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||
}
|
||||
|
||||
@ -426,7 +463,8 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
|
||||
checkAnyMsInPreparingStates("prepare for maintenance");
|
||||
|
||||
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) {
|
||||
boolean ignoreMaintenanceHosts = ManagementServerMaintenanceIgnoreMaintenanceHosts.value();
|
||||
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid(), ignoreMaintenanceHosts)) {
|
||||
List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
|
||||
indirectAgentMsList.remove(msHost.getServiceIP());
|
||||
List<String> nonUpMsList = msHostDao.listNonUpStateMsIPs();
|
||||
@ -437,10 +475,9 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
}
|
||||
|
||||
final Command[] cmds = new Command[1];
|
||||
cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm());
|
||||
cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm(), cmd.isForced());
|
||||
executeCmd(msHost, cmds);
|
||||
|
||||
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
|
||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||
}
|
||||
|
||||
@ -460,7 +497,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid());
|
||||
executeCmd(msHost, cmds);
|
||||
|
||||
msHostDao.updateState(msHost.getId(), State.Up);
|
||||
if (cmd.getRebalance()) {
|
||||
logger.info("Propagate MS list and rebalance indirect agents");
|
||||
indirectAgentLB.propagateMSListToAgents(true);
|
||||
}
|
||||
|
||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||
}
|
||||
|
||||
@ -485,12 +526,14 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
|
||||
@Override
|
||||
public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) {
|
||||
resetPreparingForMaintenance();
|
||||
this.preparingForShutdown = false;
|
||||
this.shutdownTriggered = false;
|
||||
resetMaintenanceParams();
|
||||
resetShutdownParams();
|
||||
jobManager.enableAsyncJobs();
|
||||
if (msHost == null) {
|
||||
msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
throw new CloudRuntimeException("Invalid node id for the management server");
|
||||
}
|
||||
}
|
||||
onCancelPreparingForMaintenance();
|
||||
msHostDao.updateState(msHost.getId(), State.Up);
|
||||
@ -546,17 +589,21 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
@Override
|
||||
public ConfigKey<?>[] getConfigKeys() {
|
||||
return new ConfigKey<?>[]{
|
||||
ManagementServerMaintenanceTimeoutInMins
|
||||
ManagementServerMaintenanceTimeoutInMins, ManagementServerMaintenanceIgnoreMaintenanceHosts
|
||||
};
|
||||
}
|
||||
|
||||
private final class CheckPendingJobsTask extends ManagedContextRunnable {
|
||||
|
||||
private ManagementServerMaintenanceManager managementServerMaintenanceManager;
|
||||
private boolean ignoreMaintenanceHosts = false;
|
||||
private boolean agentsTransferTriggered = false;
|
||||
private boolean forceMaintenance = false;
|
||||
|
||||
public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) {
|
||||
public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager, boolean ignoreMaintenanceHosts, boolean forceMaintenance) {
|
||||
this.managementServerMaintenanceManager = managementServerMaintenanceManager;
|
||||
this.ignoreMaintenanceHosts = ignoreMaintenanceHosts;
|
||||
this.forceMaintenance = forceMaintenance;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -570,6 +617,19 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
}
|
||||
|
||||
if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) {
|
||||
if (forceMaintenance) {
|
||||
logger.debug("Maintenance window timeout, MS is forced to Maintenance Mode");
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
logger.warn("Unable to find the management server, invalid node id");
|
||||
return;
|
||||
}
|
||||
msHostDao.updateState(msHost.getId(), State.Maintenance);
|
||||
managementServerMaintenanceManager.onMaintenance();
|
||||
managementServerMaintenanceManager.cancelWaitForPendingJobs();
|
||||
return;
|
||||
}
|
||||
|
||||
logger.debug("Maintenance window timeout, terminating the pending jobs check timer task");
|
||||
managementServerMaintenanceManager.cancelPreparingForMaintenance(null);
|
||||
managementServerMaintenanceManager.cancelWaitForPendingJobs();
|
||||
@ -577,9 +637,11 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
}
|
||||
|
||||
long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId());
|
||||
int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId());
|
||||
String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]",
|
||||
managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
|
||||
|
||||
long totalAgents = totalAgentsInMs();
|
||||
|
||||
String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] preparingForShutdown[%b] preparingForMaintenance[%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]",
|
||||
managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isPreparingForShutdown(), managementServerMaintenanceManager.isPreparingForMaintenance(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
|
||||
logger.debug(msg);
|
||||
|
||||
if (totalPendingJobs > 0) {
|
||||
@ -594,6 +656,10 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
}
|
||||
if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
logger.warn("Unable to find the management server, invalid node id");
|
||||
return;
|
||||
}
|
||||
if (totalAgents == 0) {
|
||||
logger.info("MS is in Maintenance Mode");
|
||||
msHostDao.updateState(msHost.getId(), State.Maintenance);
|
||||
@ -609,7 +675,7 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
|
||||
agentsTransferTriggered = true;
|
||||
logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
|
||||
boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs());
|
||||
boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs(), ignoreMaintenanceHosts);
|
||||
if (!agentsMigrated) {
|
||||
logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
|
||||
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
|
||||
@ -617,18 +683,20 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
return;
|
||||
}
|
||||
|
||||
if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) {
|
||||
if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs(), ignoreMaintenanceHosts)) {
|
||||
logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
|
||||
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
|
||||
managementServerMaintenanceManager.cancelWaitForPendingJobs();
|
||||
return;
|
||||
}
|
||||
} else if (managementServerMaintenanceManager.isPreparingForShutdown()) {
|
||||
logger.info("MS is Ready To Shutdown");
|
||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||
if (msHost == null) {
|
||||
logger.warn("Unable to find the management server, invalid node id");
|
||||
return;
|
||||
}
|
||||
msHostDao.updateState(msHost.getId(), State.ReadyToShutDown);
|
||||
managementServerMaintenanceManager.cancelWaitForPendingJobs();
|
||||
return;
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
logger.error("Error trying to check/run pending jobs task", e);
|
||||
@ -648,5 +716,14 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||
long remainingMaintenanceWindowTimeInMs = (ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000) - maintenanceElapsedTimeInMs;
|
||||
return (remainingMaintenanceWindowTimeInMs > 0) ? remainingMaintenanceWindowTimeInMs : 0;
|
||||
}
|
||||
|
||||
private long totalAgentsInMs() {
|
||||
/* Any Host in Maintenance state could block moving Management Server to Maintenance state, exclude those Hosts from total agents count
|
||||
* To exclude maintenance states use values from ResourceState as source of truth
|
||||
*/
|
||||
List<ResourceState> statesToExclude = ignoreMaintenanceHosts ? ResourceState.s_maintenanceStates : List.of();
|
||||
return hostDao.countHostsByMsResourceStateTypeAndHypervisorType(ManagementServerNode.getManagementServerId(), statesToExclude,
|
||||
IndirectAgentLBServiceImpl.agentValidHostTypes, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,17 +20,23 @@ package org.apache.cloudstack.maintenance.command;
|
||||
|
||||
public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {
|
||||
String lbAlgorithm;
|
||||
boolean forced;
|
||||
|
||||
public PrepareForMaintenanceManagementServerHostCommand(long msId) {
|
||||
super(msId);
|
||||
}
|
||||
|
||||
public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) {
|
||||
public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm, boolean forced) {
|
||||
super(msId);
|
||||
this.lbAlgorithm = lbAlgorithm;
|
||||
this.forced = forced;
|
||||
}
|
||||
|
||||
public String getLbAlgorithm() {
|
||||
return lbAlgorithm;
|
||||
}
|
||||
|
||||
public boolean isForced() {
|
||||
return forced;
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,6 +92,8 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
@Test
|
||||
public void prepareForShutdown() {
|
||||
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
spy.prepareForShutdown();
|
||||
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||
|
||||
@ -106,6 +108,9 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
|
||||
@Test
|
||||
public void cancelShutdown() {
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
spy.cancelShutdown();
|
||||
});
|
||||
@ -115,6 +120,8 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
public void triggerShutdown() {
|
||||
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||
Mockito.lenient().when(spy.isShutdownTriggered()).thenReturn(false);
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
spy.triggerShutdown();
|
||||
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||
|
||||
@ -305,43 +312,44 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
@Test
|
||||
public void prepareForMaintenanceAndCancelFromMaintenanceState() {
|
||||
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||
spy.prepareForMaintenance("static");
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
spy.prepareForMaintenance("static", false);
|
||||
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
spy.prepareForMaintenance("static");
|
||||
spy.prepareForMaintenance("static", false);
|
||||
});
|
||||
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
|
||||
spy.cancelMaintenance();
|
||||
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||
Mockito.verify(spy, Mockito.times(1)).onCancelMaintenance();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void prepareForMaintenanceAndCancelFromPreparingForMaintenanceState() {
|
||||
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||
spy.prepareForMaintenance("static");
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
spy.prepareForMaintenance("static", false);
|
||||
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
spy.prepareForMaintenance("static");
|
||||
spy.prepareForMaintenance("static", false);
|
||||
});
|
||||
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.PreparingForMaintenance);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
|
||||
spy.cancelMaintenance();
|
||||
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||
Mockito.verify(spy, Mockito.times(1)).onCancelPreparingForMaintenance();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void cancelMaintenance() {
|
||||
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
spy.cancelMaintenance();
|
||||
});
|
||||
@ -455,7 +463,7 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
Mockito.when(msHostDao.listNonUpStateMsIPs()).thenReturn(new ArrayList<>());
|
||||
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(true);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(true);
|
||||
Mockito.when(indirectAgentLBMock.getManagementServerList()).thenReturn(new ArrayList<>());
|
||||
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
@ -476,7 +484,7 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
|
||||
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn(null);
|
||||
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
@ -497,7 +505,7 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
|
||||
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Failed");
|
||||
|
||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||
@ -518,7 +526,7 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong(), anyBoolean())).thenReturn(false);
|
||||
Mockito.when(hostDao.listByMs(anyLong())).thenReturn(new ArrayList<>());
|
||||
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||
|
||||
|
||||
@ -287,6 +287,7 @@ import com.cloud.user.dao.AccountDao;
|
||||
import com.cloud.user.dao.UserDao;
|
||||
import com.cloud.utils.NumbersUtil;
|
||||
import com.cloud.utils.Pair;
|
||||
import com.cloud.utils.Ternary;
|
||||
import com.cloud.utils.UriUtils;
|
||||
import com.cloud.utils.component.ManagerBase;
|
||||
import com.cloud.utils.crypt.DBEncryptionUtil;
|
||||
@ -631,21 +632,30 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
||||
private void initMessageBusListener() {
|
||||
messageBus.subscribe(EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, new MessageSubscriber() {
|
||||
@Override
|
||||
public void onPublishMessage(String serderAddress, String subject, Object args) {
|
||||
String globalSettingUpdated = (String) args;
|
||||
if (StringUtils.isEmpty(globalSettingUpdated)) {
|
||||
public void onPublishMessage(String senderAddress, String subject, Object args) {
|
||||
Ternary<String, ConfigKey.Scope, Long> settingUpdated = (Ternary<String, ConfigKey.Scope, Long>) args;
|
||||
String settingNameUpdated = settingUpdated.first();
|
||||
if (StringUtils.isEmpty(settingNameUpdated)) {
|
||||
return;
|
||||
}
|
||||
if (globalSettingUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
|
||||
globalSettingUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
|
||||
_indirectAgentLB.propagateMSListToAgents();
|
||||
} else if (globalSettingUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
|
||||
|| globalSettingUpdated.equals(Config.MigrateWait.toString())) {
|
||||
if (settingNameUpdated.equals(ApiServiceConfiguration.ManagementServerAddresses.key()) ||
|
||||
settingNameUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBAlgorithm.key())) {
|
||||
_indirectAgentLB.propagateMSListToAgents(false);
|
||||
} else if (settingNameUpdated.equals(Config.RouterAggregationCommandEachTimeout.toString())
|
||||
|| settingNameUpdated.equals(Config.MigrateWait.toString())) {
|
||||
Map<String, String> params = new HashMap<String, String>();
|
||||
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
|
||||
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
|
||||
_agentManager.propagateChangeToAgents(params);
|
||||
} else if (VMLeaseManager.InstanceLeaseEnabled.key().equals(globalSettingUpdated)) {
|
||||
} else if (settingNameUpdated.equals(IndirectAgentLBServiceImpl.IndirectAgentLBCheckInterval.key())) {
|
||||
ConfigKey.Scope scope = settingUpdated.second();
|
||||
if (scope == ConfigKey.Scope.Global) {
|
||||
_indirectAgentLB.propagateMSListToAgents(false);
|
||||
} else if (scope == ConfigKey.Scope.Cluster) {
|
||||
Long clusterId = settingUpdated.third();
|
||||
_indirectAgentLB.propagateMSListToAgentsInCluster(clusterId);
|
||||
}
|
||||
} else if (VMLeaseManager.InstanceLeaseEnabled.key().equals(settingNameUpdated)) {
|
||||
vmLeaseManager.onLeaseFeatureToggle();
|
||||
}
|
||||
}
|
||||
@ -845,6 +855,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
||||
CallContext.current().setEventDetails(String.format(" Name: %s, New Value: %s, Scope: %s", name, value, scope.name()));
|
||||
|
||||
_configDepot.invalidateConfigCache(name, scope, resourceId);
|
||||
messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, new Ternary<>(name, scope, resourceId));
|
||||
return valueEncrypted ? DBEncryptionUtil.decrypt(value) : value;
|
||||
}
|
||||
|
||||
@ -939,7 +950,7 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
|
||||
}
|
||||
|
||||
txn.commit();
|
||||
messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, name);
|
||||
messageBus.publish(_name, EventTypes.EVENT_CONFIGURATION_VALUE_EDIT, PublishScope.GLOBAL, new Ternary<>(name, ConfigKey.Scope.Global, resourceId));
|
||||
return _configDao.getValue(name);
|
||||
}
|
||||
|
||||
|
||||
@ -30,6 +30,7 @@ import java.util.concurrent.TimeUnit;
|
||||
import javax.inject.Inject;
|
||||
import javax.naming.ConfigurationException;
|
||||
|
||||
import com.cloud.dc.ClusterVO;
|
||||
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBRoundRobinAlgorithm;
|
||||
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBShuffleAlgorithm;
|
||||
import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBStaticAlgorithm;
|
||||
@ -62,7 +63,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
|
||||
public static final ConfigKey<String> IndirectAgentLBAlgorithm = new ConfigKey<>(String.class,
|
||||
"indirect.agent.lb.algorithm", "Advanced", "static",
|
||||
"The algorithm to be applied on the provided management server list in the 'host' config that that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle.",
|
||||
"The algorithm to be applied on the provided management server list in the 'host' config that that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle. " +
|
||||
"Note: The lb algorithm 'shuffle' disables the indirect agent lb check background task once the algorithm is applied on the agent.",
|
||||
true, ConfigKey.Scope.Global, null, null, null, null, null, ConfigKey.Kind.Select, "static,roundrobin,shuffle");
|
||||
|
||||
public static final ConfigKey<Long> IndirectAgentLBCheckInterval = new ConfigKey<>("Advanced", Long.class,
|
||||
@ -89,7 +91,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
private static final List<ResourceState> agentValidResourceStates = List.of(
|
||||
ResourceState.Enabled, ResourceState.Maintenance, ResourceState.Disabled,
|
||||
ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance);
|
||||
private static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy,
|
||||
private static final List<ResourceState> agentNonMaintenanceResourceStates = List.of(
|
||||
ResourceState.Enabled, ResourceState.Disabled);
|
||||
public static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy,
|
||||
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
|
||||
private static final List<Host.Type> agentNonRoutingHostTypes = List.of(Host.Type.ConsoleProxy,
|
||||
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
|
||||
@ -132,7 +136,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm);
|
||||
List<Long> hostIdList = orderedHostIdList;
|
||||
if (hostIdList == null) {
|
||||
hostIdList = algorithm.isHostListNeeded() ? getOrderedHostIdList(dcId) : new ArrayList<>();
|
||||
hostIdList = algorithm.isHostListNeeded() ? getOrderedHostIdList(dcId, false) : new ArrayList<>();
|
||||
}
|
||||
|
||||
// just in case we have a host in creating state make sure it is in the list:
|
||||
@ -167,8 +171,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
return IndirectAgentLBCheckInterval.valueIn(clusterId);
|
||||
}
|
||||
|
||||
List<Long> getOrderedHostIdList(final Long dcId) {
|
||||
final List<Long> hostIdList = getAllAgentBasedHostsFromDB(dcId, null);
|
||||
List<Long> getOrderedHostIdList(final Long dcId, boolean excludeHostsInMaintenance) {
|
||||
final List<Long> hostIdList = getAllAgentBasedHostsFromDB(dcId, null, null, excludeHostsInMaintenance);
|
||||
hostIdList.sort(Comparator.comparingLong(x -> x));
|
||||
return hostIdList;
|
||||
}
|
||||
@ -259,19 +263,25 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
agentValidResourceStates, agentNonRoutingHostTypes, agentValidHypervisorTypes);
|
||||
}
|
||||
|
||||
private List<Long> getAllAgentBasedRoutingHostsFromDB(final Long zoneId, final Long clusterId, final Long msId) {
|
||||
private List<Long> getAllAgentBasedRoutingHostsFromDB(final Long zoneId, final Long clusterId, final Long msId, boolean excludeHostsInMaintenance) {
|
||||
List<ResourceState> validResourceStates = excludeHostsInMaintenance ? agentNonMaintenanceResourceStates : agentValidResourceStates;
|
||||
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, msId,
|
||||
agentValidResourceStates, List.of(Host.Type.Routing), agentValidHypervisorTypes);
|
||||
validResourceStates, List.of(Host.Type.Routing), agentValidHypervisorTypes);
|
||||
}
|
||||
|
||||
private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId) {
|
||||
private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId, final Long msId, boolean excludeHostsInMaintenance) {
|
||||
List<ResourceState> validResourceStates = excludeHostsInMaintenance ? agentNonMaintenanceResourceStates : agentValidResourceStates;
|
||||
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, null,
|
||||
agentValidResourceStates, agentValidHostTypes, agentValidHypervisorTypes);
|
||||
validResourceStates, agentValidHostTypes, agentValidHypervisorTypes);
|
||||
}
|
||||
|
||||
private List<Long> getAllAgentBasedHosts(long msId, boolean excludeHostsInMaintenance) {
|
||||
return getAllAgentBasedHostsFromDB(null, null, msId, excludeHostsInMaintenance);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean haveAgentBasedHosts(long msId) {
|
||||
return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId));
|
||||
public boolean haveAgentBasedHosts(long msId, boolean excludeHostsInMaintenance) {
|
||||
return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId, excludeHostsInMaintenance));
|
||||
}
|
||||
|
||||
private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() {
|
||||
@ -303,8 +313,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
@Override
|
||||
public void propagateMSListToAgents() {
|
||||
logger.debug("Propagating management server list update to agents");
|
||||
public void propagateMSListToAgents(boolean triggerHostLB) {
|
||||
logger.debug("Propagating management server list update to the agents");
|
||||
ExecutorService setupMSListExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSList-Worker"));
|
||||
final String lbAlgorithm = getLBAlgorithmName();
|
||||
final Long globalLbCheckInterval = getLBPreferredHostCheckInterval(null);
|
||||
@ -316,20 +326,20 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
Map<Long, List<Long>> clusterHostIdsMap = new HashMap<>();
|
||||
List<Long> clusterIds = clusterDao.listAllClusterIds(zone.getId());
|
||||
for (Long clusterId : clusterIds) {
|
||||
List<Long> hostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null);
|
||||
List<Long> hostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null, false);
|
||||
clusterHostIdsMap.put(clusterId, hostIds);
|
||||
zoneHostIds.addAll(hostIds);
|
||||
}
|
||||
zoneHostIds.sort(Comparator.comparingLong(x -> x));
|
||||
final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
|
||||
for (Long nonRoutingHostId : nonRoutingHostIds) {
|
||||
setupMSListExecutorService.submit(new SetupMSListTask(nonRoutingHostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, globalLbCheckInterval));
|
||||
setupMSListExecutorService.submit(new SetupMSListTask(nonRoutingHostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, globalLbCheckInterval, triggerHostLB));
|
||||
}
|
||||
for (Long clusterId : clusterIds) {
|
||||
final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
|
||||
List<Long> hostIds = clusterHostIdsMap.get(clusterId);
|
||||
for (Long hostId : hostIds) {
|
||||
setupMSListExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval));
|
||||
setupMSListExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval, triggerHostLB));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -345,6 +355,45 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void propagateMSListToAgentsInCluster(Long clusterId) {
|
||||
if (clusterId == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
logger.debug("Propagating management server list update to the agents in cluster " + clusterId);
|
||||
ClusterVO cluster = clusterDao.findById(clusterId);
|
||||
if (cluster == null) {
|
||||
logger.warn("Unable to propagate management server list, couldn't find cluster " + clusterId);
|
||||
return;
|
||||
}
|
||||
DataCenterVO zone = dataCenterDao.findById(cluster.getDataCenterId());
|
||||
if (zone == null) {
|
||||
logger.warn("Unable to propagate management server list, couldn't find zone of the cluster " + clusterId);
|
||||
return;
|
||||
}
|
||||
|
||||
ExecutorService setupMSListInClusterExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSListInCluster-Worker"));
|
||||
final String lbAlgorithm = getLBAlgorithmName();
|
||||
List<Long> clusterHostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null, false);
|
||||
clusterHostIds.sort(Comparator.comparingLong(x -> x));
|
||||
final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
|
||||
final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
|
||||
for (Long hostId : clusterHostIds) {
|
||||
setupMSListInClusterExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), clusterHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval, false));
|
||||
}
|
||||
|
||||
setupMSListInClusterExecutorService.shutdown();
|
||||
try {
|
||||
if (!setupMSListInClusterExecutorService.awaitTermination(300, TimeUnit.SECONDS)) {
|
||||
setupMSListInClusterExecutorService.shutdownNow();
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
setupMSListInClusterExecutorService.shutdownNow();
|
||||
logger.debug(String.format("Force shutdown setup ms list in cluster service as it did not shutdown in the desired time due to: %s", e.getMessage()));
|
||||
}
|
||||
}
|
||||
|
||||
private final class SetupMSListTask extends ManagedContextRunnable {
|
||||
private Long hostId;
|
||||
private Long dcId;
|
||||
@ -352,21 +401,23 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
private List<String> avoidMsList;
|
||||
private String lbAlgorithm;
|
||||
private Long lbCheckInterval;
|
||||
private Boolean triggerHostLb;
|
||||
|
||||
public SetupMSListTask(Long hostId, Long dcId, List<Long> orderedHostIdList, List<String> avoidMsList,
|
||||
String lbAlgorithm, Long lbCheckInterval) {
|
||||
String lbAlgorithm, Long lbCheckInterval, Boolean triggerHostLb) {
|
||||
this.hostId = hostId;
|
||||
this.dcId = dcId;
|
||||
this.orderedHostIdList = orderedHostIdList;
|
||||
this.avoidMsList = avoidMsList;
|
||||
this.lbAlgorithm = lbAlgorithm;
|
||||
this.lbCheckInterval = lbCheckInterval;
|
||||
this.triggerHostLb = triggerHostLb;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void runInContext() {
|
||||
final List<String> msList = getManagementServerList(hostId, dcId, orderedHostIdList);
|
||||
final SetupMSListCommand cmd = new SetupMSListCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
|
||||
final SetupMSListCommand cmd = new SetupMSListCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval, triggerHostLb);
|
||||
cmd.setWait(60);
|
||||
final Answer answer = agentManager.easySend(hostId, cmd);
|
||||
if (answer == null || !answer.getResult()) {
|
||||
@ -419,9 +470,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
|
||||
protected boolean migrateRoutingHostAgentsInCluster(long clusterId, String fromMsUuid, long fromMsId, DataCenter dc,
|
||||
long migrationStartTimeInMs, long timeoutDurationInMs, final List<String> avoidMsList, String lbAlgorithm,
|
||||
boolean lbAlgorithmChanged, List<Long> orderedHostIdList) {
|
||||
boolean lbAlgorithmChanged, List<Long> orderedHostIdList, boolean excludeHostsInMaintenance) {
|
||||
|
||||
List<Long> agentBasedHostsOfMsInDcAndCluster = getAllAgentBasedRoutingHostsFromDB(dc.getId(), clusterId, fromMsId);
|
||||
List<Long> agentBasedHostsOfMsInDcAndCluster = getAllAgentBasedRoutingHostsFromDB(dc.getId(), clusterId, fromMsId, excludeHostsInMaintenance);
|
||||
if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDcAndCluster)) {
|
||||
return true;
|
||||
}
|
||||
@ -461,7 +512,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) {
|
||||
public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
|
||||
if (timeoutDurationInMs <= 0) {
|
||||
logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid));
|
||||
return false;
|
||||
@ -469,7 +520,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
|
||||
logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
|
||||
long migrationStartTimeInMs = System.currentTimeMillis();
|
||||
if (!haveAgentBasedHosts(fromMsId)) {
|
||||
if (!haveAgentBasedHosts(fromMsId, excludeHostsInMaintenance)) {
|
||||
logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid));
|
||||
return true;
|
||||
}
|
||||
@ -489,7 +540,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
List<DataCenterVO> dataCenterList = dcDao.listAll();
|
||||
for (DataCenterVO dc : dataCenterList) {
|
||||
if (!migrateAgentsInZone(dc, fromMsUuid, fromMsId, avoidMsList, lbAlgorithm, lbAlgorithmChanged,
|
||||
migrationStartTimeInMs, timeoutDurationInMs)) {
|
||||
migrationStartTimeInMs, timeoutDurationInMs, excludeHostsInMaintenance)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -498,8 +549,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
}
|
||||
|
||||
private boolean migrateAgentsInZone(DataCenterVO dc, String fromMsUuid, long fromMsId, List<String> avoidMsList,
|
||||
String lbAlgorithm, boolean lbAlgorithmChanged, long migrationStartTimeInMs, long timeoutDurationInMs) {
|
||||
List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId());
|
||||
String lbAlgorithm, boolean lbAlgorithmChanged, long migrationStartTimeInMs, long timeoutDurationInMs, boolean excludeHostsInMaintenance) {
|
||||
List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId(), excludeHostsInMaintenance);
|
||||
if (!migrateNonRoutingHostAgentsInZone(fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
|
||||
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) {
|
||||
return false;
|
||||
@ -507,7 +558,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
List<Long> clusterIds = clusterDao.listAllClusterIds(dc.getId());
|
||||
for (Long clusterId : clusterIds) {
|
||||
if (!migrateRoutingHostAgentsInCluster(clusterId, fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
|
||||
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) {
|
||||
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList, excludeHostsInMaintenance)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -547,7 +598,9 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||
final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
|
||||
cmd.setWait(60);
|
||||
final Answer answer = agentManager.easySend(hostId, cmd); //may not receive answer when the agent disconnects immediately and try reconnecting to other ms host
|
||||
if (answer != null && !answer.getResult()) {
|
||||
if (answer == null) {
|
||||
logger.warn(String.format("Got empty answer while initiating migration of agent connection for host agent ID: %d", hostId));
|
||||
} else if (!answer.getResult()) {
|
||||
logger.warn(String.format("Error while initiating migration of agent connection for host agent ID: %d - %s", hostId, answer.getDetails()));
|
||||
}
|
||||
updateLastManagementServer(hostId, fromMsId);
|
||||
|
||||
@ -204,7 +204,7 @@ public class IndirectAgentLBServiceImplTest {
|
||||
public void testGetOrderedRunningHostIdsEmptyList() {
|
||||
doReturn(Collections.emptyList()).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
|
||||
Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
||||
Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID).isEmpty());
|
||||
Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID, false).isEmpty());
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -213,6 +213,6 @@ public class IndirectAgentLBServiceImplTest {
|
||||
.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null),
|
||||
Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
||||
Assert.assertEquals(Arrays.asList(host1.getId(), host2.getId(), host3.getId(), host4.getId()),
|
||||
agentMSLB.getOrderedHostIdList(DC_1_ID));
|
||||
agentMSLB.getOrderedHostIdList(DC_1_ID, false));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1900,6 +1900,7 @@
|
||||
"label.read.io": "Read (IO)",
|
||||
"label.readonly": "Read-Only",
|
||||
"label.reason": "Reason",
|
||||
"label.rebalance": "Rebalance",
|
||||
"label.reboot": "Reboot",
|
||||
"label.recent.deliveries": "Recent deliveries",
|
||||
"label.receivedbytes": "Bytes received",
|
||||
|
||||
@ -75,6 +75,7 @@ export default {
|
||||
message: 'message.cancel.maintenance',
|
||||
dataView: true,
|
||||
popup: true,
|
||||
args: ['rebalance'],
|
||||
show: (record, store) => { return ['PreparingForMaintenance', 'Maintenance'].includes(record.state) },
|
||||
mapping: {
|
||||
managementserverid: {
|
||||
@ -109,7 +110,6 @@ export default {
|
||||
icon: 'close-circle-outlined',
|
||||
label: 'label.cancel.shutdown',
|
||||
message: 'message.cancel.shutdown',
|
||||
docHelp: 'installguide/configuration.html#adding-a-zone',
|
||||
dataView: true,
|
||||
popup: true,
|
||||
show: (record, store) => { return ['PreparingForShutDown', 'ReadyToShutDown', 'ShuttingDown'].includes(record.state) },
|
||||
|
||||
@ -1193,7 +1193,7 @@ export default {
|
||||
this.getFirstIndexFocus()
|
||||
|
||||
this.showAction = true
|
||||
const listIconForFillValues = ['copy-outlined', 'CopyOutlined', 'edit-outlined', 'EditOutlined', 'share-alt-outlined', 'ShareAltOutlined']
|
||||
const listIconForFillValues = ['copy-outlined', 'CopyOutlined', 'edit-outlined', 'EditOutlined', 'share-alt-outlined', 'ShareAltOutlined', 'minus-square-outlined']
|
||||
for (const param of this.currentAction.paramFields) {
|
||||
if (param.type === 'list' && ['tags', 'hosttags', 'storagetags', 'storageaccessgroups', 'files'].includes(param.name)) {
|
||||
param.type = 'string'
|
||||
@ -1422,6 +1422,8 @@ export default {
|
||||
fieldValue = this.resource[fieldName] ? this.resource[fieldName] : null
|
||||
if (fieldValue) {
|
||||
this.form[field.name] = fieldValue
|
||||
} else if (field.type === 'boolean' && field.name === 'rebalance' && this.currentAction.api === 'cancelMaintenance') {
|
||||
this.form[field.name] = true
|
||||
}
|
||||
})
|
||||
},
|
||||
@ -1578,6 +1580,10 @@ export default {
|
||||
}
|
||||
}
|
||||
|
||||
if (['cancelMaintenance'].includes(action.api) && (params.rebalance === undefined || params.rebalance === null || params.rebalance === '')) {
|
||||
params.rebalance = true
|
||||
}
|
||||
|
||||
for (const key in values) {
|
||||
const input = values[key]
|
||||
for (const param of action.params) {
|
||||
|
||||
@ -45,6 +45,12 @@
|
||||
</a-select-option>
|
||||
</a-select>
|
||||
</a-form-item>
|
||||
<a-form-item name="forced" ref="forced">
|
||||
<template #label>
|
||||
<tooltip-label :title="$t('label.forced')" :tooltip="prepareForMaintenanceApiParams.forced.description"/>
|
||||
</template>
|
||||
<a-switch v-model:checked="form.forced" />
|
||||
</a-form-item>
|
||||
<a-divider/>
|
||||
<a-alert type="error">
|
||||
<template #message>
|
||||
@ -135,6 +141,7 @@ export default {
|
||||
if (this.isPrepareForMaintenance && this.form.algorithm !== '') {
|
||||
params.algorithm = this.form.algorithm
|
||||
}
|
||||
params.forced = this.form.forced
|
||||
api(this.action.currentAction.api, params).then(() => {
|
||||
this.$message.success(this.$t(this.action.currentAction.label) + ' : ' + this.resource.name)
|
||||
this.closeAction()
|
||||
|
||||
@ -323,7 +323,7 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||
logger.trace("Reading from: {}", socketChannel.socket().toString());
|
||||
final byte[] data = link.read(socketChannel);
|
||||
if (data == null) {
|
||||
logger.trace("Packet is incomplete. Waiting for more.");
|
||||
logger.trace("Packet is incomplete. Waiting for more.");
|
||||
return;
|
||||
}
|
||||
final Task task = _factory.create(Task.Type.DATA, link, data);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user