From 3b108b968f0f52c0e1ee88041b7b24a14762d0dd Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Wed, 29 Jan 2025 13:31:15 +0530 Subject: [PATCH] Support for Management Server Maintenance Mode (#9854) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Support for Management Server Maintenance - New APIs: prepareForMaintenance and cancelMaintenance, with required parameter - managementserverid. - New management server states for maintenance: PreparingForMaintenance, Maintenance. - listHosts API with optional parameter – managementserverid, to list the hosts connected to the management server. - Support management server maintenance when more than one active management server is available. - Triggers transfer of agents to other available management servers for maintenance; new agent command MigrateAgentConnectionCommand initiates the transfer of indirect agents. - New global config 'management.server.maintenance.timeout', to set the timeout (in mins) for the management server maintenance window, default: 60 mins. - UI changes: Prepare and Cancel Maintenance in Management Server section, Connected Agents tab, New fields for hosts and management servers. * Updated pending jobs check timer task with ScheduledExecutorService * keep maintenance state on trigger shutdown call when ms is in maintenance * add pending jobs count to ms response * during ms heartbeat, update state to up only when it's down * allow vm work jobs of async job created before prepare for maintenance * Revert "keep maintenance state on trigger shutdown call when ms is in maintenance" This reverts commit 607e13364679eac897f4d146bb3325ea7a61ba17. 
* skip maintenance test when multiple management servers are not available, and not configured in host setting for kvm --- .../src/main/java/com/cloud/agent/Agent.java | 98 ++- .../main/java/com/cloud/agent/AgentShell.java | 9 + .../java/com/cloud/agent/IAgentShell.java | 4 + api/src/main/java/com/cloud/host/Host.java | 2 + api/src/main/java/com/cloud/host/Status.java | 1 + .../com/cloud/resource/ResourceService.java | 8 +- .../server/ManagementServerHostStats.java | 5 + .../apache/cloudstack/api/ApiConstants.java | 5 +- ...Cmd.java => CancelHostMaintenanceCmd.java} | 2 +- .../api/command/admin/host/ListHostsCmd.java | 8 + ...java => PrepareForHostMaintenanceCmd.java} | 2 +- .../api/response/AsyncJobResponse.java | 16 +- .../cloudstack/api/response/HostResponse.java | 26 +- .../api/response/LoginCmdResponse.java | 12 + .../response/ManagementServerResponse.java | 24 + .../management/ManagementServerHost.java | 2 +- client/pom.xml | 2 +- .../api/MigrateAgentConnectionAnswer.java | 38 ++ .../api/MigrateAgentConnectionCommand.java | 61 ++ .../com/cloud/agent/api/StartupCommand.java | 9 + .../cloud/agent/api/TransferAgentCommand.java | 10 + .../com/cloud/resource/ServerResource.java | 4 + .../agent/test/CheckOnHostCommandTest.java | 5 + .../java/com/cloud/agent/AgentManager.java | 7 + .../com/cloud/resource/ResourceManager.java | 2 + engine/orchestration/pom.xml | 2 +- .../cloud/agent/manager/AgentManagerImpl.java | 110 +++- .../manager/ClusteredAgentManagerImpl.java | 220 ++++++- .../ClusteredAgentRebalanceService.java | 1 + .../entity/api/db/EngineHostVO.java | 12 + .../orchestration/NetworkOrchestrator.java | 2 +- .../src/main/java/com/cloud/host/HostVO.java | 12 + .../main/java/com/cloud/host/dao/HostDao.java | 15 +- .../java/com/cloud/host/dao/HostDaoImpl.java | 48 +- .../META-INF/db/schema-42010to42100.sql | 3 + .../cloudstack/agent/lb/IndirectAgentLB.java | 22 + .../com/cloud/cluster/ClusterManagerImpl.java | 24 +- .../cluster/dao/ManagementServerHostDao.java | 
6 +- .../dao/ManagementServerHostDaoImpl.java | 42 +- .../dao/ManagementServerHostPeerDao.java | 3 + .../dao/ManagementServerHostPeerDaoImpl.java | 33 + .../jobs/impl/AsyncJobManagerImpl.java | 55 +- .../vmware/resource/VmwareResource.java | 6 + .../resource/CitrixResourceBase.java | 6 + .../resource/XenServer56Resource.java | 7 +- .../xenserver/XenServerResourceNewBase.java | 7 +- plugins/{shutdown => maintenance}/pom.xml | 4 +- .../command/BaseMSMaintenanceActionCmd.java} | 8 +- .../api/command/CancelMaintenanceCmd.java | 60 ++ .../api/command/CancelShutdownCmd.java | 10 +- .../api/command/PrepareForMaintenanceCmd.java | 72 +++ .../api/command/PrepareForShutdownCmd.java | 9 +- .../api/command/ReadyForShutdownCmd.java | 50 +- .../api/command/TriggerShutdownCmd.java | 10 +- .../ManagementServerMaintenanceResponse.java} | 80 ++- .../ManagementServerMaintenanceListener.java | 24 + .../ManagementServerMaintenanceManager.java | 108 ++++ ...anagementServerMaintenanceManagerImpl.java | 598 ++++++++++++++++++ ...seShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 26 + ...elShutdownManagementServerHostCommand.java | 2 +- ...aintenanceManagementServerHostCommand.java | 36 ++ ...orShutdownManagementServerHostCommand.java | 2 +- ...erShutdownManagementServerHostCommand.java | 2 +- .../cloudstack/maintenance}/module.properties | 2 +- .../spring-maintenance-context.xml} | 4 +- ...mentServerMaintenanceManagerImplTest.java} | 21 +- .../cloudstack/api/MetricConstants.java | 2 + .../metrics/MetricsServiceImpl.java | 2 + .../ManagementServerMetricsResponse.java | 17 + plugins/pom.xml | 2 +- .../cloudstack/shutdown/ShutdownManager.java | 60 -- .../shutdown/ShutdownManagerImpl.java | 265 -------- .../java/com/cloud/api/ApiDispatcher.java | 2 +- .../main/java/com/cloud/api/ApiServer.java | 16 +- .../com/cloud/api/query/QueryManagerImpl.java | 14 + .../api/query/dao/AsyncJobJoinDaoImpl.java | 15 +- 
.../cloud/api/query/dao/HostJoinDaoImpl.java | 11 + .../cloud/network/SshKeysDistriMonitor.java | 26 +- .../security/SecurityGroupListener.java | 33 +- .../cloud/resource/ResourceManagerImpl.java | 21 +- .../RollingMaintenanceManagerImpl.java | 4 +- .../ManagementServerHostStatsEntry.java | 21 + .../cloud/server/ManagementServerImpl.java | 8 +- .../java/com/cloud/server/StatsCollector.java | 3 + .../storage/listener/StoragePoolMonitor.java | 72 ++- .../agent/lb/IndirectAgentLBServiceImpl.java | 151 ++++- .../spring-server-core-managers-context.xml | 4 +- .../resource/MockResourceManagerImpl.java | 17 +- .../test/resources/createNetworkOffering.xml | 159 ++--- ... test_ms_maintenance_and_safe_shutdown.py} | 65 +- tools/apidoc/gen_toc.py | 3 +- ui/public/locales/en.json | 20 +- ui/src/components/page/GlobalLayout.vue | 22 +- ui/src/components/view/ListView.vue | 2 +- ui/src/config/section/infra/hosts.js | 3 +- .../config/section/infra/managementServers.js | 41 +- ui/src/store/getters.js | 2 + ui/src/store/modules/app.js | 6 + ui/src/store/modules/user.js | 19 + ui/src/store/mutation-types.js | 1 + ui/src/views/AutogenView.vue | 4 +- ui/src/views/infra/Confirmation.vue | 40 +- ui/src/views/infra/ConnectedAgentsTab.vue | 88 +++ .../com/cloud/utils/nio/NioConnection.java | 18 + 105 files changed, 2673 insertions(+), 714 deletions(-) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{CancelMaintenanceCmd.java => CancelHostMaintenanceCmd.java} (98%) rename api/src/main/java/org/apache/cloudstack/api/command/admin/host/{PrepareForMaintenanceCmd.java => PrepareForHostMaintenanceCmd.java} (98%) create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java create mode 100644 core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java rename plugins/{shutdown => maintenance}/pom.xml (92%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java => 
maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java} (85%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java (83%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java (85%) rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java (66%) rename plugins/{shutdown => maintenance}/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java (85%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java => maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java} (52%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/BaseShutdownManagementServerHostCommand.java (95%) create mode 100644 plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/CancelShutdownManagementServerHostCommand.java (95%) create mode 100644 
plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/PrepareForShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/java/org/apache/cloudstack/shutdown => maintenance/src/main/java/org/apache/cloudstack/maintenance}/command/TriggerShutdownManagementServerHostCommand.java (95%) rename plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown => maintenance/src/main/resources/META-INF/cloudstack/maintenance}/module.properties (97%) rename plugins/{shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml => maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml} (83%) rename plugins/{shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java => maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java} (84%) delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java delete mode 100644 plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java rename test/integration/smoke/{test_safe_shutdown.py => test_ms_maintenance_and_safe_shutdown.py} (58%) create mode 100644 ui/src/views/infra/ConnectedAgentsTab.vue diff --git a/agent/src/main/java/com/cloud/agent/Agent.java b/agent/src/main/java/com/cloud/agent/Agent.java index c84179d6660..97803477115 100644 --- a/agent/src/main/java/com/cloud/agent/Agent.java +++ b/agent/src/main/java/com/cloud/agent/Agent.java @@ -27,6 +27,7 @@ import java.net.UnknownHostException; import java.nio.channels.ClosedChannelException; import java.nio.charset.Charset; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; 
@@ -40,6 +41,8 @@ import java.util.concurrent.atomic.AtomicInteger; import javax.naming.ConfigurationException; +import com.cloud.agent.api.MigrateAgentConnectionAnswer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; import com.cloud.resource.AgentStatusUpdater; import com.cloud.resource.ResourceStatusUpdater; import com.cloud.agent.api.PingAnswer; @@ -313,7 +316,6 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } _shell.updateConnectedHost(); scavengeOldAgentObjects(); - } public void stop(final String reason, final String detail) { @@ -477,6 +479,10 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } public void sendStartup(final Link link) { + sendStartup(link, false); + } + + public void sendStartup(final Link link, boolean transfer) { final StartupCommand[] startup = _resource.initialize(); if (startup != null) { final String msHostList = _shell.getPersistentProperty(null, "host"); @@ -484,6 +490,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater for (int i = 0; i < startup.length; i++) { setupStartupCommand(startup[i]); startup[i].setMSHostList(msHostList); + startup[i].setConnectionTransferred(transfer); commands[i] = startup[i]; } final Request request = new Request(_id != null ? 
_id : -1, -1, commands, false, false); @@ -541,9 +548,14 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } protected void reconnect(final Link link) { - if (!_reconnectAllowed) { + reconnect(link, null, null, false); + } + + protected void reconnect(final Link link, String preferredHost, List avoidHostList, boolean forTransfer) { + if (!(forTransfer || _reconnectAllowed)) { return; } + synchronized (this) { if (_startup != null) { _startup.cancel(); @@ -575,22 +587,29 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater _shell.getBackoffAlgorithm().waitBeforeRetry(); } + String host = preferredHost; + if (StringUtils.isEmpty(host)) { + host = _shell.getNextHost(); + } + do { - final String host = _shell.getNextHost(); - _connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this); - logger.info("Reconnecting to host:{}", host); - try { - _connection.start(); - } catch (final NioConnectionException e) { - logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e); - _connection.stop(); + if (CollectionUtils.isEmpty(avoidHostList) || !avoidHostList.contains(host)) { + _connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this); + logger.info("Reconnecting to host:{}", host); try { - _connection.cleanUp(); - } catch (final IOException ex) { - logger.warn("Fail to clean up old connection. {}", ex); + _connection.start(); + } catch (final NioConnectionException e) { + logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e); + _connection.stop(); + try { + _connection.cleanUp(); + } catch (final IOException ex) { + logger.warn("Fail to clean up old connection. 
{}", ex); + } } } _shell.getBackoffAlgorithm().waitBeforeRetry(); + host = _shell.getNextHost(); } while (!_connection.isStartup()); _shell.updateConnectedHost(); logger.info("Connected to the host: {}", _shell.getConnectedHost()); @@ -703,6 +722,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater } } else if (cmd instanceof SetupMSListCommand) { answer = setupManagementServerList((SetupMSListCommand) cmd); + } else if (cmd instanceof MigrateAgentConnectionCommand) { + answer = migrateAgentToOtherMS((MigrateAgentConnectionCommand) cmd); } else { if (cmd instanceof ReadyCommand) { processReadyCommand(cmd); @@ -858,6 +879,53 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater return new SetupMSListAnswer(true); } + private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) { + try { + if (CollectionUtils.isNotEmpty(cmd.getMsList())) { + processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval()); + } + migrateAgentConnection(cmd.getAvoidMsList()); + } catch (Exception e) { + String errMsg = "Migrate agent connection failed, due to " + e.getMessage(); + logger.debug(errMsg, e); + return new MigrateAgentConnectionAnswer(errMsg); + } + return new MigrateAgentConnectionAnswer(true); + } + + private void migrateAgentConnection(List avoidMsList) { + final String[] msHosts = _shell.getHosts(); + if (msHosts == null || msHosts.length < 1) { + throw new CloudRuntimeException("Management Server hosts empty, not properly configured in agent"); + } + + List msHostsList = new ArrayList<>(Arrays.asList(msHosts)); + msHostsList.removeAll(avoidMsList); + if (msHostsList.isEmpty() || StringUtils.isEmpty(msHostsList.get(0))) { + throw new CloudRuntimeException("No other Management Server hosts to migrate"); + } + + String preferredHost = null; + for (String msHost : msHostsList) { + try (final Socket socket = new Socket()) { + socket.connect(new InetSocketAddress(msHost, 
_shell.getPort()), 5000); + preferredHost = msHost; + break; + } catch (final IOException e) { + throw new CloudRuntimeException("Management server host: " + msHost + " is not reachable, to migrate connection"); + } + } + + if (preferredHost == null) { + throw new CloudRuntimeException("Management server host(s) are not reachable, to migrate connection"); + } + + logger.debug("Management server host " + preferredHost + " is found to be reachable, trying to reconnect"); + _shell.resetHostCounter(); + _shell.setConnectionTransfer(true); + reconnect(_link, preferredHost, avoidMsList, true); + } + public void processResponse(final Response response, final Link link) { final Answer answer = response.getAnswer(); logger.debug("Received response: {}", response.toString()); @@ -1153,7 +1221,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater if (task.getType() == Task.Type.CONNECT) { _shell.getBackoffAlgorithm().reset(); setLink(task.getLink()); - sendStartup(task.getLink()); + sendStartup(task.getLink(), _shell.isConnectionTransfer()); + _shell.setConnectionTransfer(false); } else if (task.getType() == Task.Type.DATA) { Request request; try { @@ -1178,6 +1247,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater Thread.sleep(5000); } catch (InterruptedException e) { } + _shell.setConnectionTransfer(false); reconnect(task.getLink()); return; } else if (task.getType() == Task.Type.OTHER) { diff --git a/agent/src/main/java/com/cloud/agent/AgentShell.java b/agent/src/main/java/com/cloud/agent/AgentShell.java index 0699e00250b..d76e5551b45 100644 --- a/agent/src/main/java/com/cloud/agent/AgentShell.java +++ b/agent/src/main/java/com/cloud/agent/AgentShell.java @@ -77,6 +77,7 @@ public class AgentShell implements IAgentShell, Daemon { private String hostToConnect; private String connectedHost; private Long preferredHostCheckInterval; + private boolean connectionTransfer = false; protected AgentProperties 
agentProperties = new AgentProperties(); public AgentShell() { @@ -215,6 +216,14 @@ public class AgentShell implements IAgentShell, Daemon { _storage.persist(name, value); } + public boolean isConnectionTransfer() { + return connectionTransfer; + } + + public void setConnectionTransfer(boolean connectionTransfer) { + this.connectionTransfer = connectionTransfer; + } + void loadProperties() throws ConfigurationException { final File file = PropertiesUtil.findConfigFile("agent.properties"); diff --git a/agent/src/main/java/com/cloud/agent/IAgentShell.java b/agent/src/main/java/com/cloud/agent/IAgentShell.java index 2dd08fffd45..0b9d9e81e95 100644 --- a/agent/src/main/java/com/cloud/agent/IAgentShell.java +++ b/agent/src/main/java/com/cloud/agent/IAgentShell.java @@ -70,4 +70,8 @@ public interface IAgentShell { String getConnectedHost(); void launchNewAgent(ServerResource resource) throws ConfigurationException; + + boolean isConnectionTransfer(); + + void setConnectionTransfer(boolean connectionTransfer); } diff --git a/api/src/main/java/com/cloud/host/Host.java b/api/src/main/java/com/cloud/host/Host.java index 56b4ed75a31..afac6df5631 100644 --- a/api/src/main/java/com/cloud/host/Host.java +++ b/api/src/main/java/com/cloud/host/Host.java @@ -177,6 +177,8 @@ public interface Host extends StateObject, Identity, Partition, HAResour */ Long getManagementServerId(); + Long getLastManagementServerId(); + /* *@return removal date */ diff --git a/api/src/main/java/com/cloud/host/Status.java b/api/src/main/java/com/cloud/host/Status.java index 5dc82bbfaef..af6af82e973 100644 --- a/api/src/main/java/com/cloud/host/Status.java +++ b/api/src/main/java/com/cloud/host/Status.java @@ -127,6 +127,7 @@ public enum Status { s_fsm.addTransition(Status.Connecting, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting); s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected); + 
s_fsm.addTransition(Status.Connecting, Event.StartAgentRebalance, Status.Rebalancing); s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert); s_fsm.addTransition(Status.Up, Event.PingTimeout, Status.Alert); s_fsm.addTransition(Status.Up, Event.AgentDisconnected, Status.Alert); diff --git a/api/src/main/java/com/cloud/resource/ResourceService.java b/api/src/main/java/com/cloud/resource/ResourceService.java index 2757c918ed6..562c3c418df 100644 --- a/api/src/main/java/com/cloud/resource/ResourceService.java +++ b/api/src/main/java/com/cloud/resource/ResourceService.java @@ -23,11 +23,11 @@ import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; @@ -51,7 +51,7 @@ public interface ResourceService { Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException; - Host cancelMaintenance(CancelMaintenanceCmd cmd); + Host cancelMaintenance(CancelHostMaintenanceCmd cmd); Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException; @@ -69,7 +69,7 @@ public interface ResourceService { List 
discoverHosts(AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException; - Host maintain(PrepareForMaintenanceCmd cmd); + Host maintain(PrepareForHostMaintenanceCmd cmd); Host declareHostAsDegraded(DeclareHostAsDegradedCmd cmd) throws NoTransitionException; diff --git a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java index 1eea7addba3..6eb275031e8 100644 --- a/api/src/main/java/com/cloud/server/ManagementServerHostStats.java +++ b/api/src/main/java/com/cloud/server/ManagementServerHostStats.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; /** * management server related stats @@ -70,6 +71,10 @@ public interface ManagementServerHostStats { String getOsDistribution(); + List getLastAgents(); + + List getAgents(); + int getAgentCount(); long getHeapMemoryUsed(); diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index a406e2d7a72..03de07c37da 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -1136,9 +1136,12 @@ public class ApiConstants { public static final String LOGOUT = "logout"; public static final String LIST_IDPS = "listIdps"; - public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; + public static final String MAINTENANCE_INITIATED = "maintenanceinitiated"; public static final String SHUTDOWN_TRIGGERED = "shutdowntriggered"; + public static final String READY_FOR_SHUTDOWN = "readyforshutdown"; public static final String PENDING_JOBS_COUNT = "pendingjobscount"; + public static final String AGENTS_COUNT = "agentscount"; + public static final String AGENTS = "agents"; public static final String PUBLIC_MTU = "publicmtu"; public static final String PRIVATE_MTU = "privatemtu"; diff --git 
a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java index a514a61b8a4..55fe8ec23ce 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/CancelHostMaintenanceCmd.java @@ -33,7 +33,7 @@ import com.cloud.user.Account; @APICommand(name = "cancelHostMaintenance", description = "Cancels host maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class CancelMaintenanceCmd extends BaseAsyncCmd { +public class CancelHostMaintenanceCmd extends BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java index af87bbf33bb..5e229521efe 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/ListHostsCmd.java @@ -31,6 +31,7 @@ import org.apache.cloudstack.api.Parameter; import org.apache.cloudstack.api.response.ClusterResponse; import org.apache.cloudstack.api.response.HostResponse; import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.api.response.ManagementServerResponse; import org.apache.cloudstack.api.response.PodResponse; import org.apache.cloudstack.api.response.UserVmResponse; import org.apache.cloudstack.api.response.ZoneResponse; @@ -105,6 +106,9 @@ public class ListHostsCmd extends BaseListCmd { 
@Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, description = "hypervisor type of host: XenServer,KVM,VMware,Hyperv,BareMetal,Simulator") private String hypervisor; + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", since="4.21.0") + private Long managementServerId; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -189,6 +193,10 @@ public class ListHostsCmd extends BaseListCmd { return outOfBandManagementPowerState; } + public Long getManagementServerId() { + return managementServerId; + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java similarity index 98% rename from api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java rename to api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java index 2641c54364e..5c2b50c8723 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForHostMaintenanceCmd.java @@ -33,7 +33,7 @@ import com.cloud.utils.exception.CloudRuntimeException; @APICommand(name = "prepareHostForMaintenance", description = "Prepares a host for maintenance.", responseObject = HostResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class PrepareForMaintenanceCmd extends BaseAsyncCmd { +public class PrepareForHostMaintenanceCmd extends 
BaseAsyncCmd { ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java index 3eeaaef2afa..5b47a7a06e4 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/AsyncJobResponse.java @@ -83,9 +83,13 @@ public class AsyncJobResponse extends BaseResponse { @Param(description = "the unique ID of the instance/entity object related to the job") private String jobInstanceId; - @SerializedName("managementserverid") + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) @Param(description = "the msid of the management server on which the job is running", since = "4.19") - private Long msid; + private String managementServerId; + + @SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME) + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; @SerializedName(ApiConstants.CREATED) @Param(description = " the created date of the job") @@ -156,7 +160,11 @@ public class AsyncJobResponse extends BaseResponse { this.removed = removed; } - public void setMsid(Long msid) { - this.msid = msid; + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public void setManagementServerName(String managementServerName) { + this.managementServerName = managementServerName; } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java index 62bcc07b16d..091d6391b31 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/HostResponse.java @@ -186,10 +186,18 @@ public class HostResponse extends BaseResponseWithAnnotations { @Param(description 
= "the date and time the host was last pinged") private Date lastPinged; - @SerializedName("managementserverid") + @SerializedName(ApiConstants.VIRTUAL_MACHINE_ID) + @Param(description = "the virtual machine id for host type ConsoleProxy and SecondaryStorageVM", since = "4.21.0") + private String virtualMachineId; + + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) @Param(description = "the management server ID of the host") private String managementServerId; + @SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME) + @Param(description = "the management server name of the host", since = "4.21.0") + private String managementServerName; + @SerializedName("clusterid") @Param(description = "the cluster ID of the host") private String clusterId; @@ -435,10 +443,18 @@ public class HostResponse extends BaseResponseWithAnnotations { this.lastPinged = lastPinged; } + public void setVirtualMachineId(String virtualMachineId) { + this.virtualMachineId = virtualMachineId; + } + public void setManagementServerId(String managementServerId) { this.managementServerId = managementServerId; } + public void setManagementServerName(String managementServerName) { + this.managementServerName = managementServerName; + } + public void setClusterId(String clusterId) { this.clusterId = clusterId; } @@ -723,10 +739,18 @@ public class HostResponse extends BaseResponseWithAnnotations { return lastPinged; } + public String getVirtualMachineId() { + return virtualMachineId; + } + public String getManagementServerId() { return managementServerId; } + public String getManagementServerName() { + return managementServerName; + } + public String getClusterId() { return clusterId; } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java index 84c79d32321..43f92db84cb 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java +++ 
b/api/src/main/java/org/apache/cloudstack/api/response/LoginCmdResponse.java @@ -86,6 +86,10 @@ public class LoginCmdResponse extends AuthenticationCmdResponse { @Param(description = "Two factor authentication issuer", since = "4.18.0.0") private String issuerFor2FA; + @SerializedName(value = ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "Management Server ID that the user logged to", since = "4.21.0.0") + private String managementServerId; + public String getUsername() { return username; } @@ -211,4 +215,12 @@ public class LoginCmdResponse extends AuthenticationCmdResponse { public void setIssuerFor2FA(String issuerFor2FA) { this.issuerFor2FA = issuerFor2FA; } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java index fc7d3b722ab..df55a63a060 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/ManagementServerResponse.java @@ -82,6 +82,14 @@ public class ManagementServerResponse extends BaseResponse { @Param(description = "the Management Server Peers") private List peers; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "the number of host agents this Management Server is responsible for", since = "4.21.0.0") + private Long agentsCount; + + @SerializedName(ApiConstants.PENDING_JOBS_COUNT) + @Param(description = "the number of pending jobs in this Management Server", since = "4.21.0.0") + private Long pendingJobsCount; + public String getId() { return this.id; } @@ -126,6 +134,14 @@ public class ManagementServerResponse extends BaseResponse { return serviceIp; } + public Long getAgentsCount() { + return 
this.agentsCount; + } + + public Long getPendingJobsCount() { + return this.pendingJobsCount; + } + public void setId(String id) { this.id = id; } @@ -174,6 +190,14 @@ public class ManagementServerResponse extends BaseResponse { this.serviceIp = serviceIp; } + public void setAgentsCount(Long agentsCount) { + this.agentsCount = agentsCount; + } + + public void setPendingJobsCount(Long pendingJobsCount) { + this.pendingJobsCount = pendingJobsCount; + } + public String getKernelVersion() { return kernelVersion; } diff --git a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java index 54a53f39578..7f81523dab7 100644 --- a/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java +++ b/api/src/main/java/org/apache/cloudstack/management/ManagementServerHost.java @@ -22,7 +22,7 @@ import org.apache.cloudstack.api.InternalIdentity; public interface ManagementServerHost extends InternalIdentity, Identity, ControlledEntity { enum State { - Up, Down, PreparingToShutDown, ReadyToShutDown, ShuttingDown + Up, Down, PreparingForMaintenance, Maintenance, PreparingForShutDown, ReadyToShutDown, ShuttingDown } long getMsid(); diff --git a/client/pom.xml b/client/pom.xml index 2ef6c910509..e12e0395482 100644 --- a/client/pom.xml +++ b/client/pom.xml @@ -624,7 +624,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java new file mode 100644 index 00000000000..33d32c7f6cc --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionAnswer.java @@ -0,0 +1,38 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.agent.api; + +public class MigrateAgentConnectionAnswer extends Answer { + public MigrateAgentConnectionAnswer() { + } + + public MigrateAgentConnectionAnswer(boolean result) { + this.result = result; + } + + public MigrateAgentConnectionAnswer(String details) { + this.result = false; + this.details = details; + } + + public MigrateAgentConnectionAnswer(MigrateAgentConnectionCommand cmd, boolean result) { + super(cmd, result, null); + } +} diff --git a/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java new file mode 100644 index 00000000000..9471a68669f --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/MigrateAgentConnectionCommand.java @@ -0,0 +1,61 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.agent.api; + +import java.util.List; + +public class MigrateAgentConnectionCommand extends Command { + private List msList; + private List avoidMsList; + private String lbAlgorithm; + private Long lbCheckInterval; + + public MigrateAgentConnectionCommand() { + } + + public MigrateAgentConnectionCommand(final List msList, final List avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) { + super(); + this.msList = msList; + this.avoidMsList = avoidMsList; + this.lbAlgorithm = lbAlgorithm; + this.lbCheckInterval = lbCheckInterval; + } + + public List getMsList() { + return msList; + } + + public List getAvoidMsList() { + return avoidMsList; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } + + public Long getLbCheckInterval() { + return lbCheckInterval; + } + + @Override + public boolean executeInSequence() { + return false; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/StartupCommand.java b/core/src/main/java/com/cloud/agent/api/StartupCommand.java index cca5e16b585..7a18ba2dccc 100644 --- a/core/src/main/java/com/cloud/agent/api/StartupCommand.java +++ b/core/src/main/java/com/cloud/agent/api/StartupCommand.java @@ -47,6 +47,7 @@ public class StartupCommand extends Command { String resourceName; String gatewayIpAddress; String msHostList; + boolean connectionTransferred; String arch; public StartupCommand(Host.Type type) { @@ -291,6 +292,14 @@ public class StartupCommand extends Command { this.msHostList = msHostList; } + public boolean isConnectionTransferred() { + return 
connectionTransferred; + } + + public void setConnectionTransferred(boolean connectionTransferred) { + this.connectionTransferred = connectionTransferred; + } + public String getArch() { return arch; } diff --git a/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java b/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java index ab74d9bcf85..9c6b3b5fc59 100644 --- a/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java +++ b/core/src/main/java/com/cloud/agent/api/TransferAgentCommand.java @@ -25,6 +25,7 @@ public class TransferAgentCommand extends Command { protected long agentId; protected long futureOwner; protected long currentOwner; + protected boolean isConnectionTransfer; Event event; protected TransferAgentCommand() { @@ -37,6 +38,11 @@ public class TransferAgentCommand extends Command { this.event = event; } + public TransferAgentCommand(long agentId, long currentOwner, long futureOwner, Event event, boolean isConnectionTransfer) { + this(agentId, currentOwner, futureOwner, event); + this.isConnectionTransfer = isConnectionTransfer; + } + public long getAgentId() { return agentId; } @@ -53,6 +59,10 @@ public class TransferAgentCommand extends Command { return currentOwner; } + public boolean isConnectionTransfer() { + return isConnectionTransfer; + } + @Override public boolean executeInSequence() { return false; diff --git a/core/src/main/java/com/cloud/resource/ServerResource.java b/core/src/main/java/com/cloud/resource/ServerResource.java index 1602a78d9a4..981f03b738a 100644 --- a/core/src/main/java/com/cloud/resource/ServerResource.java +++ b/core/src/main/java/com/cloud/resource/ServerResource.java @@ -50,6 +50,10 @@ public interface ServerResource extends Manager { */ StartupCommand[] initialize(); + default StartupCommand[] initialize(boolean isTransferredConnection) { + return initialize(); + } + /** * @param id id of the server to put in the PingCommand * @return PingCommand diff --git 
a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java index 287769d6a76..be7563be045 100644 --- a/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java +++ b/core/src/test/java/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java @@ -189,6 +189,11 @@ public class CheckOnHostCommandTest { return 2L; }; + @Override + public Long getLastManagementServerId() { + return null; + }; + @Override public Date getRemoved() { Date date = null; diff --git a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java index 81525ca13f1..82e2d29f407 100644 --- a/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/main/java/com/cloud/agent/AgentManager.java @@ -16,6 +16,7 @@ // under the License. package com.cloud.agent; +import java.util.List; import java.util.Map; import org.apache.cloudstack.framework.config.ConfigKey; @@ -170,4 +171,10 @@ public interface AgentManager { void notifyMonitorsOfRemovedHost(long hostId, long clusterId); void propagateChangeToAgents(Map params); + + boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs); + + List getLastAgents(); + + void setLastAgents(List lastAgents); } diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java index 343ad0fa212..3db2afb503d 100755 --- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java +++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java @@ -85,6 +85,8 @@ public interface ResourceManager extends ResourceService, Configurable { public Host createHostAndAgent(Long hostId, ServerResource resource, Map details, boolean old, List 
hostTags, boolean forRebalance); + public Host createHostAndAgent(Long hostId, ServerResource resource, Map details, boolean old, List hostTags, boolean forRebalance, boolean isTransferredConnection); + public Host addHost(long zoneId, ServerResource resource, Type hostType, Map hostDetails); public HostVO createHostVOForConnectedAgent(StartupCommand[] cmds); diff --git a/engine/orchestration/pom.xml b/engine/orchestration/pom.xml index bf8ab14c952..437c98dac87 100755 --- a/engine/orchestration/pom.xml +++ b/engine/orchestration/pom.xml @@ -70,7 +70,7 @@ org.apache.cloudstack - cloud-plugin-shutdown + cloud-plugin-maintenance ${project.version} diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java index f3add1557ce..f154eaddc1e 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java @@ -16,6 +16,7 @@ // under the License. 
package com.cloud.agent.manager; +import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.nio.channels.ClosedChannelException; @@ -38,6 +39,8 @@ import java.util.concurrent.locks.ReentrantLock; import javax.inject.Inject; import javax.naming.ConfigurationException; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.configuration.Config; import com.cloud.org.Cluster; import com.cloud.utils.NumbersUtil; @@ -50,7 +53,10 @@ import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.jobs.AsyncJob; import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.commons.collections.MapUtils; @@ -130,7 +136,7 @@ import org.apache.logging.log4j.ThreadContext; /** * Implementation of the Agent Manager. This class controls the connection to the agents. **/ -public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, Configurable { +public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, ManagementServerMaintenanceListener, Configurable { /** * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. 
Maybe a @@ -154,6 +160,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected HostDao _hostDao = null; @Inject + private ManagementServerHostDao _mshostDao; + @Inject protected OutOfBandManagementDao outOfBandManagementDao; @Inject protected DataCenterDao _dcDao = null; @@ -175,6 +183,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Inject protected IndirectAgentLB indirectAgentLB; + @Inject + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + protected int _retry = 2; protected long _nodeId = -1; @@ -187,6 +198,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl private int _directAgentThreadCap; + private List lastAgents = null; + protected StateMachine2 _statusStateMachine = Status.getStateMachine(); private final ConcurrentHashMap _pingMap = new ConcurrentHashMap(10007); @@ -226,6 +239,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl registerForHostEvents(new SetHostParamsListener(), true, true, false); + managementServerMaintenanceManager.registerListener(this); + _executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentTaskPool")); _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentConnectTaskPool")); @@ -296,6 +311,45 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl _hostMonitors.remove(id); } + @Override + public void onManagementServerMaintenance() { + logger.debug("Management server maintenance enabled"); + _monitorExecutor.shutdownNow(); + if (_connection != null) { + _connection.stop(); + + try { + _connection.cleanUp(); + } catch (final IOException e) { + logger.warn("Fail to clean up old connection", e); + } + } + _connectExecutor.shutdownNow(); + } + + @Override + public void 
onManagementServerCancelMaintenance() { + logger.debug("Management server maintenance disabled"); + if (_connectExecutor.isShutdown()) { + _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentConnectTaskPool")); + _connectExecutor.allowCoreThreadTimeOut(true); + } + + startDirectlyConnectedHosts(true); + if (_connection != null) { + try { + _connection.start(); + } catch (final NioConnectionException e) { + logger.error("Error when connecting to the NioServer!", e); + } + } + + if (_monitorExecutor.isShutdown()) { + _monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor")); + _monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS); + } + } + private AgentControlAnswer handleControlCommand(final AgentAttache attache, final AgentControlCommand cmd) { AgentControlAnswer answer = null; @@ -332,6 +386,16 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl return attache; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + @Override public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) { final List clusters = _clusterDao.listByDcHyType(dcId, type.toString()); @@ -616,10 +680,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl final long hostId = attache.getId(); final HostVO host = _hostDao.findById(hostId); for (final Pair monitor : _hostMonitors) { - logger.debug("Sending Connect to listener: {}", monitor.second().getClass().getSimpleName()); + logger.debug("Sending Connect to listener: {}, for rebalance: {}", monitor.second().getClass().getSimpleName(), forRebalance); for (int i = 0; i < cmd.length; i++) { try { - logger.debug("process connection to issue 
{} forRebalance == {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), forRebalance); + logger.debug("process connection to issue: {} for host: {}, forRebalance: {}, connection transferred: {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), hostId, forRebalance, cmd[i].isConnectionTransferred()); monitor.second().processConnect(host, cmd[i], forRebalance); } catch (final ConnectionException ce) { if (ce.isSetupError()) { @@ -675,7 +739,13 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Override public boolean start() { - startDirectlyConnectedHosts(); + ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId); + if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) { + _monitorExecutor.shutdownNow(); + return true; + } + + startDirectlyConnectedHosts(false); if (_connection != null) { try { @@ -690,10 +760,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl return true; } - public void startDirectlyConnectedHosts() { + public void startDirectlyConnectedHosts(final boolean forRebalance) { final List hosts = _resourceMgr.findDirectlyConnectedHosts(); for (final HostVO host : hosts) { - loadDirectlyConnectedHost(host, false); + loadDirectlyConnectedHost(host, forRebalance); } } @@ -768,6 +838,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance) { + return loadDirectlyConnectedHost(host, forRebalance, false); + } + + protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance, final boolean isTransferredConnection) { boolean initialized = false; ServerResource resource = null; try { @@ -796,7 +870,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl if (forRebalance) { 
tapLoadingAgents(host.getId(), TapAgentsAction.Add); - final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true); + final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true, isTransferredConnection); tapLoadingAgents(host.getId(), TapAgentsAction.Del); return h == null ? false : true; @@ -1918,12 +1992,15 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl @Override public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - Map params = new HashMap(); - params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); - params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); - params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { + Map params = new HashMap(); + params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString())); + params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString())); + params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId()))); try { SetHostParamsCommand cmds = new SetHostParamsCommand(params); @@ -1935,8 
+2012,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } - } - @Override public boolean processDisconnect(final long agentId, final Status state) { return true; @@ -2004,6 +2079,11 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + return true; + } + private GlobalLock getHostJoinLock(Long hostId) { return GlobalLock.getInternLock(String.format("%s-%s", "Host-Join", hostId)); } diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index be327418205..732ce9d61f5 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -47,14 +47,16 @@ import org.apache.cloudstack.framework.config.ConfigDepot; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; +import org.apache.cloudstack.maintenance.command.BaseShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand; +import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import 
org.apache.cloudstack.managed.context.ManagedContextTimerTask; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; -import org.apache.cloudstack.shutdown.ShutdownManager; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.BaseShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.security.SSLUtils; @@ -74,12 +76,17 @@ import com.cloud.cluster.ClusterManagerListener; import com.cloud.cluster.ClusterServicePdu; import com.cloud.cluster.ClusteredAgentRebalanceService; import org.apache.cloudstack.management.ManagementServerHost; +import org.apache.commons.collections.CollectionUtils; + import com.cloud.cluster.ManagementServerHostVO; import com.cloud.cluster.agentlb.AgentLoadBalancerPlanner; import com.cloud.cluster.agentlb.HostTransferMapVO; import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState; import com.cloud.cluster.agentlb.dao.HostTransferMapDao; import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.cluster.dao.ManagementServerHostPeerDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.UnsupportedVersionException; @@ -101,7 +108,7 @@ import com.cloud.utils.nio.Task; import com.google.gson.Gson; public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService { - private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); + private 
static ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list public final static long STARTUP_DELAY = 5000; @@ -113,12 +120,15 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust protected HashMap _sslEngines; private final Timer _timer = new Timer("ClusteredAgentManager Timer"); boolean _agentLbHappened = false; + private int _mshostCounter = 0; @Inject protected ClusterManager _clusterMgr = null; @Inject protected ManagementServerHostDao _mshostDao; @Inject + protected ManagementServerHostPeerDao _mshostPeerDao; + @Inject protected HostTransferMapDao _hostTransferDao; @Inject protected List _lbPlanners; @@ -133,7 +143,9 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Inject private CAManager caService; @Inject - private ShutdownManager shutdownManager; + private ManagementServerMaintenanceManager managementServerMaintenanceManager; + @Inject + private DataCenterDao dcDao; protected ClusteredAgentManagerImpl() { super(); @@ -172,6 +184,13 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust _timer.schedule(new DirectAgentScanTimerTask(), STARTUP_DELAY, ScanInterval.value()); logger.debug("Scheduled direct agent scan task to run at an interval of {} seconds", ScanInterval.value()); + ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId); + if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) { + s_transferExecutor.shutdownNow(); + cleanupTransferMap(_nodeId); + return true; + } + // Schedule tasks for agent rebalancing if (isAgentRebalanceEnabled()) { cleanupTransferMap(_nodeId); @@ -585,7 +604,7 @@ public class ClusteredAgentManagerImpl 
extends AgentManagerImpl implements Clust } @Override - public void startDirectlyConnectedHosts() { + public void startDirectlyConnectedHosts(final boolean forRebalance) { // override and let it be dummy for purpose, we will scan and load direct agents periodically. // We may also pickup agents that have been left over from other crashed management server } @@ -742,12 +761,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust @Override public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) throws AgentUnavailableException, OperationTimedoutException { + return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, false); + } + + @Override + public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException { boolean result = false; if (event == Event.RequestAgentRebalance) { return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId); } else if (event == Event.StartAgentRebalance) { try { - result = rebalanceHost(agentId, currentOwnerId, futureOwnerId); + result = rebalanceHost(agentId, currentOwnerId, futureOwnerId, isConnectionTransfer); } catch (final Exception e) { logger.warn("Unable to rebalance host id={} ({})", agentId, findAttache(agentId), e); } @@ -871,7 +895,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } private Answer[] sendRebalanceCommand(final long peer, final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) { - final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event); + return sendRebalanceCommand(peer, agentId, currentOwnerId, futureOwnerId, event, false); + } + + private Answer[] sendRebalanceCommand(final long peer, final long 
agentId, final long currentOwnerId, final long futureOwnerId, final Event event, final boolean isConnectionTransfer) { + final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer); final Commands commands = new Commands(Command.OnError.Stop); commands.addCommand(transfer); @@ -1004,7 +1032,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId) throws AgentUnavailableException { + return rebalanceHost(hostId, currentOwnerId, futureOwnerId, false); + } + protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId, final boolean isConnectionTransfer) throws AgentUnavailableException { boolean result = true; if (currentOwnerId == _nodeId) { if (!startRebalance(hostId)) { @@ -1013,7 +1044,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust return false; } try { - final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance); + final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance, isConnectionTransfer); if (answer == null || !answer[0].getResult()) { result = false; } @@ -1043,7 +1074,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (result) { logger.debug("Loading directly connected host {} to the management server {} as a part of rebalance process", host, _nodeId); - result = loadDirectlyConnectedHost(host, true); + result = loadDirectlyConnectedHost(host, true, isConnectionTransfer); } else { logger.warn("Failed to disconnect {} as a part of rebalance process without notification", host); } @@ -1253,10 +1284,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } else if 
(cmds.length == 1 && cmds[0] instanceof TransferAgentCommand) { final TransferAgentCommand cmd = (TransferAgentCommand)cmds[0]; - logger.debug("Intercepting command for agent rebalancing: agent {} event: {}", cmd.getAgentId(), cmd.getEvent()); + logger.debug("Intercepting command for agent rebalancing: agent: {}, event: {}, connection transfer: {}", cmd.getAgentId(), cmd.getEvent(), cmd.isConnectionTransfer()); boolean result = false; try { - result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner()); + result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner(), cmd.isConnectionTransfer()); logger.debug("Result is {}", result); } catch (final AgentUnavailableException e) { @@ -1320,10 +1351,28 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } private String handleShutdownManagementServerHostCommand(BaseShutdownManagementServerHostCommand cmd) { - if (cmd instanceof PrepareForShutdownManagementServerHostCommand) { - logger.debug("Received BaseShutdownManagementServerHostCommand - preparing to shut down"); + if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) { + logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance"); try { - shutdownManager.prepareForShutdown(); + managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm()); + return "Successfully prepared for maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } + if (cmd instanceof CancelMaintenanceManagementServerHostCommand) { + logger.debug("Received CancelMaintenanceManagementServerHostCommand - cancelling maintenance"); + try { + managementServerMaintenanceManager.cancelMaintenance(); + return "Successfully cancelled maintenance"; + } catch(CloudRuntimeException e) { + return e.getMessage(); + } + } + if (cmd instanceof 
PrepareForShutdownManagementServerHostCommand) { + logger.debug("Received PrepareForShutdownManagementServerHostCommand - preparing to shut down"); + try { + managementServerMaintenanceManager.prepareForShutdown(); return "Successfully prepared for shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1332,7 +1381,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (cmd instanceof TriggerShutdownManagementServerHostCommand) { logger.debug("Received TriggerShutdownManagementServerHostCommand - triggering a shut down"); try { - shutdownManager.triggerShutdown(); + managementServerMaintenanceManager.triggerShutdown(); return "Successfully triggered shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); @@ -1341,8 +1390,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust if (cmd instanceof CancelShutdownManagementServerHostCommand) { logger.debug("Received CancelShutdownManagementServerHostCommand - cancelling shut down"); try { - shutdownManager.cancelShutdown(); - return "Successfully prepared for shutdown"; + managementServerMaintenanceManager.cancelShutdown(); + return "Successfully cancelled shutdown"; } catch(CloudRuntimeException e) { return e.getMessage(); } @@ -1351,6 +1400,133 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust } } + @Override + public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not transferring direct agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + long transferStartTime = System.currentTimeMillis(); + if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) { + logger.info(String.format("No direct agent hosts available on management server node %d (id: %s), to transfer", fromMsId, fromMsUuid)); + 
return true; + } + + List msHosts = getUpMsHostsExcludingMs(fromMsId); + if (msHosts.isEmpty()) { + logger.warn(String.format("No management server nodes available to transfer agents from management server node %d (id: %s)", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Transferring direct agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + int agentTransferFailedCount = 0; + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + List directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId()); + if (CollectionUtils.isEmpty(directAgentHostsInDc)) { + continue; + } + logger.debug(String.format("Transferring %d direct agents from management server node %d (id: %s) of zone %s", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (HostVO host : directAgentHostsInDc) { + long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTime; + if (transferElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop transferring remaining direct agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + try { + if (_mshostCounter >= msHosts.size()) { + _mshostCounter = 0; + } + ManagementServerHostVO msHost = msHosts.get(_mshostCounter % msHosts.size()); + _mshostCounter++; + + _hostTransferDao.startAgentTransfering(host.getId(), fromMsId, msHost.getMsid()); + if (!rebalanceAgent(host.getId(), Event.StartAgentRebalance, fromMsId, msHost.getMsid(), true)) { + agentTransferFailedCount++; + } else { + updateLastManagementServer(host.getId(), fromMsId); + } + } catch (Exception e) { + logger.warn(String.format("Failed to transfer direct agent of the host %s from management server node %d (id: %s), due to %s", host, fromMsId, fromMsUuid, e.getMessage())); + } + } + } + + return (agentTransferFailedCount == 0); + } + + private List getDirectAgentHosts(long msId) { + List 
directAgentHosts = new ArrayList<>(); + List hosts = _hostDao.listHostsByMs(msId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getDirectAgentHostsInDc(long msId, long dcId) { + List directAgentHosts = new ArrayList<>(); + List hosts = _hostDao.listHostsByMsAndDc(msId, dcId); + for (HostVO host : hosts) { + AgentAttache agent = findAttache(host.getId()); + if (agent != null && agent instanceof DirectAgentAttache) { + directAgentHosts.add(host); + } + } + + return directAgentHosts; + } + + private List getUpMsHostsExcludingMs(long avoidMsId) { + final List msHosts = _mshostDao.listBy(ManagementServerHost.State.Up); + Iterator iterator = msHosts.iterator(); + while (iterator.hasNext()) { + ManagementServerHostVO ms = iterator.next(); + if (ms.getMsid() == avoidMsId || _mshostPeerDao.findByPeerMsAndState(ms.getId(), ManagementServerHost.State.Up) == null) { + iterator.remove(); + } + } + + return msHosts; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = _hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + _hostDao.update(hostId, hostVO); + } + } + + @Override + public void onManagementServerMaintenance() { + logger.debug("Management server maintenance enabled"); + s_transferExecutor.shutdownNow(); + cleanupTransferMap(_nodeId); + _agentLbHappened = false; + super.onManagementServerMaintenance(); + } + + @Override + public void onManagementServerCancelMaintenance() { + logger.debug("Management server maintenance disabled"); + super.onManagementServerCancelMaintenance(); + if (isAgentRebalanceEnabled()) { + cleanupTransferMap(_nodeId); + if (s_transferExecutor.isShutdown()) { + s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor")); + 
s_transferExecutor.scheduleAtFixedRate(getAgentRebalanceScanTask(), 60000, 60000, TimeUnit.MILLISECONDS); + s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), 60000, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, TimeUnit.MILLISECONDS); + } + } + } + public boolean executeAgentUserRequest(final long agentId, final Event event) throws AgentUnavailableException { return executeUserRequest(agentId, event); } @@ -1359,6 +1535,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event); } + public boolean rebalanceAgent(final long agentId, final Event event, final long currentOwnerId, final long futureOwnerId, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException { + return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer); + } + public boolean isAgentRebalanceEnabled() { return EnableLB.value(); } diff --git a/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java b/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java index ed52eb1a241..524b1c3adb6 100644 --- a/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java +++ b/engine/orchestration/src/main/java/com/cloud/cluster/ClusteredAgentRebalanceService.java @@ -27,4 +27,5 @@ public interface ClusteredAgentRebalanceService { boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException; + boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException; } diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java 
b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java index 053d9ac218e..8ef2de3f74d 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java @@ -372,6 +372,9 @@ public class EngineHostVO implements EngineHost, Identity { @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -556,6 +559,10 @@ public class EngineHostVO implements EngineHost, Identity { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -625,6 +632,11 @@ public class EngineHostVO implements EngineHost, Identity { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java index 7efc29b02a6..64eb2ac024b 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/NetworkOrchestrator.java @@ -4263,7 +4263,7 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra @Override public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) throws ConnectionException { - if (!(cmd instanceof 
StartupRoutingCommand)) { + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { return; } final long hostId = host.getId(); diff --git a/engine/schema/src/main/java/com/cloud/host/HostVO.java b/engine/schema/src/main/java/com/cloud/host/HostVO.java index a449eb450cf..bd6768fa0dd 100644 --- a/engine/schema/src/main/java/com/cloud/host/HostVO.java +++ b/engine/schema/src/main/java/com/cloud/host/HostVO.java @@ -404,6 +404,9 @@ public class HostVO implements Host { @Column(name = "mgmt_server_id") private Long managementServerId; + @Column(name = "last_mgmt_server_id") + private Long lastManagementServerId; + @Column(name = "dom0_memory") private long dom0MinMemory; @@ -570,6 +573,10 @@ public class HostVO implements Host { this.managementServerId = managementServerId; } + public void setLastManagementServerId(Long lastManagementServerId) { + this.lastManagementServerId = lastManagementServerId; + } + @Override public long getLastPinged() { return lastPinged; @@ -639,6 +646,11 @@ public class HostVO implements Host { return managementServerId; } + @Override + public Long getLastManagementServerId() { + return lastManagementServerId; + } + @Override public Date getDisconnectedOn() { return disconnectedOn; diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java index a2df6db44e5..abdf50ab399 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java @@ -151,12 +151,23 @@ public interface HostDao extends GenericDao, StateDao listHostsWithActiveVMs(long offeringId); + List listHostsByMsAndDc(long msId, long dcId); + + List listHostsByMs(long msId); + /** * Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over. 
- * @param msid the id of the {@see ManagementServer} + * @param msId the id of the {@see ManagementServer} * @return the number of hosts/agents this {@see ManagementServer} has responsibility over */ - int countByMs(long msid); + int countByMs(long msId); + + /** + * Retrieves the host ids/agents this {@see ManagementServer} has responsibility over. + * @param msId the id of the {@see ManagementServer} + * @return the host ids/agents this {@see ManagementServer} has responsibility over + */ + List listByMs(long msId); /** * Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java index 63950294654..4e1be3ae0fb 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java @@ -124,7 +124,9 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected SearchBuilder UnmanagedApplianceSearch; protected SearchBuilder MaintenanceCountSearch; protected SearchBuilder HostTypeCountSearch; - protected SearchBuilder ResponsibleMsCountSearch; + protected SearchBuilder ResponsibleMsSearch; + protected SearchBuilder ResponsibleMsDcSearch; + protected GenericSearchBuilder ResponsibleMsIdSearch; protected SearchBuilder HostTypeZoneCountSearch; protected SearchBuilder ClusterStatusSearch; protected SearchBuilder TypeNameZoneSearch; @@ -189,9 +191,19 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao HostTypeCountSearch.and("type", HostTypeCountSearch.entity().getType(), SearchCriteria.Op.EQ); HostTypeCountSearch.done(); - ResponsibleMsCountSearch = createSearchBuilder(); - ResponsibleMsCountSearch.and("managementServerId", ResponsibleMsCountSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); - ResponsibleMsCountSearch.done(); + ResponsibleMsSearch = 
createSearchBuilder(); + ResponsibleMsSearch.and("managementServerId", ResponsibleMsSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsSearch.done(); + + ResponsibleMsDcSearch = createSearchBuilder(); + ResponsibleMsDcSearch.and("managementServerId", ResponsibleMsDcSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.and("dcId", ResponsibleMsDcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); + ResponsibleMsDcSearch.done(); + + ResponsibleMsIdSearch = createSearchBuilder(String.class); + ResponsibleMsIdSearch.selectFields(ResponsibleMsIdSearch.entity().getUuid()); + ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); + ResponsibleMsIdSearch.done(); HostTypeZoneCountSearch = createSearchBuilder(); HostTypeZoneCountSearch.and("type", HostTypeZoneCountSearch.entity().getType(), SearchCriteria.Op.EQ); @@ -1424,12 +1436,34 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao } @Override - public int countByMs(long msid) { - SearchCriteria sc = ResponsibleMsCountSearch.create(); - sc.setParameters("managementServerId", msid); + public List listHostsByMsAndDc(long msId, long dcId) { + SearchCriteria sc = ResponsibleMsDcSearch.create(); + sc.setParameters("managementServerId", msId); + sc.setParameters("dcId", dcId); + return listBy(sc); + } + + @Override + public List listHostsByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); + return listBy(sc); + } + + @Override + public int countByMs(long msId) { + SearchCriteria sc = ResponsibleMsSearch.create(); + sc.setParameters("managementServerId", msId); return getCount(sc); } + @Override + public List listByMs(long msId) { + SearchCriteria sc = ResponsibleMsIdSearch.create(); + sc.addAnd("managementServerId", SearchCriteria.Op.EQ, msId); + return customSearch(sc, null); + } + @Override public List 
listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) { PreparedStatement pstmt = null; diff --git a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql index 47e7bebbee4..4a5a0203a15 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-42010to42100.sql @@ -31,3 +31,6 @@ SELECT uuid(), role_id, 'quotaCreditsList', permission, sort_order FROM `cloud`.`role_permissions` rp WHERE rp.rule = 'quotaStatement' AND NOT EXISTS(SELECT 1 FROM cloud.role_permissions rp_ WHERE rp.role_id = rp_.role_id AND rp_.rule = 'quotaCreditsList'); + +CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host', 'last_mgmt_server_id', 'bigint unsigned DEFAULT NULL COMMENT "last management server this host is connected to" AFTER `mgmt_server_id`'); + diff --git a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java index 9dfb9e1654e..b136b8e842b 100644 --- a/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java +++ b/framework/agent-lb/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLB.java @@ -20,6 +20,12 @@ import java.util.List; public interface IndirectAgentLB { + /** + * Return list of management server addresses from host setting + * @return management servers string list + */ + List getManagementServerList(); + /** * Return list of management server addresses after applying configured lb algorithm * for a host in a zone. @@ -30,6 +36,17 @@ public interface IndirectAgentLB { */ List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList); + /** + * Return list of management server addresses after applying the lb algorithm + * for a host in a zone. 
+ * @param hostId host id (if present) + * @param dcId zone id + * @param orderedHostIdList (optional) list of ordered host id list + * @param lbAlgorithm lb algorithm + * @return management servers string list + */ + List getManagementServerList(Long hostId, Long dcId, List orderedHostIdList, String lbAlgorithm); + /** * Compares received management server list against expected list for a host in a zone. * @param hostId host id @@ -45,6 +62,8 @@ public interface IndirectAgentLB { */ String getLBAlgorithmName(); + void checkLBAlgorithmName(String lbAlgorithm); + /** * Returns the configured LB preferred host check interval (if applicable at cluster scope) * @return returns interval in seconds @@ -53,4 +72,7 @@ public interface IndirectAgentLB { void propagateMSListToAgents(); + boolean haveAgentBasedHosts(long msId); + + boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs); } diff --git a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java index e26e32e7b2e..1b45910b88a 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/ClusterManagerImpl.java @@ -941,7 +941,7 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C try { JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId()); } catch (final Exception e) { - logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString()); + logger.warn("Unable to deregister cluster node from JMX monitoring due to exception " + e.toString()); } } @@ -1063,8 +1063,12 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C logger.info("New instance of management server {}, runId {} is being started", mshost, _runId); } } else { + ManagementServerHost.State msHostState = 
ManagementServerHost.State.Up; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = ManagementServerHost.State.Maintenance; + } _mshostDao.update(mshost.getId(), _runId, NetUtils.getCanonicalHostName(), version, _clusterNodeIP, _currentServiceAdapter.getServicePort(), - DateUtil.currentGMTTime()); + DateUtil.currentGMTTime(), msHostState); if (logger.isInfoEnabled()) { logger.info("Management server {}, runId {} is being started", mshost, _runId); } @@ -1102,11 +1106,17 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C if (_mshostId != null) { final ManagementServerHostVO mshost = _mshostDao.findByMsid(_msId); - final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); - mshost.setState(ManagementServerHost.State.Down); - mshostStatus.setLastJvmStop(new Date()); - _mshostDao.update(_mshostId, mshost); - mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + if (mshost != null) { + final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid()); + mshostStatus.setLastJvmStop(new Date()); + mshostStatusDao.update(mshostStatus.getId(), mshostStatus); + + ManagementServerHost.State msHostState = ManagementServerHost.State.Down; + if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) { + msHostState = ManagementServerHost.State.Maintenance; + } + _mshostDao.updateState(mshost.getId(), msHostState); + } } _heartbeatScheduler.shutdownNow(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java index 96d57ee0425..6c8ffcac78b 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java +++ 
b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDao.java @@ -33,10 +33,12 @@ public interface ManagementServerHostDao extends GenericDao getActiveList(Date cutTime); List getInactiveList(Date cutTime); @@ -47,6 +49,8 @@ public interface ManagementServerHostDao extends GenericDao listBy(ManagementServerHost.State... states); + List listNonUpStateMsIPs(); + /** * Lists msids for which hosts are orphaned, i.e. msids that hosts refer as their owning ms whilst no mshost entry exists with those msids * diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java index 27b6d52f61b..ec943a9c26b 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostDaoImpl.java @@ -35,6 +35,7 @@ import com.cloud.utils.DateUtil; import com.cloud.utils.db.DB; import com.cloud.utils.db.Filter; import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.GenericSearchBuilder; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.TransactionLegacy; @@ -46,6 +47,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase ActiveSearch; private final SearchBuilder InactiveSearch; private final SearchBuilder StateSearch; + protected GenericSearchBuilder NonUpStateMsSearch; @Override public void invalidateRunSession(long id, long runid) { @@ -77,7 +79,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase getActiveList(Date cutTime) { SearchCriteria sc = ActiveSearch.create(); @@ -205,6 +229,11 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase listNonUpStateMsIPs() { + SearchCriteria sc = NonUpStateMsSearch.create(); + sc.addAnd("state", SearchCriteria.Op.NLIKE, State.Up); + return customSearch(sc, null); + } + @Override 
public List listOrphanMsids() { List orphanList = new ArrayList(); diff --git a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java index 55559946cf0..37601e8ce78 100644 --- a/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java +++ b/framework/cluster/src/main/java/com/cloud/cluster/dao/ManagementServerHostPeerDao.java @@ -33,4 +33,7 @@ public interface ManagementServerHostPeerDao extends GenericDao FindForUpdateSearch; private final SearchBuilder CountSearch; private final SearchBuilder ActiveSearch; + private final SearchBuilder FindByOwnerAndPeerMsSearch; + private final SearchBuilder FindByPeerMsAndStateSearch; + public ManagementServerHostPeerDaoImpl() { ClearPeerSearch = createSearchBuilder(); @@ -59,6 +62,17 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase 0; } + + @Override + public ManagementServerHostPeerVO findByOwnerAndPeerMsHost(long ownerMshost, long peerMshost, ManagementServerHost.State peerState) { + SearchCriteria sc = FindByOwnerAndPeerMsSearch.create(); + sc.setParameters("ownerMshost", ownerMshost); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } + + @Override + public ManagementServerHostPeerVO findByPeerMsAndState(long peerMshost, ManagementServerHost.State peerState) { + SearchCriteria sc = FindByPeerMsAndStateSearch.create(); + sc.setParameters("peerMshost", peerMshost); + sc.setParameters("peerState", peerState); + + return findOneBy(sc); + } } diff --git a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 47bf27bd6c4..448a4eb219c 100644 --- a/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ 
b/framework/jobs/src/main/java/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -174,7 +174,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, private ExecutorService _apiJobExecutor; private ExecutorService _workerJobExecutor; - private boolean asyncJobsEnabled = true; + private boolean asyncJobsDisabled = false; + private long asyncJobsDisabledTime = 0; @Override public String getConfigComponentName() { @@ -218,16 +219,48 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, return submitAsyncJob(job, false); } - private void checkShutdown() { - if (!isAsyncJobsEnabled()) { - throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs"); + private void checkAsyncJobAllowed(AsyncJob job) { + if (isAsyncJobsEnabled()) { + return; } + + if (job instanceof VmWorkJobVO) { + String related = job.getRelated(); + if (StringUtils.isNotBlank(related)) { + AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(Long.parseLong(related)); + if (relatedJob != null) { + long relatedJobCreatedTime = relatedJob.getCreated().getTime(); + if ((asyncJobsDisabledTime - relatedJobCreatedTime) >= 0) { + return; + } + } + } + } + + throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. 
Can not accept new jobs"); + } + + private boolean checkSyncQueueItemAllowed(SyncQueueItemVO item) { + if (isAsyncJobsEnabled()) { + return true; + } + + Long contentId = item.getContentId(); + AsyncJob relatedJob = _jobDao.findByIdIncludingRemoved(contentId); + if (relatedJob != null) { + long relatedJobCreatedTime = relatedJob.getCreated().getTime(); + if ((asyncJobsDisabledTime - relatedJobCreatedTime) >= 0) { + return true; + } + } + + return false; } @SuppressWarnings("unchecked") @DB public long submitAsyncJob(AsyncJob job, boolean scheduleJobExecutionInContext) { - checkShutdown(); + checkAsyncJobAllowed(job); @SuppressWarnings("rawtypes") GenericDao dao = GenericDaoBase.getDao(job.getClass()); @@ -248,7 +281,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Override @DB public long submitAsyncJob(final AsyncJob job, final String syncObjType, final long syncObjId) { - checkShutdown(); + checkAsyncJobAllowed(job); try { @SuppressWarnings("rawtypes") @@ -860,7 +893,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, protected void reallyRun() { try { if (!isAsyncJobsEnabled()) { - logger.info("A shutdown has been triggered. Not executing any async job"); + logger.info("Maintenance or Shutdown has been initiated on this management server. 
Not executing any async jobs"); return; } @@ -1301,16 +1334,18 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, @Override public void enableAsyncJobs() { - this.asyncJobsEnabled = true; + this.asyncJobsDisabled = false; + this.asyncJobsDisabledTime = 0; } @Override public void disableAsyncJobs() { - this.asyncJobsEnabled = false; + this.asyncJobsDisabled = true; + this.asyncJobsDisabledTime = System.currentTimeMillis(); } @Override public boolean isAsyncJobsEnabled() { - return asyncJobsEnabled; + return !asyncJobsDisabled; } } diff --git a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java index 512715988bb..f2c5ba2228e 100644 --- a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java +++ b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java @@ -6036,6 +6036,11 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes @Override public StartupCommand[] initialize() { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) { try { String hostApiVersion = "4.1"; VmwareContext context = getServiceContext(); @@ -6064,6 +6069,7 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes cmd.setHypervisorType(HypervisorType.VMware); cmd.setCluster(_cluster); cmd.setHypervisorVersion(hostApiVersion); + cmd.setConnectionTransferred(isTransferredConnection); List storageCmds = initializeLocalStorage(); StartupCommand[] answerCmds = new StartupCommand[1 + storageCmds.size()]; diff --git a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java 
b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java index e06268cd6ab..79d3c4d04ff 100644 --- a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java +++ b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/CitrixResourceBase.java @@ -3723,6 +3723,11 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S @Override public StartupCommand[] initialize() throws IllegalArgumentException { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException { final Connection conn = getConnection(); if (!getHostInfo(conn)) { logger.warn("Unable to get host information for " + _host.getIp()); @@ -3733,6 +3738,7 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S cmd.setHypervisorType(HypervisorType.XenServer); cmd.setCluster(_cluster); cmd.setPoolSync(false); + cmd.setConnectionTransferred(isTransferredConnection); try { final Pool pool = Pool.getByUuid(conn, _host.getPool()); diff --git a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java index 92e812d8d78..d0b96aecbb8 100644 --- a/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java +++ b/plugins/hypervisors/xenserver/src/main/java/com/cloud/hypervisor/xenserver/resource/XenServer56Resource.java @@ -128,7 +128,12 @@ public class XenServer56Resource extends CitrixResourceBase { @Override public StartupCommand[] initialize() { + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) { pingXAPI(); - return super.initialize(); + return 
super.initialize(isTransferredConnection); } } diff --git a/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java b/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java index 5120a0cc5af..43249857d21 100644 --- a/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java +++ b/plugins/hypervisors/xenserver/src/main/java/org/apache/cloudstack/hypervisor/xenserver/XenServerResourceNewBase.java @@ -64,7 +64,12 @@ public class XenServerResourceNewBase extends XenServer620SP1Resource { @Override public StartupCommand[] initialize() throws IllegalArgumentException { - final StartupCommand[] cmds = super.initialize(); + return initialize(false); + } + + @Override + public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException { + final StartupCommand[] cmds = super.initialize(isTransferredConnection); final Connection conn = getConnection(); Pool pool; diff --git a/plugins/shutdown/pom.xml b/plugins/maintenance/pom.xml similarity index 92% rename from plugins/shutdown/pom.xml rename to plugins/maintenance/pom.xml index 372095c55c8..fb8f64cd16c 100644 --- a/plugins/shutdown/pom.xml +++ b/plugins/maintenance/pom.xml @@ -21,8 +21,8 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - cloud-plugin-shutdown - Apache CloudStack Plugin - Safe Shutdown + cloud-plugin-maintenance + Apache CloudStack Plugin - MS Maintenance and Safe Shutdown org.apache.cloudstack cloudstack-plugins diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java similarity index 85% rename from 
plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java index d7f4953291b..da9263ed4fd 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/BaseShutdownActionCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/BaseMSMaintenanceActionCmd.java @@ -25,18 +25,18 @@ import org.apache.cloudstack.api.Parameter; import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager; -public abstract class BaseShutdownActionCmd extends BaseCmd { +public abstract class BaseMSMaintenanceActionCmd extends BaseCmd { @Inject - protected ShutdownManager shutdownManager; + protected ManagementServerMaintenanceManager managementServerMaintenanceManager; ///////////////////////////////////////////////////// //////////////// API parameters ///////////////////// ///////////////////////////////////////////////////// - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server", required = true) + @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", required = true) private Long managementServerId; ///////////////////////////////////////////////////// diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java new file mode 100644 index 00000000000..a0f091ef1e4 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelMaintenanceCmd.java @@ -0,0 +1,60 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.BaseCmd; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = CancelMaintenanceCmd.APINAME, + description = "Cancels maintenance of the management server", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) + +public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd { + + public static final String APINAME = "cancelMaintenance"; + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse 
response = managementServerMaintenanceManager.cancelMaintenance(this); + response.setResponseName(getCommandName()); + response.setObjectName("cancelmaintenance"); + setResponseObject(response); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java similarity index 83% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java index aa90d7fcbdc..35bfa4f4264 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/CancelShutdownCmd.java @@ -15,24 +15,24 @@ // specific language governing permissions and limitations // under the License. - package org.apache.cloudstack.api.command; +package org.apache.cloudstack.api.command; import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = CancelShutdownCmd.APINAME, description = "Cancels a triggered shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class CancelShutdownCmd extends BaseShutdownActionCmd { +public class CancelShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "cancelShutdown"; @@ -52,7 +52,7 @@ public class CancelShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final 
ReadyForShutdownResponse response = shutdownManager.cancelShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.cancelShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("cancelshutdown"); setResponseObject(response); diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java new file mode 100644 index 00000000000..3c036c4c35f --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForMaintenanceCmd.java @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; + +import com.cloud.user.Account; + +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; +import org.apache.cloudstack.acl.RoleType; + +@APICommand(name = PrepareForMaintenanceCmd.APINAME, + description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents", + since = "4.21.0", + responseObject = ManagementServerMaintenanceResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + authorized = {RoleType.Admin}) +public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd { + public static final String APINAME = "prepareForMaintenance"; + + @Parameter(name = ApiConstants.ALGORITHM, type = CommandType.STRING, description = "indirect agents load balancer algorithm (static, roundrobin, shuffle);" + + " when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered") + private String algorithm; + + public String getAlgorithm() { + return algorithm; + } + + public void setAlgorithm(String algorithm) { + this.algorithm = algorithm; + } + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.prepareForMaintenance(this); + response.setResponseName(getCommandName()); + 
response.setObjectName("prepareformaintenance"); + setResponseObject(response); + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java similarity index 85% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java index c86d2856047..273cc2743ad 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/PrepareForShutdownCmd.java @@ -17,22 +17,21 @@ package org.apache.cloudstack.api.command; - import org.apache.cloudstack.api.APICommand; import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = PrepareForShutdownCmd.APINAME, description = "Prepares CloudStack for a safe manual shutdown by preventing new jobs from being accepted", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class PrepareForShutdownCmd extends BaseShutdownActionCmd { +public class PrepareForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "prepareForShutdown"; @Override @@ -51,7 +50,7 @@ public class PrepareForShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.prepareForShutdown(this); + final ManagementServerMaintenanceResponse response = 
managementServerMaintenanceManager.prepareForShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("prepareforshutdown"); setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java similarity index 66% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java index de4db9c0428..782b23a0422 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/ReadyForShutdownCmd.java @@ -17,55 +17,23 @@ package org.apache.cloudstack.api.command; -import javax.inject.Inject; - import org.apache.cloudstack.api.APICommand; -import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; -import org.apache.cloudstack.api.Parameter; -import org.apache.cloudstack.api.response.ManagementServerResponse; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.shutdown.ShutdownManager; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import com.cloud.user.Account; @APICommand(name = ReadyForShutdownCmd.APINAME, description = "Returns the status of CloudStack, whether a shutdown has been triggered and if ready to shutdown", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false) -public class ReadyForShutdownCmd extends BaseCmd { +public class ReadyForShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "readyForShutdown"; - @Inject - private ShutdownManager 
shutdownManager; - - ///////////////////////////////////////////////////// - //////////////// API parameters ///////////////////// - ///////////////////////////////////////////////////// - - @Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server") - private Long managementServerId; - ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// - public Long getManagementServerId() { - return managementServerId; - } - - ///////////////////////////////////////////////////// - /////////////// API Implementation/////////////////// - ///////////////////////////////////////////////////// - - @Override - public void execute() { - final ReadyForShutdownResponse response = shutdownManager.readyForShutdown(this); - response.setResponseName(getCommandName()); - response.setObjectName("readyforshutdown"); - setResponseObject(response); - } - @Override public String getCommandName() { return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; @@ -75,4 +43,16 @@ public class ReadyForShutdownCmd extends BaseCmd { public long getEntityOwnerId() { return Account.ACCOUNT_ID_SYSTEM; } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() { + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.readyForShutdown(this); + response.setResponseName(getCommandName()); + response.setObjectName("readyforshutdown"); + setResponseObject(response); + } } diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java similarity index 85% rename from 
plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java index b4ef7c1f67a..dc729593b0d 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/command/TriggerShutdownCmd.java @@ -22,16 +22,16 @@ import org.apache.cloudstack.api.BaseCmd; import com.cloud.user.Account; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; +import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse; import org.apache.cloudstack.acl.RoleType; @APICommand(name = TriggerShutdownCmd.APINAME, - description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobbs have been completed. Triggers an immediate shutdown if forced", + description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobs have been completed.", since = "4.19.0", - responseObject = ReadyForShutdownResponse.class, + responseObject = ManagementServerMaintenanceResponse.class, requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, authorized = {RoleType.Admin}) -public class TriggerShutdownCmd extends BaseShutdownActionCmd { +public class TriggerShutdownCmd extends BaseMSMaintenanceActionCmd { public static final String APINAME = "triggerShutdown"; ///////////////////////////////////////////////////// @@ -54,7 +54,7 @@ public class TriggerShutdownCmd extends BaseShutdownActionCmd { @Override public void execute() { - final ReadyForShutdownResponse response = shutdownManager.triggerShutdown(this); + final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.triggerShutdown(this); response.setResponseName(getCommandName()); response.setObjectName("triggershutdown"); 
setResponseObject(response); diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java similarity index 52% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java index d1b2353d2a3..52bd8ab3fb8 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/api/response/ReadyForShutdownResponse.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/api/response/ManagementServerMaintenanceResponse.java @@ -16,35 +16,81 @@ // under the License. package org.apache.cloudstack.api.response; +import java.util.List; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.management.ManagementServerHost.State; import com.cloud.serializer.Param; import com.google.gson.annotations.SerializedName; -public class ReadyForShutdownResponse extends BaseResponse { - @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) - @Param(description = "Indicates whether CloudStack is ready to shutdown") - private Boolean readyForShutdown; +public class ManagementServerMaintenanceResponse extends BaseResponse { + @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) + @Param(description = "The id of the management server") + private String managementServerId; + + @SerializedName(ApiConstants.STATE) + @Param(description = "the state of the management server") + private State state; + + @SerializedName(ApiConstants.MAINTENANCE_INITIATED) + @Param(description = "Indicates whether maintenance has been initiated") + private Boolean maintenanceInitiated; @SerializedName(ApiConstants.SHUTDOWN_TRIGGERED) @Param(description = "Indicates whether a shutdown has been triggered") private Boolean 
shutdownTriggered; + @SerializedName(ApiConstants.READY_FOR_SHUTDOWN) + @Param(description = "Indicates whether CloudStack is ready to shutdown") + private Boolean readyForShutdown; + @SerializedName(ApiConstants.PENDING_JOBS_COUNT) @Param(description = "The number of jobs in progress") private Long pendingJobsCount; - @SerializedName(ApiConstants.MANAGEMENT_SERVER_ID) - @Param(description = "The id of the management server") - private Long msId; + @SerializedName(ApiConstants.AGENTS_COUNT) + @Param(description = "The number of host agents this management server is responsible for") + private Long agentsCount; - public ReadyForShutdownResponse(Long msId, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount) { - this.msId = msId; + @SerializedName(ApiConstants.AGENTS) + @Param(description = "The host agents this management server is responsible for") + private List agents; + + public ManagementServerMaintenanceResponse(String managementServerId, State state, Boolean maintenanceInitiated, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount, long agentsCount, List agents) { + this.managementServerId = managementServerId; + this.state = state; + this.maintenanceInitiated = maintenanceInitiated; this.shutdownTriggered = shutdownTriggered; this.readyForShutdown = readyForShutdown; this.pendingJobsCount = pendingJobsCount; + this.agentsCount = agentsCount; + this.agents = agents; + } + + public String getManagementServerId() { + return managementServerId; + } + + public void setManagementServerId(String managementServerId) { + this.managementServerId = managementServerId; + } + + public State getState() { + return state; + } + + public void setState(State state) { + this.state = state; + } + + public Boolean getMaintenanceInitiated() { + return this.maintenanceInitiated; + } + + public void setMaintenanceInitiated(Boolean maintenanceInitiated) { + this.maintenanceInitiated = maintenanceInitiated; } public Boolean 
getShutdownTriggered() { @@ -71,11 +117,19 @@ public class ReadyForShutdownResponse extends BaseResponse { this.pendingJobsCount = pendingJobsCount; } - public Long getMsId() { - return msId; + public Long getAgentsCount() { + return this.agentsCount; } - public void setMsId(Long msId) { - this.msId = msId; + public void setAgentsCount(Long agentsCount) { + this.agentsCount = agentsCount; + } + + public List getAgents() { + return agents; + } + + public void setAgents(List agents) { + this.agents = agents; } } diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java new file mode 100644 index 00000000000..bd82d1b257d --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceListener.java @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+package org.apache.cloudstack.maintenance;
+
+// Callback contract for components that want to react when this management server
+// enters, or leaves, maintenance. Listeners are registered through
+// ManagementServerMaintenanceManager.registerListener() and are invoked one after
+// another while the manager holds its listener lock.
+public interface ManagementServerMaintenanceListener {
+    // Invoked from ManagementServerMaintenanceManager.onMaintenance(), i.e. once the
+    // management server has been moved to the Maintenance state.
+    void onManagementServerMaintenance();
+
+    // Invoked from ManagementServerMaintenanceManager.onCancelMaintenance(), i.e. when
+    // maintenance is cancelled on a management server that had reached Maintenance.
+    void onManagementServerCancelMaintenance();
+}
diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java new file mode 100644 index 00000000000..d474f718826 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManager.java @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+package org.apache.cloudstack.maintenance;
+
+import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
+import org.apache.cloudstack.api.command.CancelShutdownCmd;
+import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
+import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
+import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
+import org.apache.cloudstack.api.command.TriggerShutdownCmd;
+import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
+import org.apache.cloudstack.framework.config.ConfigKey;
+
+import com.cloud.cluster.ManagementServerHostVO;
+
+// Coordinates shutdown and maintenance of management servers. The no-argument
+// operations act on the current management server; the operations taking an API
+// command act on the management server named by that command.
+public interface ManagementServerMaintenanceManager {
+    int DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS = 60;
+
+    // Dynamic, global setting: length of the maintenance window in minutes.
+    ConfigKey<Integer> ManagementServerMaintenanceTimeoutInMins = new ConfigKey<>(Integer.class,
+            "management.server.maintenance.timeout",
+            "Advanced",
+            String.valueOf(DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS),
+            "Timeout (in mins) for the maintenance window for the management server, default: 60 mins.",
+            true,
+            ConfigKey.Scope.Global,
+            null);
+
+    // Adds a listener that is notified by onMaintenance() / onCancelMaintenance()
+    void registerListener(ManagementServerMaintenanceListener listener);
+
+    // Removes a previously registered listener
+    void unregisterListener(ManagementServerMaintenanceListener listener);
+
+    // Notifies every registered listener that the management server is in maintenance
+    void onMaintenance();
+
+    // Notifies every registered listener that maintenance has been cancelled
+    void onCancelMaintenance();
+
+    // Returns the number of pending jobs for the given management server msids.
+    // NOTE: This is the msid and NOT the id
+    long countPendingJobs(Long... msIds);
+
+    // Indicates whether the async job manager currently accepts new async jobs
+    boolean isAsyncJobsEnabled();
+
+    // Indicates whether a shutdown has been triggered on the current management server
+    boolean isShutdownTriggered();
+
+    // Indicates whether the current management server is preparing to shutdown
+    boolean isPreparingForShutdown();
+
+    // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs
+    void triggerShutdown();
+
+    // Prepares the current management server to shutdown by not accepting any more async jobs
+    void prepareForShutdown();
+
+    // Cancels the shutdown on the current management server
+    void cancelShutdown();
+
+    // Indicates whether the current management server is preparing for maintenance
+    boolean isPreparingForMaintenance();
+
+    // Clears the preparing-for-maintenance flag, the maintenance start time and the LB algorithm
+    void resetPreparingForMaintenance();
+
+    // Returns the time (epoch millis) at which prepare for maintenance was initiated, 0 when not initiated
+    long getMaintenanceStartTime();
+
+    // Returns the LB algorithm given when prepare for maintenance was initiated, may be null
+    String getLbAlgorithm();
+
+    // Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs
+    void prepareForMaintenance(String lbAlgorithm);
+
+    // Cancels maintenance of the current management server
+    void cancelMaintenance();
+
+    // Aborts an in-progress prepare for maintenance: re-enables async jobs and sets the state of the
+    // given management server (the current one when msHost is null) back to Up
+    void cancelPreparingForMaintenance(ManagementServerHostVO msHost);
+
+    // Stops the periodic pending jobs check, if one is running
+    void cancelWaitForPendingJobs();
+
+    // Returns the maintenance/shutdown status of the specified management server (the current one when
+    // no id is given): whether maintenance or shutdown has been initiated and whether there are no pending jobs left
+    ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd);
+
+    // Prepares the specified management server to shutdown by not accepting any more async jobs
+    ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd);
+
+    // Cancels the shutdown on the specified management server
+    ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd);
+
+    // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs
+    ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd);
+
+    // Prepares the specified management server for maintenance by migrating the agents and not accepting any more async jobs
+    ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd);
+
+    // Cancels maintenance of the specified management server
+    ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd);
+}
diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java new file mode 100644 index 00000000000..0af8a7c114d --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImpl.java @@ -0,0 +1,598 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+package org.apache.cloudstack.maintenance;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+import javax.inject.Inject;
+
+import org.apache.cloudstack.agent.lb.IndirectAgentLB;
+import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
+import org.apache.cloudstack.api.command.CancelShutdownCmd;
+import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
+import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
+import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
+import org.apache.cloudstack.api.command.TriggerShutdownCmd;
+import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
+import org.apache.cloudstack.config.ApiServiceConfiguration;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.framework.config.Configurable;
+import org.apache.cloudstack.framework.jobs.AsyncJobManager;
+import org.apache.cloudstack.managed.context.ManagedContextRunnable;
+import org.apache.cloudstack.management.ManagementServerHost.State;
+import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
+import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand;
+import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand;
+import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand;
+import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand;
+import org.apache.cloudstack.utils.identity.ManagementServerNode;
+import org.apache.commons.collections.CollectionUtils;
+
+import com.cloud.agent.AgentManager;
+import com.cloud.agent.api.Command;
+import com.cloud.cluster.ClusterManager;
+import com.cloud.cluster.ManagementServerHostVO;
+import com.cloud.cluster.dao.ManagementServerHostDao;
+import com.cloud.host.dao.HostDao;
+import com.cloud.serializer.GsonHelper;
+import com.cloud.utils.StringUtils;
+import com.cloud.utils.component.ManagerBase;
+import com.cloud.utils.component.PluggableService;
+import com.cloud.utils.concurrency.NamedThreadFactory;
+import com.cloud.utils.exception.CloudRuntimeException;
+import com.google.gson.Gson;
+
+// Implements shutdown and maintenance handling for management servers.
+//
+// The no-argument operations change the state of THIS node (flags below, async job
+// manager, pending jobs check). The operations taking an API command validate the
+// target management server, forward a cluster command to it, and record the new
+// state of that server in the database.
+public class ManagementServerMaintenanceManagerImpl extends ManagerBase implements ManagementServerMaintenanceManager, PluggableService, Configurable {
+
+    // Prefix of the reply returned by the cluster manager when a forwarded command succeeded
+    private static final String SUCCESS_RESULT_PREFIX = "Success";
+
+    Gson gson;
+
+    @Inject
+    private AsyncJobManager jobManager;
+    @Inject
+    private ClusterManager clusterManager;
+    @Inject
+    private AgentManager agentMgr;
+    @Inject
+    private IndirectAgentLB indirectAgentLB;
+    @Inject
+    private ManagementServerHostDao msHostDao;
+    @Inject
+    private HostDao hostDao;
+
+    private final List<ManagementServerMaintenanceListener> _listeners = new ArrayList<>();
+
+    // The fields below are written by the threads handling the prepare/cancel/trigger calls
+    // and read by the "PendingJobsCheck" scheduler thread, hence volatile.
+    private volatile boolean shutdownTriggered = false;
+    private volatile boolean preparingForShutdown = false;
+    private volatile boolean preparingForMaintenance = false;
+    private volatile long maintenanceStartTime = 0;
+    private volatile String lbAlgorithm;
+
+    private ScheduledExecutorService pendingJobsCheckTask;
+
+    protected ManagementServerMaintenanceManagerImpl() {
+        super();
+        gson = GsonHelper.getGson();
+    }
+
+    @Override
+    public boolean start() {
+        // When the persisted state of this node is already PreparingForMaintenance or Maintenance
+        // at start, keep async jobs disabled and record the node as being in Maintenance.
+        ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
+        if (msHost != null && isInState(msHost, State.PreparingForMaintenance, State.Maintenance)) {
+            this.preparingForMaintenance = true;
+            jobManager.disableAsyncJobs();
+            msHostDao.updateState(msHost.getId(), State.Maintenance);
+        }
+        return true;
+    }
+
+    @Override
+    public void registerListener(ManagementServerMaintenanceListener listener) {
+        synchronized (_listeners) {
+            logger.info("Register management server maintenance listener " + listener.getClass());
+            _listeners.add(listener);
+        }
+    }
+
+    @Override
+    public void unregisterListener(ManagementServerMaintenanceListener listener) {
+        synchronized (_listeners) {
+            logger.info("Unregister management server maintenance listener " + listener.getClass());
+            _listeners.remove(listener);
+        }
+    }
+
+    @Override
+    public void onMaintenance() {
+        synchronized (_listeners) {
+            for (final ManagementServerMaintenanceListener listener : _listeners) {
+                logger.info("Invoke, on maintenance for listener " + listener.getClass());
+                listener.onManagementServerMaintenance();
+            }
+        }
+    }
+
+    @Override
+    public void onCancelMaintenance() {
+        synchronized (_listeners) {
+            for (final ManagementServerMaintenanceListener listener : _listeners) {
+                logger.info("Invoke, on cancel maintenance for listener " + listener.getClass());
+                listener.onManagementServerCancelMaintenance();
+            }
+        }
+    }
+
+    @Override
+    public boolean isShutdownTriggered() {
+        return shutdownTriggered;
+    }
+
+    @Override
+    public boolean isPreparingForShutdown() {
+        return preparingForShutdown;
+    }
+
+    @Override
+    public boolean isPreparingForMaintenance() {
+        return preparingForMaintenance;
+    }
+
+    @Override
+    public void resetPreparingForMaintenance() {
+        preparingForMaintenance = false;
+        maintenanceStartTime = 0;
+        lbAlgorithm = null;
+    }
+
+    @Override
+    public long getMaintenanceStartTime() {
+        return maintenanceStartTime;
+    }
+
+    @Override
+    public String getLbAlgorithm() {
+        return lbAlgorithm;
+    }
+
+    @Override
+    public long countPendingJobs(Long... msIds) {
+        return jobManager.countPendingNonPseudoJobs(msIds);
+    }
+
+    @Override
+    public boolean isAsyncJobsEnabled() {
+        return jobManager.isAsyncJobsEnabled();
+    }
+
+    @Override
+    public void triggerShutdown() {
+        if (this.shutdownTriggered) {
+            throw new CloudRuntimeException("Shutdown has already been triggered");
+        }
+        this.shutdownTriggered = true;
+        prepareForShutdown(true);
+    }
+
+    // Stops accepting async jobs and starts the pending jobs check. The "already initiated"
+    // validations are skipped when called as part of triggerShutdown() (postTrigger = true).
+    private void prepareForShutdown(boolean postTrigger) {
+        if (!postTrigger) {
+            if (this.preparingForMaintenance) {
+                throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again");
+            }
+
+            // Ensure we don't throw an error if triggering a shutdown after just preparing for it
+            if (this.preparingForShutdown) {
+                throw new CloudRuntimeException("Shutdown has already been triggered");
+            }
+        }
+
+        this.preparingForShutdown = true;
+        jobManager.disableAsyncJobs();
+        waitForPendingJobs();
+    }
+
+    @Override
+    public void prepareForShutdown() {
+        prepareForShutdown(false);
+    }
+
+    @Override
+    public void cancelShutdown() {
+        if (!this.preparingForShutdown) {
+            throw new CloudRuntimeException("Shutdown has not been triggered");
+        }
+
+        this.preparingForShutdown = false;
+        this.shutdownTriggered = false;
+        resetPreparingForMaintenance();
+        jobManager.enableAsyncJobs();
+        cancelWaitForPendingJobs();
+    }
+
+    @Override
+    public void prepareForMaintenance(String lbAlgorithm) {
+        if (this.preparingForShutdown) {
+            throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again");
+        }
+
+        if (this.preparingForMaintenance) {
+            throw new CloudRuntimeException("Maintenance has already been initiated");
+        }
+        this.preparingForMaintenance = true;
+        this.maintenanceStartTime = System.currentTimeMillis();
+        this.lbAlgorithm = lbAlgorithm;
+        jobManager.disableAsyncJobs();
+        waitForPendingJobs();
+    }
+
+    @Override
+    public void cancelMaintenance() {
+        if (!this.preparingForMaintenance) {
+            throw new CloudRuntimeException("Maintenance has not been initiated");
+        }
+        resetPreparingForMaintenance();
+        this.preparingForShutdown = false;
+        this.shutdownTriggered = false;
+        jobManager.enableAsyncJobs();
+        cancelWaitForPendingJobs();
+        // Listeners are only told about the cancel when maintenance had actually been reached
+        ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
+        if (msHost != null && State.Maintenance.equals(msHost.getState())) {
+            onCancelMaintenance();
+        }
+    }
+
+    // (Re)starts the periodic pending jobs check, replacing any check already running
+    private void waitForPendingJobs() {
+        cancelWaitForPendingJobs();
+        pendingJobsCheckTask = Executors.newScheduledThreadPool(1, new NamedThreadFactory("PendingJobsCheck"));
+        long pendingJobsCheckDelayInSecs = 1L; // 1 sec
+        long pendingJobsCheckPeriodInSecs = 3L; // every 3 secs, check more frequently for pending jobs
+        pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this), pendingJobsCheckDelayInSecs, pendingJobsCheckPeriodInSecs, TimeUnit.SECONDS);
+    }
+
+    @Override
+    public void cancelWaitForPendingJobs() {
+        if (pendingJobsCheckTask != null) {
+            pendingJobsCheckTask.shutdown();
+            pendingJobsCheckTask = null;
+        }
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd) {
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) {
+        ManagementServerHostVO msHost = findManagementServer(cmd.getManagementServerId(), "Unable to find the management server, cannot prepare for shutdown");
+
+        if (!State.Up.equals(msHost.getState())) {
+            throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown");
+        }
+
+        executeOnManagementServer(msHost, new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()), "PrepareForShutdownCmd");
+
+        msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) {
+        ManagementServerHostVO msHost = findManagementServer(cmd.getManagementServerId(), "Unable to find the management server, cannot trigger shutdown");
+
+        if (!isInState(msHost, State.Up, State.Maintenance, State.PreparingForShutDown, State.ReadyToShutDown)) {
+            throw new CloudRuntimeException("Management server is not in the right state to trigger shutdown");
+        }
+
+        if (State.Up.equals(msHost.getState())) {
+            msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
+        }
+
+        executeOnManagementServer(msHost, new TriggerShutdownManagementServerHostCommand(msHost.getMsid()), "TriggerShutdownCmd");
+
+        msHostDao.updateState(msHost.getId(), State.ShuttingDown);
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) {
+        ManagementServerHostVO msHost = findManagementServer(cmd.getManagementServerId(), "Unable to find the management server, cannot cancel shutdown");
+
+        if (!isInState(msHost, State.PreparingForShutDown, State.ReadyToShutDown)) {
+            throw new CloudRuntimeException("Management server is not in the right state to cancel shutdown");
+        }
+
+        executeOnManagementServer(msHost, new CancelShutdownManagementServerHostCommand(msHost.getMsid()), "CancelShutdownCmd");
+
+        msHostDao.updateState(msHost.getId(), State.Up);
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) {
+        if (StringUtils.isNotBlank(cmd.getAlgorithm())) {
+            indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm());
+        }
+
+        // Maintenance needs at least one other active management server to take over
+        final List<ManagementServerHostVO> activeMsList = msHostDao.listBy(State.Up);
+        if (CollectionUtils.isEmpty(activeMsList)) {
+            throw new CloudRuntimeException("Cannot prepare for maintenance, no active management servers found");
+        }
+
+        if (activeMsList.size() == 1) {
+            throw new CloudRuntimeException("Prepare for maintenance not supported, there is only one active management server");
+        }
+
+        ManagementServerHostVO msHost = findManagementServer(cmd.getManagementServerId(), "Cannot prepare for maintenance, unable to find the management server");
+
+        if (!State.Up.equals(msHost.getState())) {
+            throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance");
+        }
+
+        // Only one management server may be preparing for maintenance at a time
+        final List<ManagementServerHostVO> preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance);
+        if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) {
+            throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance");
+        }
+
+        if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) {
+            // Indirect agents can only move to servers that are listed in the 'host' setting and are Up
+            List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
+            indirectAgentMsList.remove(msHost.getServiceIP());
+            List<String> nonUpMsList = msHostDao.listNonUpStateMsIPs();
+            indirectAgentMsList.removeAll(nonUpMsList);
+            if (CollectionUtils.isEmpty(indirectAgentMsList)) {
+                throw new CloudRuntimeException(String.format("Cannot prepare for maintenance, no other active management servers found from '%s' setting", ApiServiceConfiguration.ManagementServerAddresses.key()));
+            }
+        }
+
+        // Hosts reference their management server by msid (see the pending jobs check, which
+        // queries the same DAO with the node msid), so look the agents up by msid, not by the API id.
+        agentMgr.setLastAgents(hostDao.listByMs(msHost.getMsid()));
+
+        try {
+            executeOnManagementServer(msHost, new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm()), "PrepareForMaintenanceCmd");
+        } catch (CloudRuntimeException e) {
+            // Do not keep the agents of a maintenance that never started
+            agentMgr.setLastAgents(null);
+            throw e;
+        }
+
+        msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) {
+        ManagementServerHostVO msHost = findManagementServer(cmd.getManagementServerId(), "Unable to find the management server, cannot cancel maintenance");
+
+        if (!isInState(msHost, State.Maintenance, State.PreparingForMaintenance)) {
+            throw new CloudRuntimeException("Management server is not in the right state to cancel maintenance");
+        }
+
+        executeOnManagementServer(msHost, new CancelMaintenanceManagementServerHostCommand(msHost.getMsid()), "CancelMaintenanceCmd");
+
+        msHostDao.updateState(msHost.getId(), State.Up);
+        agentMgr.setLastAgents(null);
+        return prepareMaintenanceResponse(cmd.getManagementServerId());
+    }
+
+    @Override
+    public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) {
+        resetPreparingForMaintenance();
+        this.preparingForShutdown = false;
+        this.shutdownTriggered = false;
+        jobManager.enableAsyncJobs();
+        if (msHost == null) {
+            msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
+        }
+        if (msHost == null) {
+            // Nothing to update; the local flags and the async jobs have already been restored
+            logger.warn("Unable to find the management server, cannot update its state to Up");
+            return;
+        }
+        msHostDao.updateState(msHost.getId(), State.Up);
+    }
+
+    // Loads the management server with the given (database) id, failing with the given message when absent
+    private ManagementServerHostVO findManagementServer(Long managementServerId, String notFoundMessage) {
+        ManagementServerHostVO msHost = msHostDao.findById(managementServerId);
+        if (msHost == null) {
+            throw new CloudRuntimeException(notFoundMessage);
+        }
+        return msHost;
+    }
+
+    // Indicates whether the management server is currently in one of the given states
+    private static boolean isInState(ManagementServerHostVO msHost, State... states) {
+        return Arrays.asList(states).contains(msHost.getState());
+    }
+
+    // Sends the command to the given management server through the cluster manager, logs the
+    // reply and fails with the reply as message unless it reports success.
+    private void executeOnManagementServer(ManagementServerHostVO msHost, Command command, String apiCmdName) {
+        final Command[] cmds = new Command[]{command};
+        String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
+        logger.info(apiCmdName + " result : " + result);
+        if (result == null) {
+            // Defensive: report a missing reply explicitly instead of failing on result.startsWith()
+            throw new CloudRuntimeException(String.format("No response from management server %d for %s", msHost.getMsid(), apiCmdName));
+        }
+        if (!result.startsWith(SUCCESS_RESULT_PREFIX)) {
+            throw new CloudRuntimeException(result);
+        }
+    }
+
+    // Builds the status response for the management server with the given (database) id,
+    // or for the current management server when the id is null.
+    private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) {
+        ManagementServerHostVO msHost;
+        if (managementServerId == null) {
+            msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
+        } else {
+            msHost = msHostDao.findById(managementServerId);
+        }
+        if (msHost == null) {
+            throw new CloudRuntimeException("Unable to find the management server");
+        }
+
+        boolean maintenanceInitiatedForMS = isInState(msHost, State.PreparingForMaintenance, State.Maintenance);
+        boolean shutdownTriggeredForMS = isInState(msHost, State.ShuttingDown, State.PreparingForShutDown, State.ReadyToShutDown);
+        // Agents and jobs are both keyed by the msid of the resolved server; using the request id
+        // here would be wrong for agents and fails when the id is null (current server requested).
+        Long[] msIds = new Long[]{msHost.getMsid()};
+        long agentsCount = hostDao.countByMs(msHost.getMsid());
+        long pendingJobCount = countPendingJobs(msIds);
+        return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, hostDao.listByMs(msHost.getMsid()));
+    }
+
+    @Override
+    public List<Class<?>> getCommands() {
+        final List<Class<?>> cmdList = new ArrayList<>();
+        cmdList.add(PrepareForMaintenanceCmd.class);
+        cmdList.add(CancelMaintenanceCmd.class);
+        cmdList.add(PrepareForShutdownCmd.class);
+        cmdList.add(CancelShutdownCmd.class);
+        cmdList.add(ReadyForShutdownCmd.class);
+        cmdList.add(TriggerShutdownCmd.class);
+        return cmdList;
+    }
+
+    @Override
+    public String getConfigComponentName() {
+        return ManagementServerMaintenanceManager.class.getSimpleName();
+    }
+
+    @Override
+    public ConfigKey<?>[] getConfigKeys() {
+        return new ConfigKey<?>[]{
+                ManagementServerMaintenanceTimeoutInMins
+        };
+    }
+
+    // Periodic task, started by waitForPendingJobs(), that drives this node towards
+    // ReadyToShutDown / shutdown / Maintenance once it has no pending jobs (and, for
+    // maintenance, no agents) left. It stops itself when the operation is cancelled,
+    // completes, fails, or the maintenance window expires.
+    private final class CheckPendingJobsTask extends ManagedContextRunnable {
+
+        private final ManagementServerMaintenanceManager managementServerMaintenanceManager;
+        // The agent transfer is requested once per task; later runs only wait for it to finish
+        private boolean agentsTransferTriggered = false;
+
+        public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) {
+            this.managementServerMaintenanceManager = managementServerMaintenanceManager;
+        }
+
+        @Override
+        protected void runInContext() {
+            try {
+                // If the maintenance or shutdown has been cancelled
+                if (!(managementServerMaintenanceManager.isPreparingForMaintenance() || managementServerMaintenanceManager.isPreparingForShutdown())) {
+                    logger.info("Maintenance/Shutdown cancelled, terminating the pending jobs check timer task");
+                    managementServerMaintenanceManager.cancelWaitForPendingJobs();
+                    return;
+                }
+
+                if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) {
+                    logger.debug("Maintenance window timeout, terminating the pending jobs check timer task");
+                    managementServerMaintenanceManager.cancelPreparingForMaintenance(null);
+                    managementServerMaintenanceManager.cancelWaitForPendingJobs();
+                    return;
+                }
+
+                long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId());
+                int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId());
+                String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]",
+                        managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
+                logger.debug(msg);
+
+                if (totalPendingJobs > 0) {
+                    logger.info(String.format("There are %d pending jobs, trying again later", totalPendingJobs));
+                    return;
+                }
+
+                // No more pending jobs. Good to terminate
+                if (managementServerMaintenanceManager.isShutdownTriggered()) {
+                    logger.info("MS is Shutting Down Now");
+                    // update state to down ?
+                    System.exit(0);
+                }
+
+                ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
+                if (msHost == null) {
+                    logger.warn("Unable to find the management server, trying again later");
+                    return;
+                }
+
+                if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
+                    prepareForMaintenance(msHost, totalAgents);
+                } else if (managementServerMaintenanceManager.isPreparingForShutdown()) {
+                    logger.info("MS is Ready To Shutdown");
+                    msHostDao.updateState(msHost.getId(), State.ReadyToShutDown);
+                    managementServerMaintenanceManager.cancelWaitForPendingJobs();
+                }
+            } catch (final Exception e) {
+                logger.error("Error trying to check/run pending jobs task", e);
+            }
+        }
+
+        // Moves the node to Maintenance once it has no agents left; otherwise requests the
+        // agent transfer (first run only) and cancels the maintenance when that request fails.
+        private void prepareForMaintenance(ManagementServerHostVO msHost, int totalAgents) {
+            if (totalAgents == 0) {
+                logger.info("MS is in Maintenance Mode");
+                msHostDao.updateState(msHost.getId(), State.Maintenance);
+                managementServerMaintenanceManager.onMaintenance();
+                managementServerMaintenanceManager.cancelWaitForPendingJobs();
+                return;
+            }
+
+            if (agentsTransferTriggered) {
+                logger.info(String.format("There are %d agents, trying again later", totalAgents));
+                return;
+            }
+
+            agentsTransferTriggered = true;
+            logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
+            boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs());
+            if (!agentsMigrated) {
+                logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
+                managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
+                managementServerMaintenanceManager.cancelWaitForPendingJobs();
+                return;
+            }
+
+            if (!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) {
+                logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
+                managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
+                managementServerMaintenanceManager.cancelWaitForPendingJobs();
+            }
+        }
+
+        // Configured length of the maintenance window, in milliseconds
+        private long maintenanceWindowInMs() {
+            return ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000;
+        }
+
+        // Time elapsed since prepare for maintenance was initiated, in milliseconds
+        private long maintenanceElapsedTimeInMs() {
+            return System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime();
+        }
+
+        private boolean isMaintenanceWindowExpired() {
+            return maintenanceElapsedTimeInMs() >= maintenanceWindowInMs();
+        }
+
+        // Time left in the maintenance window, in milliseconds; never negative
+        private long remainingMaintenanceWindowInMs() {
+            long remainingMaintenanceWindowTimeInMs = maintenanceWindowInMs() - maintenanceElapsedTimeInMs();
+            return (remainingMaintenanceWindowTimeInMs > 0) ? remainingMaintenanceWindowTimeInMs : 0;
+        }
+    }
+}
diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java index 8fe33317bc0..093a5d35eba 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/BaseShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/BaseShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; import com.cloud.agent.api.Command; diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java new file mode 100644 index 00000000000..50eb73b7bca --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelMaintenanceManagementServerHostCommand.java @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+
+
+package org.apache.cloudstack.maintenance.command;
+
+// Cluster command asking the management server identified by msId to cancel its
+// maintenance. It carries nothing beyond the msId handled by the base command.
+public class CancelMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {
+
+    // msId: msid of the management server the command is addressed to
+    public CancelMaintenanceManagementServerHostCommand(long msId) {
+        super(msId);
+    }
+}
diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java index eef44446aa1..2cbdbd2f07a 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/CancelShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/CancelShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License.
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class CancelShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java new file mode 100644 index 00000000000..8f2a4e62b32 --- /dev/null +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForMaintenanceManagementServerHostCommand.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ + +package org.apache.cloudstack.maintenance.command; + +public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { + String lbAlgorithm; + + public PrepareForMaintenanceManagementServerHostCommand(long msId) { + super(msId); + } + + public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) { + super(msId); + this.lbAlgorithm = lbAlgorithm; + } + + public String getLbAlgorithm() { + return lbAlgorithm; + } +} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java index 32a9201d551..15f04ae11e6 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/PrepareForShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/PrepareForShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. 
-package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class PrepareForShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java similarity index 95% rename from plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java rename to plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java index e0d1879fa35..41e2e7e86a0 100644 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/command/TriggerShutdownManagementServerHostCommand.java +++ b/plugins/maintenance/src/main/java/org/apache/cloudstack/maintenance/command/TriggerShutdownManagementServerHostCommand.java @@ -16,7 +16,7 @@ // under the License. -package org.apache.cloudstack.shutdown.command; +package org.apache.cloudstack.maintenance.command; public class TriggerShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand { diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties similarity index 97% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties index fd85c3085ca..547afff867f 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/module.properties +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/module.properties @@ -14,5 +14,5 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. -name=shutdown +name=maintenance parent=api diff --git a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml similarity index 83% rename from plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml rename to plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml index 5318b3bf446..bc5504634ce 100644 --- a/plugins/shutdown/src/main/resources/META-INF/cloudstack/shutdown/spring-shutdown-context.xml +++ b/plugins/maintenance/src/main/resources/META-INF/cloudstack/maintenance/spring-maintenance-context.xml @@ -22,8 +22,8 @@ http://www.springframework.org/schema/beans/spring-beans.xsd" > - - + + diff --git a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java similarity index 84% rename from plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java rename to plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java index 9f75251c93f..8e1c09bf995 100644 --- a/plugins/shutdown/src/test/java/org/apache/cloudstack/shutdown/ShutdownManagerImplTest.java +++ b/plugins/maintenance/src/test/java/org/apache/cloudstack/maintenance/ManagementServerMaintenanceManagerImplTest.java @@ -15,18 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-package org.apache.cloudstack.shutdown; +package org.apache.cloudstack.maintenance; import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.InjectMocks; import org.mockito.Mock; import org.mockito.Mockito; -import org.mockito.MockitoAnnotations; import org.mockito.Spy; import org.mockito.junit.MockitoJUnitRunner; @@ -34,20 +31,14 @@ import com.cloud.utils.exception.CloudRuntimeException; @RunWith(MockitoJUnitRunner.class) -public class ShutdownManagerImplTest { +public class ManagementServerMaintenanceManagerImplTest { @Spy @InjectMocks - ShutdownManagerImpl spy; + ManagementServerMaintenanceManagerImpl spy; @Mock AsyncJobManager jobManagerMock; - private AutoCloseable closeable; - - @Before - public void setUp() throws Exception { - closeable = MockitoAnnotations.openMocks(this); - } private long prepareCountPendingJobs() { long expectedCount = 1L; @@ -79,14 +70,8 @@ public class ShutdownManagerImplTest { spy.prepareForShutdown(); }); - Mockito.doNothing().when(jobManagerMock).enableAsyncJobs(); spy.cancelShutdown(); Mockito.verify(jobManagerMock).enableAsyncJobs(); } - - @After - public void tearDown() throws Exception { - closeable.close(); - } } diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java index 8c93f2e1f44..ba4822fa852 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/api/MetricConstants.java @@ -20,6 +20,8 @@ package org.apache.cloudstack.api; * metric local api constants */ public interface MetricConstants { + String LAST_AGENTS = "lastagents"; + String AGENTS = "agents"; String AGENT_COUNT = "agentcount"; String AVAILABLE_PROCESSORS = "availableprocessors"; String CONNECTIONS = 
"connections"; diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java index 51524c12912..9fca23dcee3 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/metrics/MetricsServiceImpl.java @@ -895,6 +895,8 @@ public class MetricsServiceImpl extends MutualExclusiveIdsManagerBase implements metricsResponse.setDbLocal(status.isDbLocal()); metricsResponse.setUsageLocal(status.isUsageLocal()); metricsResponse.setAvailableProcessors(status.getAvailableProcessors()); + metricsResponse.setLastAgents(status.getLastAgents()); + metricsResponse.setAgents(status.getAgents()); metricsResponse.setAgentCount(status.getAgentCount()); metricsResponse.setCollectionTime(status.getCollectionTime()); metricsResponse.setSessions(status.getSessions()); diff --git a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java index 95c3fd09c07..d96f5b14f0d 100644 --- a/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java +++ b/plugins/metrics/src/main/java/org/apache/cloudstack/response/ManagementServerMetricsResponse.java @@ -22,6 +22,7 @@ import org.apache.cloudstack.api.MetricConstants; import org.apache.cloudstack.api.response.ManagementServerResponse; import java.util.Date; +import java.util.List; public class ManagementServerMetricsResponse extends ManagementServerResponse { @@ -29,6 +30,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse { @Param(description = "the number of processors available to the JVM") private Integer availableProcessors; + @SerializedName(MetricConstants.LAST_AGENTS) + @Param(description = "the last agents this Management 
Server is responsible for, before preparing for maintenance", since = "4.18.1") + private List lastAgents; + + @SerializedName(MetricConstants.AGENTS) + @Param(description = "the agents this Management Server is responsible for", since = "4.18.1") + private List agents; + @SerializedName(MetricConstants.AGENT_COUNT) @Param(description = "the number of agents this Management Server is responsible for") private Integer agentCount; @@ -121,6 +130,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse { this.availableProcessors = availableProcessors; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/plugins/pom.xml b/plugins/pom.xml index 3d5da1d59ac..1667e151cfc 100755 --- a/plugins/pom.xml +++ b/plugins/pom.xml @@ -118,7 +118,7 @@ outofbandmanagement-drivers/nested-cloudstack outofbandmanagement-drivers/redfish - shutdown + maintenance storage/sharedfs/storagevm storage/image/default diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java deleted file mode 100644 index 22f43cb4f62..00000000000 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManager.java +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.cloudstack.shutdown; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; - -public interface ShutdownManager { - // Returns the number of pending jobs for the given Management server msids. - // NOTE: This is the msid and NOT the id - long countPendingJobs(Long... msIds); - - // Indicates whether a shutdown has been triggered on the current management server - boolean isShutdownTriggered(); - - // Indicates whether the current management server is preparing to shutdown - boolean isPreparingForShutdown(); - - // Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs - void triggerShutdown(); - - // Prepares the current management server to shutdown by not accepting any more async jobs - void prepareForShutdown(); - - // Cancels the shutdown on the current management server - void cancelShutdown(); - - // Returns whether the given ms can be shut down - ReadyForShutdownResponse readyForShutdown(Long managementserverid); - - // Returns whether the any of the ms can be shut down and if a shutdown has been triggered on any running ms - ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd); - - // Prepares the specified management server to shutdown by not accepting any 
more async jobs - ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd); - - // Cancels the shutdown on the specified management server - ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd); - - // Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs - ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd); -} diff --git a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java b/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java deleted file mode 100644 index c33243357fc..00000000000 --- a/plugins/shutdown/src/main/java/org/apache/cloudstack/shutdown/ShutdownManagerImpl.java +++ /dev/null @@ -1,265 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.cloudstack.shutdown; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Timer; -import java.util.TimerTask; - -import javax.inject.Inject; - -import org.apache.cloudstack.api.command.CancelShutdownCmd; -import org.apache.cloudstack.api.command.PrepareForShutdownCmd; -import org.apache.cloudstack.api.command.ReadyForShutdownCmd; -import org.apache.cloudstack.api.command.TriggerShutdownCmd; -import org.apache.cloudstack.api.response.ReadyForShutdownResponse; -import org.apache.cloudstack.framework.jobs.AsyncJobManager; -import org.apache.cloudstack.management.ManagementServerHost.State; -import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand; -import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand; -import org.apache.cloudstack.utils.identity.ManagementServerNode; - -import com.cloud.agent.api.Command; -import com.cloud.cluster.ClusterManager; -import com.cloud.cluster.ManagementServerHostVO; -import com.cloud.cluster.dao.ManagementServerHostDao; -import com.cloud.serializer.GsonHelper; -import com.cloud.utils.component.ManagerBase; -import com.cloud.utils.component.PluggableService; -import com.cloud.utils.exception.CloudRuntimeException; -import com.google.gson.Gson; - -public class ShutdownManagerImpl extends ManagerBase implements ShutdownManager, PluggableService{ - Gson gson; - - @Inject - private AsyncJobManager jobManager; - @Inject - private ManagementServerHostDao msHostDao; - @Inject - private ClusterManager clusterManager; - - private boolean shutdownTriggered = false; - private boolean preparingForShutdown = false; - - private Timer timer = new Timer(); - private TimerTask shutdownTask; - - protected ShutdownManagerImpl() { - super(); - gson = GsonHelper.getGson(); - } - - @Override - public boolean isShutdownTriggered() { - 
return shutdownTriggered; - } - - @Override - public boolean isPreparingForShutdown() { - return preparingForShutdown; - } - - @Override - public long countPendingJobs(Long... msIds) { - return jobManager.countPendingNonPseudoJobs(msIds); - } - - @Override - public void triggerShutdown() { - if (this.shutdownTriggered) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.shutdownTriggered = true; - prepareForShutdown(true); - } - - private void prepareForShutdown(boolean postTrigger) { - // Ensure we don't throw an error if triggering a shutdown after just preparing for it - if (!postTrigger && this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has already been triggered"); - } - this.preparingForShutdown = true; - jobManager.disableAsyncJobs(); - if (this.shutdownTask != null) { - this.shutdownTask.cancel(); - this.shutdownTask = null; - } - this.shutdownTask = new ShutdownTask(this); - long period = 30L * 1000; - long delay = period / 2; - logger.debug(String.format("Scheduling shutdown task with delay: %d and period: %d", delay, period)); - timer.scheduleAtFixedRate(shutdownTask, delay, period); - } - - @Override - public void prepareForShutdown() { - prepareForShutdown(false); - } - - @Override - public void cancelShutdown() { - if (!this.preparingForShutdown) { - throw new CloudRuntimeException("A shutdown has not been triggered"); - } - - this.preparingForShutdown = false; - this.shutdownTriggered = false; - jobManager.enableAsyncJobs(); - if (shutdownTask != null) { - shutdownTask.cancel(); - } - shutdownTask = null; - } - - @Override - public ReadyForShutdownResponse readyForShutdown(Long managementserverid) { - Long[] msIds = null; - boolean shutdownTriggeredAnywhere = false; - State[] shutdownTriggeredStates = {State.ShuttingDown, State.PreparingToShutDown, State.ReadyToShutDown}; - if (managementserverid == null) { - List msHosts = msHostDao.listBy(shutdownTriggeredStates); - if (msHosts != null 
&& !msHosts.isEmpty()) { - msIds = new Long[msHosts.size()]; - for (int i = 0; i < msHosts.size(); i++) { - msIds[i] = msHosts.get(i).getMsid(); - } - shutdownTriggeredAnywhere = !msHosts.isEmpty(); - } - } else { - ManagementServerHostVO msHost = msHostDao.findById(managementserverid); - msIds = new Long[]{msHost.getMsid()}; - shutdownTriggeredAnywhere = Arrays.asList(shutdownTriggeredStates).contains(msHost.getState()); - } - long pendingJobCount = countPendingJobs(msIds); - return new ReadyForShutdownResponse(managementserverid, shutdownTriggeredAnywhere, pendingJobCount == 0, pendingJobCount); - } - - @Override - public ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd) { - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("PrepareForShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.PreparingToShutDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("TriggerShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - 
msHost.setState(State.ShuttingDown); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd) { - ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId()); - final Command[] cmds = new Command[1]; - cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid()); - String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true); - logger.info("CancelShutdownCmd result : " + result); - if (!result.contains("Success")) { - throw new CloudRuntimeException(result); - } - - msHost.setState(State.Up); - msHostDao.persist(msHost); - - return readyForShutdown(cmd.getManagementServerId()); - } - - @Override - public List> getCommands() { - final List> cmdList = new ArrayList<>(); - cmdList.add(CancelShutdownCmd.class); - cmdList.add(PrepareForShutdownCmd.class); - cmdList.add(ReadyForShutdownCmd.class); - cmdList.add(TriggerShutdownCmd.class); - return cmdList; - } - - private final class ShutdownTask extends TimerTask { - - private ShutdownManager shutdownManager; - - public ShutdownTask(ShutdownManager shutdownManager) { - this.shutdownManager = shutdownManager; - } - - @Override - public void run() { - try { - Long totalPendingJobs = shutdownManager.countPendingJobs(ManagementServerNode.getManagementServerId()); - String msg = String.format("Checking for triggered shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d]", - shutdownManager.isShutdownTriggered(), shutdownManager.isPreparingForShutdown(), totalPendingJobs); - logger.info(msg); - - // If the shutdown has been cancelled - if (!shutdownManager.isPreparingForShutdown()) { - logger.info("Shutdown cancelled. Terminating the shutdown timer task"); - this.cancel(); - return; - } - - // No more pending jobs. 
Good to terminate - if (totalPendingJobs == 0) { - if (shutdownManager.isShutdownTriggered()) { - logger.info("Shutting down now"); - System.exit(0); - } - if (shutdownManager.isPreparingForShutdown()) { - logger.info("Ready to shutdown"); - ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); - msHost.setState(State.ReadyToShutDown); - msHostDao.persist(msHost); - } - } - - logger.info("Pending jobs. Trying again later"); - } catch (final Exception e) { - logger.error("Error trying to run shutdown task", e); - } - } - } -} diff --git a/server/src/main/java/com/cloud/api/ApiDispatcher.java b/server/src/main/java/com/cloud/api/ApiDispatcher.java index d8eb26ea0a7..6a43ff10f31 100644 --- a/server/src/main/java/com/cloud/api/ApiDispatcher.java +++ b/server/src/main/java/com/cloud/api/ApiDispatcher.java @@ -94,7 +94,7 @@ public class ApiDispatcher { if (asyncJobManager.isAsyncJobsEnabled()) { asyncCreationDispatchChain.dispatch(new DispatchTask(cmd, params)); } else { - throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs"); + throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. 
Can not accept new jobs"); } } diff --git a/server/src/main/java/com/cloud/api/ApiServer.java b/server/src/main/java/com/cloud/api/ApiServer.java index 824d60eec81..6016b24502e 100644 --- a/server/src/main/java/com/cloud/api/ApiServer.java +++ b/server/src/main/java/com/cloud/api/ApiServer.java @@ -57,6 +57,8 @@ import javax.naming.ConfigurationException; import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpSession; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.user.Account; import com.cloud.user.AccountManager; import com.cloud.user.AccountManagerImpl; @@ -113,6 +115,7 @@ import org.apache.cloudstack.framework.messagebus.MessageDispatcher; import org.apache.cloudstack.framework.messagebus.MessageHandler; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.user.UserPasswordResetManager; +import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang3.EnumUtils; import org.apache.http.ConnectionClosedException; @@ -222,6 +225,8 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer @Inject private ProjectDao projectDao; @Inject + private ManagementServerHostDao msHostDao; + @Inject private UUIDManager uuidMgr; @Inject private UserPasswordResetManager userPasswordResetManager; @@ -471,7 +476,6 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer s_apiNameCmdClassMap.put(apiName, apiCmdList); } apiCmdList.add(cmdClass); - } setEncodeApiResponse(EncodeApiResponse.value()); @@ -1172,6 +1176,9 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer if (ApiConstants.ISSUER_FOR_2FA.equalsIgnoreCase(attrName)) { response.setIssuerFor2FA(attrObj.toString()); } + if (ApiConstants.MANAGEMENT_SERVER_ID.equalsIgnoreCase(attrName)) { + 
response.setManagementServerId(attrObj.toString()); + } } } response.setResponseName("loginresponse"); @@ -1249,6 +1256,13 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer session.setAttribute(ApiConstants.PROVIDER_FOR_2FA, userAcct.getUser2faProvider()); session.setAttribute(ApiConstants.ISSUER_FOR_2FA, issuerFor2FA); + if (accountMgr.isRootAdmin(userAcct.getAccountId())) { + ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId()); + if (msHost != null && msHost.getUuid() != null) { + session.setAttribute(ApiConstants.MANAGEMENT_SERVER_ID, msHost.getUuid()); + } + } + // (bug 5483) generate a session key that the user must submit on every request to prevent CSRF, add that // to the login response so that session-based authenticators know to send the key back final SecureRandom sesssionKeyRandom = new SecureRandom(); diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index 631cdc5b403..3964cb0b1cf 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -143,6 +143,7 @@ import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; import org.apache.cloudstack.engine.subsystem.api.storage.TemplateState; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementVO; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; @@ -613,6 +614,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q @Inject private ManagementServerHostPeerJoinDao mshostPeerJoinDao; + @Inject + private AsyncJobManager jobManager; 
private SearchCriteria getMinimumCpuServiceOfferingJoinSearchCriteria(int cpu) { SearchCriteria sc = _srvOfferingJoinDao.createSearchCriteria(); @@ -2353,6 +2356,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q Long startIndex = cmd.getStartIndex(); Long pageSize = cmd.getPageSizeVal(); Hypervisor.HypervisorType hypervisorType = cmd.getHypervisor(); + Long msId = cmd.getManagementServerId(); Filter searchFilter = new Filter(HostVO.class, "id", Boolean.TRUE, startIndex, pageSize); @@ -2368,6 +2372,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q hostSearchBuilder.and("clusterId", hostSearchBuilder.entity().getClusterId(), SearchCriteria.Op.EQ); hostSearchBuilder.and("resourceState", hostSearchBuilder.entity().getResourceState(), SearchCriteria.Op.EQ); hostSearchBuilder.and("hypervisor_type", hostSearchBuilder.entity().getHypervisorType(), SearchCriteria.Op.EQ); + hostSearchBuilder.and("mgmt_server_id", hostSearchBuilder.entity().getManagementServerId(), SearchCriteria.Op.EQ); if (keyword != null) { hostSearchBuilder.and().op("keywordName", hostSearchBuilder.entity().getName(), SearchCriteria.Op.LIKE); @@ -2448,6 +2453,13 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q sc.setParameters("hypervisor_type", hypervisorType); } + if (msId != null) { + ManagementServerHostVO msHost = msHostDao.findById(msId); + if (msHost != null) { + sc.setParameters("mgmt_server_id", msHost.getMsid()); + } + } + Pair, Integer> uniqueHostPair = hostDao.searchAndCount(sc, searchFilter); Integer count = uniqueHostPair.second(); List hostIds = uniqueHostPair.first().stream().map(HostVO::getId).collect(Collectors.toList()); @@ -5426,6 +5438,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer)); } } + mgmtResponse.setAgentsCount((long) hostDao.countByMs(mgmt.getMsid())); + 
mgmtResponse.setPendingJobsCount(jobManager.countPendingNonPseudoJobs(mgmt.getMsid())); mgmtResponse.setObjectName("managementserver"); return mgmtResponse; } diff --git a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java index 319e08deb39..08b896edb17 100644 --- a/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/AsyncJobJoinDaoImpl.java @@ -20,6 +20,8 @@ import java.util.Date; import java.util.List; +import javax.inject.Inject; + import org.springframework.stereotype.Component; import org.apache.cloudstack.api.ResponseObject; @@ -29,6 +31,8 @@ import org.apache.cloudstack.framework.jobs.AsyncJob; import com.cloud.api.ApiSerializerHelper; import com.cloud.api.SerializationContext; import com.cloud.api.query.vo.AsyncJobJoinVO; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; @@ -36,6 +40,9 @@ import com.cloud.utils.db.SearchCriteria; @Component public class AsyncJobJoinDaoImpl extends GenericDaoBase implements AsyncJobJoinDao { + @Inject + private ManagementServerHostDao managementServerHostDao; + private final SearchBuilder jobIdSearch; protected AsyncJobJoinDaoImpl() { @@ -63,7 +70,13 @@ public class AsyncJobJoinDaoImpl extends GenericDaoBase im jobResponse.setJobId(job.getUuid()); jobResponse.setJobStatus(job.getStatus()); jobResponse.setJobProcStatus(job.getProcessStatus()); - jobResponse.setMsid(job.getExecutingMsid()); + if (job.getExecutingMsid() != null) { + ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(job.getExecutingMsid()); + if (managementServer != null) { + jobResponse.setManagementServerId(managementServer.getUuid()); + 
jobResponse.setManagementServerName(managementServer.getName()); + } + } if (job.getInstanceType() != null && job.getInstanceId() != null) { jobResponse.setJobInstanceType(job.getInstanceType().toString()); diff --git a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java index 42966435d4a..feee12dcb20 100644 --- a/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/main/java/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -58,6 +58,8 @@ import com.cloud.storage.StorageStats; import com.cloud.utils.db.GenericDaoBase; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.dao.VMInstanceDao; @Component public class HostJoinDaoImpl extends GenericDaoBase implements HostJoinDao { @@ -73,6 +75,8 @@ public class HostJoinDaoImpl extends GenericDaoBase implements @Inject private ManagementServerHostDao managementServerHostDao; @Inject + private VMInstanceDao virtualMachineDao; + @Inject private AnnotationDao annotationDao; @Inject private AccountManager accountManager; @@ -126,12 +130,19 @@ public class HostJoinDaoImpl extends GenericDaoBase implements hostResponse.setHypervisor(hypervisorType); } hostResponse.setHostType(host.getType()); + if (host.getType().equals(Host.Type.ConsoleProxy) || host.getType().equals(Host.Type.SecondaryStorageVM)) { + VMInstanceVO vm = virtualMachineDao.findVMByInstanceNameIncludingRemoved(host.getName()); + if (vm != null) { + hostResponse.setVirtualMachineId(vm.getUuid()); + } + } hostResponse.setLastPinged(new Date(host.getLastPinged())); Long mshostId = host.getManagementServerId(); if (mshostId != null) { ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(host.getManagementServerId()); if (managementServer != null) { hostResponse.setManagementServerId(managementServer.getUuid()); + 
hostResponse.setManagementServerName(managementServer.getName()); } } hostResponse.setName(host.getName()); diff --git a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java index 06ccc1a63f7..373eb80349f 100644 --- a/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java +++ b/server/src/main/java/com/cloud/network/SshKeysDistriMonitor.java @@ -85,21 +85,23 @@ public class SshKeysDistriMonitor implements Listener { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) { - /*TODO: Get the private/public keys here*/ + /*TODO: Get the private/public keys here*/ - String pubKey = _configDao.getValue("ssh.publickey"); - String prvKey = _configDao.getValue("ssh.privatekey"); + String pubKey = _configDao.getValue("ssh.publickey"); + String prvKey = _configDao.getValue("ssh.privatekey"); - try { - ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey); - Commands c = new Commands(cmds); - _agentMgr.send(host.getId(), c, this); - } catch (AgentUnavailableException e) { - logger.debug("Failed to send keys to agent: {}", host); - } + try { + ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey); + Commands c = new Commands(cmds); + _agentMgr.send(host.getId(), c, this); + } catch (AgentUnavailableException e) { + logger.debug("Failed to send keys to agent: {}", host); } 
} } diff --git a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java index 067f2fbdbb2..0c37336c09b 100644 --- a/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java +++ b/server/src/main/java/com/cloud/network/security/SecurityGroupListener.java @@ -164,22 +164,23 @@ public class SecurityGroupListener implements Listener { if (logger.isInfoEnabled()) logger.info("Received a host startup notification"); - if (cmd instanceof StartupRoutingCommand) { - //if (Boolean.toString(true).equals(host.getDetail("can_bridge_firewall"))) { - try { - int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); - CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); - Commands c = new Commands(cleanupCmd); - _agentMgr.send(host.getId(), c, this); - if (logger.isInfoEnabled()) - logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); - } catch (AgentUnavailableException e) { - //usually hypervisors that do not understand sec group rules. - logger.debug("Unable to schedule network rules cleanup for host {}", host, e); - } - if (_workTracker != null) { - _workTracker.processConnect(host.getId()); - } + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + try { + int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2); + CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval); + Commands c = new Commands(cleanupCmd); + _agentMgr.send(host.getId(), c, this); + if (logger.isInfoEnabled()) + logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval()); + } catch (AgentUnavailableException e) { + //usually hypervisors that do not understand sec group rules. 
+ logger.debug("Unable to schedule network rules cleanup for host {}", host, e); + } + if (_workTracker != null) { + _workTracker.processConnect(host.getId()); } } diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java index 1349e03f205..3cb01ba058c 100755 --- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java @@ -47,9 +47,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -1284,7 +1284,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); // verify input parameters @@ -1501,7 +1501,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } @Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { final Long hostId = cmd.getId(); final 
HostVO host = _hostDao.findById(hostId); @@ -2508,13 +2508,17 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } private Host createHostAndAgent(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { + return createHostAndAgent(resource, details, old, hostTags, forRebalance, false); + } + + private Host createHostAndAgent(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, final boolean isTransferredConnection) { HostVO host = null; StartupCommand[] cmds = null; boolean hostExists = false; boolean created = false; try { - cmds = resource.initialize(); + cmds = resource.initialize(isTransferredConnection); if (cmds == null) { logger.info("Unable to fully initialize the agent because no StartupCommands are returned"); return null; @@ -2685,7 +2689,12 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, @Override public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { - final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance); + return createHostAndAgent(hostId, resource, details, old, hostTags, forRebalance, false); + } + + @Override + public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, boolean isTransferredConnection) { + final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance, isTransferredConnection); return host; } diff --git a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java index 72c28953021..b0f11e4fcba 100644 --- 
a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java +++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java @@ -32,7 +32,7 @@ import javax.naming.ConfigurationException; import org.apache.cloudstack.affinity.AffinityGroupProcessor; import org.apache.cloudstack.api.ApiCommandResourceType; import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.framework.config.ConfigKey; @@ -405,7 +405,7 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin */ private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException { logger.debug(String.format("Trying to set %s into maintenance", host)); - PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd(); + PrepareForHostMaintenanceCmd cmd = new PrepareForHostMaintenanceCmd(); cmd.setId(host.getId()); resourceManager.maintain(cmd); waitForHostInMaintenance(host.getId()); diff --git a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java index 172ab1e83eb..c23e8ed2c9d 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java +++ b/server/src/main/java/com/cloud/server/ManagementServerHostStatsEntry.java @@ -19,6 +19,7 @@ package com.cloud.server; import java.util.Date; +import java.util.List; public class ManagementServerHostStatsEntry implements ManagementServerHostStats { @@ -45,6 +46,8 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats private String jvmVendor; private String jvmVersion; private String osDistribution; + 
private List lastAgents; + private List agents; private int agentCount; private long heapMemoryUsed; @@ -199,6 +202,16 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats return osDistribution; } + @Override + public List getLastAgents() { + return lastAgents; + } + + @Override + public List getAgents() { + return agents; + } + @Override public int getAgentCount() { return agentCount; @@ -290,6 +303,14 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats this.osDistribution = osDistribution; } + public void setLastAgents(List lastAgents) { + this.lastAgents = lastAgents; + } + + public void setAgents(List agents) { + this.agents = agents; + } + public void setAgentCount(int agentCount) { this.agentCount = agentCount; } diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 76d2943e18c..790e4bbbd38 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -101,13 +101,13 @@ import org.apache.cloudstack.api.command.admin.guest.UpdateGuestOsMappingCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; import org.apache.cloudstack.api.command.admin.host.DeleteHostCmd; import org.apache.cloudstack.api.command.admin.host.FindHostsForMigrationCmd; import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd; import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; -import 
org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.ReleaseHostReservationCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; @@ -3508,14 +3508,14 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe cmdList.add(MoveDomainCmd.class); cmdList.add(AddHostCmd.class); cmdList.add(AddSecondaryStorageCmd.class); - cmdList.add(CancelMaintenanceCmd.class); + cmdList.add(CancelHostMaintenanceCmd.class); cmdList.add(CancelHostAsDegradedCmd.class); cmdList.add(DeclareHostAsDegradedCmd.class); cmdList.add(DeleteHostCmd.class); cmdList.add(ListHostsCmd.class); cmdList.add(ListHostTagsCmd.class); cmdList.add(FindHostsForMigrationCmd.class); - cmdList.add(PrepareForMaintenanceCmd.class); + cmdList.add(PrepareForHostMaintenanceCmd.class); cmdList.add(ReconnectHostCmd.class); cmdList.add(UpdateHostCmd.class); cmdList.add(UpdateHostPasswordCmd.class); diff --git a/server/src/main/java/com/cloud/server/StatsCollector.java b/server/src/main/java/com/cloud/server/StatsCollector.java index 2bdc008ca1a..c70b36b4091 100644 --- a/server/src/main/java/com/cloud/server/StatsCollector.java +++ b/server/src/main/java/com/cloud/server/StatsCollector.java @@ -829,6 +829,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc } private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) { + newEntry.setLastAgents(_agentMgr.getLastAgents()); + List agents = _hostDao.listByMs(msid); + newEntry.setAgents(agents); int count = _hostDao.countByMs(msid); newEntry.setAgentCount(count); } diff --git a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java index a0e10c646b5..6f484870e72 
100644 --- a/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java +++ b/server/src/main/java/com/cloud/storage/listener/StoragePoolMonitor.java @@ -95,49 +95,51 @@ public class StoragePoolMonitor implements Listener { @Override public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException { - if (cmd instanceof StartupRoutingCommand) { - StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; - if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || + if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) { + return; + } + + StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd; + if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM || scCmd.getHypervisorType() == HypervisorType.VMware || scCmd.getHypervisorType() == HypervisorType.Simulator || scCmd.getHypervisorType() == HypervisorType.Ovm || scCmd.getHypervisorType() == HypervisorType.Hyperv || scCmd.getHypervisorType() == HypervisorType.LXC || scCmd.getHypervisorType() == HypervisorType.Ovm3) { - List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); - List zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); - List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); - zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); - pools.addAll(zoneStoragePoolsByTags); - List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); - pools.addAll(zoneStoragePoolsByAnyHypervisor); + List pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER); + List zoneStoragePoolsByTags = 
_poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false); + List zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType()); + zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor); + pools.addAll(zoneStoragePoolsByTags); + List zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any); + pools.addAll(zoneStoragePoolsByAnyHypervisor); - // get the zone wide disabled pools list if global setting is true. - if (StorageManager.MountDisabledStoragePool.value()) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); + // get the zone wide disabled pools list if global setting is true. + if (StorageManager.MountDisabledStoragePool.value()) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE)); + } + + // get the cluster wide disabled pool list + if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { + pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); + } + + for (StoragePoolVO pool : pools) { + if (!pool.isShared()) { + continue; } - // get the cluster wide disabled pool list - if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) { - pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER)); + if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) { + throw new ConnectionException(true, String.format("Unable to prepare OCFS2 nodes for pool %s", pool)); } - for (StoragePoolVO pool : pools) { - if (!pool.isShared()) { - continue; - } - - if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) { - throw new ConnectionException(true, 
String.format("Unable to prepare OCFS2 nodes for pool %s", pool)); - } - - Long hostId = host.getId(); - if (logger.isDebugEnabled()) { - logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool); - } - try { - _storageManager.connectHostToSharedPool(host, pool.getId()); - _storageManager.createCapacityEntry(pool.getId()); - } catch (Exception e) { - throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e); - } + Long hostId = host.getId(); + if (logger.isDebugEnabled()) { + logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool); + } + try { + _storageManager.connectHostToSharedPool(host, pool.getId()); + _storageManager.createCapacityEntry(pool.getId()); + } catch (Exception e) { + throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e); } } } diff --git a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java index 97e503974cf..027a0530383 100644 --- a/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java +++ b/server/src/main/java/org/apache/cloudstack/agent/lb/IndirectAgentLBServiceImpl.java @@ -37,6 +37,11 @@ import org.apache.cloudstack.framework.config.Configurable; import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; +import com.cloud.agent.api.MigrateAgentConnectionCommand; +import com.cloud.cluster.ManagementServerHostVO; +import com.cloud.cluster.dao.ManagementServerHostDao; +import com.cloud.dc.DataCenterVO; +import com.cloud.dc.dao.DataCenterDao; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.dao.HostDao; @@ -44,6 +49,8 @@ import com.cloud.hypervisor.Hypervisor; import 
com.cloud.resource.ResourceState; import com.cloud.utils.component.ComponentLifecycleBase; import com.cloud.utils.exception.CloudRuntimeException; + +import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable { @@ -63,14 +70,35 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement @Inject private HostDao hostDao; @Inject + private DataCenterDao dcDao; + @Inject + private ManagementServerHostDao mshostDao; + @Inject private AgentManager agentManager; ////////////////////////////////////////////////////// /////////////// Agent MSLB Methods /////////////////// ////////////////////////////////////////////////////// + @Override + public List getManagementServerList() { + final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); + if (StringUtils.isEmpty(msServerAddresses)) { + throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", + ApiServiceConfiguration.ManagementServerAddresses.key())); + } + + List msList = new ArrayList<>(Arrays.asList(msServerAddresses.replace(" ", "").split(","))); + return msList; + } + @Override public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList) { + return getManagementServerList(hostId, dcId, orderedHostIdList, null); + } + + @Override + public List getManagementServerList(final Long hostId, final Long dcId, final List orderedHostIdList, String lbAlgorithm) { final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value(); if (StringUtils.isEmpty(msServerAddresses)) { throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting", @@ -90,7 +118,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement hostIdList.add(hostId); } - 
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(); + final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm); final List msList = Arrays.asList(msServerAddresses.replace(" ", "").split(",")); return algorithm.sort(msList, hostIdList, hostId); } @@ -146,6 +174,30 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement return agentBasedHosts; } + private List getAllAgentBasedHosts(long msId) { + final List allHosts = hostDao.listHostsByMs(msId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + + private List getAllAgentBasedHostsInDc(long msId, long dcId) { + final List allHosts = hostDao.listHostsByMsAndDc(msId, dcId); + if (allHosts == null) { + return new ArrayList<>(); + } + final List agentBasedHosts = new ArrayList<>(); + for (final Host host : allHosts) { + conditionallyAddHost(agentBasedHosts, host); + } + return agentBasedHosts; + } + private void conditionallyAddHost(List agentBasedHosts, Host host) { if (host == null) { if (logger.isTraceEnabled()) { @@ -191,13 +243,33 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement agentBasedHosts.add(host); } + @Override + public boolean haveAgentBasedHosts(long msId) { + return CollectionUtils.isNotEmpty(getAllAgentBasedHosts(msId)); + } + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() { - final String algorithm = getLBAlgorithmName(); - if (algorithmMap.containsKey(algorithm)) { - return algorithmMap.get(algorithm); + return getAgentMSLBAlgorithm(null); + } + + private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm(String lbAlgorithm) { + boolean algorithmNameFromConfig = false; + if 
(StringUtils.isEmpty(lbAlgorithm)) { + lbAlgorithm = getLBAlgorithmName(); + algorithmNameFromConfig = true; + } + if (algorithmMap.containsKey(lbAlgorithm)) { + return algorithmMap.get(lbAlgorithm); + } + throw new CloudRuntimeException(String.format("Algorithm %s%s not found, valid values are: %s", + lbAlgorithm, algorithmNameFromConfig? " configured for '" + IndirectAgentLBAlgorithm.key() + "'" : "", algorithmMap.keySet())); + } + + @Override + public void checkLBAlgorithmName(String lbAlgorithm) { + if (!algorithmMap.containsKey(lbAlgorithm)) { + throw new CloudRuntimeException(String.format("Invalid algorithm %s, valid values are: %s", lbAlgorithm, algorithmMap.keySet())); } - throw new CloudRuntimeException(String.format("Algorithm configured for '%s' not found, valid values are: %s", - IndirectAgentLBAlgorithm.key(), algorithmMap.keySet())); } //////////////////////////////////////////////////////////// @@ -224,6 +296,73 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement } } + @Override + public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) { + if (timeoutDurationInMs <= 0) { + logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid)); + return false; + } + + logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid)); + long migrationStartTime = System.currentTimeMillis(); + if (!haveAgentBasedHosts(fromMsId)) { + logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid)); + return true; + } + + boolean lbAlgorithmChanged = false; + if (StringUtils.isNotBlank(lbAlgorithm) && !lbAlgorithm.equalsIgnoreCase(getLBAlgorithmName())) { + logger.debug(String.format("Indirect agent lb algorithm changed to %s", lbAlgorithm)); + 
lbAlgorithmChanged = true; + } + + final List avoidMsList = mshostDao.listNonUpStateMsIPs(); + ManagementServerHostVO ms = mshostDao.findByMsid(fromMsId); + if (ms != null && !avoidMsList.contains(ms.getServiceIP())) { + avoidMsList.add(ms.getServiceIP()); + } + + List dataCenterList = dcDao.listAll(); + for (DataCenterVO dc : dataCenterList) { + Long dcId = dc.getId(); + List orderedHostIdList = getOrderedHostIdList(dcId); + List agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId); + if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) { + continue; + } + logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc.toString())); + for (final Host host : agentBasedHostsOfMsInDc) { + long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime; + if (migrationElapsedTimeInMs >= timeoutDurationInMs) { + logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid)); + return false; + } + + List msList = null; + Long lbCheckInterval = 0L; + if (lbAlgorithmChanged) { + // send new MS list when there is change in lb algorithm + msList = getManagementServerList(host.getId(), dcId, orderedHostIdList, lbAlgorithm); + lbCheckInterval = getLBPreferredHostCheckInterval(host.getClusterId()); + } + + final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval); + agentManager.easySend(host.getId(), cmd); //answer not received as the agent disconnects and reconnects to other ms + updateLastManagementServer(host.getId(), fromMsId); + } + } + + return true; + } + + private void updateLastManagementServer(long hostId, long msId) { + HostVO hostVO = hostDao.findById(hostId); + if (hostVO != null) { + hostVO.setLastManagementServerId(msId); + hostDao.update(hostId, hostVO); + } + } + private void 
configureAlgorithmMap() { final List algorithms = new ArrayList<>(); algorithms.add(new IndirectAgentLBStaticAlgorithm()); diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 68abe7a16f1..60c2095d5f4 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -268,8 +268,8 @@ - - + + diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index e8b297ff188..9d3a4fbee45 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -46,9 +46,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd; import org.apache.cloudstack.api.command.admin.host.AddHostCmd; import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd; import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd; -import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd; import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd; import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd; @@ -79,7 +79,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana * @see 
com.cloud.resource.ResourceService#cancelMaintenance(com.cloud.api.commands.CancelMaintenanceCmd) */ @Override - public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { + public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } @@ -142,7 +142,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana * @see com.cloud.resource.ResourceService#maintain(com.cloud.api.commands.PrepareForMaintenanceCmd) */ @Override - public Host maintain(final PrepareForMaintenanceCmd cmd) { + public Host maintain(final PrepareForHostMaintenanceCmd cmd) { // TODO Auto-generated method stub return null; } @@ -250,6 +250,15 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana return null; } + /* (non-Javadoc) + * @see com.cloud.resource.ResourceManager#createHostAndAgent(java.lang.Long, com.cloud.resource.ServerResource, java.util.Map, boolean, java.util.List, boolean, boolean) + */ + @Override + public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance, boolean isTransferredConnection) { + // TODO Auto-generated method stub + return null; + } + /* (non-Javadoc) * @see com.cloud.resource.ResourceManager#addHost(long, com.cloud.resource.ServerResource, com.cloud.host.Host.Type, java.util.Map) */ diff --git a/server/src/test/resources/createNetworkOffering.xml b/server/src/test/resources/createNetworkOffering.xml index 99418467e0a..a3f43407c61 100644 --- a/server/src/test/resources/createNetworkOffering.xml +++ b/server/src/test/resources/createNetworkOffering.xml @@ -1,79 +1,80 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/test/integration/smoke/test_safe_shutdown.py b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py similarity index 58% rename from test/integration/smoke/test_safe_shutdown.py rename to test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py index d757bb6d068..4eff33122fe 100644 --- a/test/integration/smoke/test_safe_shutdown.py +++ b/test/integration/smoke/test_ms_maintenance_and_safe_shutdown.py @@ -22,19 +22,19 @@ from marvin.lib.utils import * from marvin.lib.base import * from marvin.lib.common import * -class TestSafeShutdown(cloudstackTestCase): +class TestMSMaintenanceAndSafeShutdown(cloudstackTestCase): """ - Tests safely shutting down the Management Server + Tests MS maintenance and safe shutting down the Management Server """ def setUp(self): self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ self.cleanup = [] def tearDown(self): - self.startServer() - super(TestSafeShutdown, self).tearDown() + super(TestMSMaintenanceAndSafeShutdown, self).tearDown() def isServerShutdown(self): sshClient = SshClient( @@ -87,8 +87,61 @@ class TestSafeShutdown(cloudstackTestCase): {"name": "test", "displaytext": "test"} ) + def getActiveManagementServers(self): + cmd = listManagementServers.listManagementServersCmd() + servers = self.apiclient.listManagementServers(cmd) + active_servers = [] + for idx, server in enumerate(servers): + if server.state == 'Up': + active_servers.append(server.serviceip) + return active_servers + @attr(tags=["advanced", "smoke"]) - def test_01_prepare_and_cancel_shutdown(self): + def test_01_prepare_and_cancel_maintenance(self): + active_management_servers = self.getActiveManagementServers() + if len(active_management_servers) <= 1: + self.skipTest("Skipping test case, this test is intended for only multiple management servers") + + hypervisor = self.hypervisor.lower() + if hypervisor == 'kvm': + 
list_configurations_cmd = listConfigurations.listConfigurationsCmd() + list_configurations_cmd.name = "host" + list_configurations_response = self.apiclient.listConfigurations(list_configurations_cmd) + self.assertNotEqual(len(list_configurations_response), 0, + "Check if the list configurations API returns a non-empty response") + + for item in list_configurations_response: + if item.name == list_configurations_cmd.name: + host_config = item + + hosts = host_config.value.split(",") + if len(hosts) <= 1: + self.skipTest( + "Skipping test case, this test is intended for only multiple management server hosts configured on host setting for kvm") + + try : + prepare_for_maintenance_cmd = prepareForMaintenance.prepareForMaintenanceCmd() + prepare_for_maintenance_cmd.managementserverid = 1 + response = self.apiclient.prepareForMaintenance(prepare_for_maintenance_cmd) + self.assertEqual( + response.maintenanceinitiated, + True, + "Failed to prepare for maintenance" + ) + try : + self.run_async_cmd() + except Exception as e: + self.debug("Prepare for maintenance check successful, API failure: %s" % e) + finally : + cancel_maintenance_cmd = cancelMaintenance.cancelMaintenanceCmd() + cancel_maintenance_cmd.managementserverid = 1 + self.apiclient.cancelMaintenance(cancel_maintenance_cmd) + ## Just to be sure, run another async command + project = self.run_async_cmd() + self.cleanup.append(project) + + @attr(tags=["advanced", "smoke"]) + def test_02_prepare_and_cancel_shutdown(self): try : prepare_for_shutdown_cmd = prepareForShutdown.prepareForShutdownCmd() prepare_for_shutdown_cmd.managementserverid = 1 @@ -111,7 +164,7 @@ class TestSafeShutdown(cloudstackTestCase): self.cleanup.append(project) @attr(tags=["advanced", "smoke"]) - def test_02_trigger_shutdown(self): + def test_03_trigger_shutdown(self): try : cmd = triggerShutdown.triggerShutdownCmd() cmd.managementserverid = 1 diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py index 8d28749a637..c05b8fe2798 
100644 --- a/tools/apidoc/gen_toc.py +++ b/tools/apidoc/gen_toc.py @@ -233,7 +233,8 @@ known_categories = { 'listQuarantinedIp': 'IP Quarantine', 'updateQuarantinedIp': 'IP Quarantine', 'removeQuarantinedIp': 'IP Quarantine', - 'Shutdown': 'Management', + 'Shutdown': 'Maintenance', + 'Maintenance': 'Maintenance', 'addObjectStoragePool': 'Object Store', 'listObjectStoragePools': 'Object Store', 'deleteObjectStoragePool': 'Object Store', diff --git a/ui/public/locales/en.json b/ui/public/locales/en.json index 820f08b8cb4..e07962d63d0 100644 --- a/ui/public/locales/en.json +++ b/ui/public/locales/en.json @@ -349,6 +349,7 @@ "label.agent.username": "Agent username", "label.agentport": "Agent port", "label.agentstate": "Agent state", +"label.agentscount": "Number Of connected agents", "label.agree": "Agree", "label.alert": "Alert", "label.alert.details": "Alert details", @@ -470,7 +471,7 @@ "label.cachemode": "Write-cache type", "label.cancel": "Cancel", "label.cancel.shutdown": "Cancel Shutdown", -"label.cancelmaintenance": "Cancel maintenance", +"label.cancel.maintenance": "Cancel Maintenance", "label.cancel.host.as.degraded": "Cancel host as degraded", "label.capacity": "Capacity", "label.capacitybytes": "Capacity bytes", @@ -569,6 +570,7 @@ "label.confirmdeclineinvitation": "Are you sure you want to decline this project invitation?", "label.confirmpassword": "Confirm password", "label.confirmpassword.description": "Please type the same password again.", +"label.connected.agents": "Connected Agents", "label.connect": "Connect", "label.connectiontimeout": "Connection timeout", "label.conservemode": "Conserve mode", @@ -1385,6 +1387,7 @@ "label.management.server": "Management server", "label.management.servers": "Management servers", "label.management.server.peers": "Peers", +"label.managementservername": "Management Server", "label.managementservers": "Number of management servers", "label.matchall": "Match all", "label.max": "Max.", @@ -1684,6 +1687,7 @@ 
"label.peerstate": "Peer State", "label.peerstate.lastupdated": "Peer State Updated Time", "label.pending.jobs": "Pending Jobs", +"label.pendingjobscount": "Number Of pending jobs", "label.per.account": "Per Account", "label.per.zone": "Per zone", "label.percentage": "Percentage", @@ -1723,7 +1727,7 @@ "label.prefix": "Prefix", "label.prefix.type": "Prefix type", "label.prepare.for.shutdown": "Prepare for Shutdown", -"label.prepareformaintenance": "Prepare for Maintenance", +"label.prepare.for.maintenance": "Prepare for Maintenance", "label.presetup": "PreSetup", "label.prev": "Prev", "label.previous": "Previous", @@ -2052,6 +2056,7 @@ "label.sequence": "Sequence", "label.server": "Server", "label.server.certificate": "Server certificate", +"label.serviceip": "Service IP", "label.service.connectivity.distributedroutercapabilitycheckbox": "Distributed router", "label.service.connectivity.regionlevelvpccapabilitycheckbox": "Region level VPC", "label.service.group": "Service group", @@ -2825,7 +2830,8 @@ "message.backup.create": "Are you sure you want create an Instance backup?", "message.backup.offering.remove": "Are you sure you want to remove Instance from backup offering and delete the backup chain?", "message.backup.restore": "Please confirm that you want to restore the Instance backup?", -"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management server. It will resume accepting any new Async Jobs.", +"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management Server. It will resume accepting any new Async Jobs.", +"message.cancel.maintenance": "Please confirm that you would like to cancel the maintenance on this Management Server. 
It will resume accepting any new Async Jobs.", "message.certificate.upload.processing": "Certificate upload in progress", "message.change.disk.offering.sharedfs.failed": "Failed to change disk offering for the Shared FileSystem.", "message.change.disk.offering.sharedfs.processing": "Changing disk offering for the Shared FileSystem.", @@ -3341,7 +3347,8 @@ "message.please.wait.while.zone.is.being.created": "Please wait while your zone is being created; this may take a while...", "message.pod.dedicated": "Pod dedicated.", "message.pod.dedication.released": "Pod dedication released.", -"message.prepare.for.shutdown": "Please confirm that you would like to prep this Management server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.shutdown": "Please confirm that you would like to prepare this Management Server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.", +"message.prepare.for.maintenance": "Please confirm that you would like to prepare this Management Server for maintenance. It will not accept any new Async Jobs.", "message.primary.storage.invalid.state": "Primary storage is not in Up state", "message.processing.complete": "Processing complete!", "message.protocol.description": "For XenServer, choose NFS, iSCSI, or PreSetup. For KVM, choose NFS, SharedMountPoint, RDB, CLVM or Gluster. For vSphere, choose NFS, PreSetup (VMFS or iSCSI or FiberChannel or vSAN or vVols) or DatastoreCluster. For Hyper-V, choose SMB/CIFS. For LXC, choose NFS or SharedMountPoint. For OVM, choose NFS or OCFS2.", @@ -3431,7 +3438,8 @@ "message.setup.physical.network.during.zone.creation.basic": "When adding a basic zone, you can set up one physical Network, which corresponds to a NIC on the hypervisor. The Network carries several types of traffic.

You may also add other traffic types onto the physical Network.", "message.shared.network.offering.warning": "Domain admins and regular Users can only create shared Networks from Network offering with the setting specifyvlan=false. Please contact an administrator to create a Network offering if this list is empty.", "message.shared.network.unsupported.for.nsx": "Shared networks aren't supported for NSX enabled zones", -"message.shutdown.triggered": "A shutdown has been triggered. CloudStack will not accept new jobs", +"message.shutdown.triggered": "Shutdown has been triggered. This Management Server will not accept new jobs", +"message.maintenance.initiated": "Maintenance has been initiated. This Management Server will not accept new jobs", "message.snapshot.additional.zones": "Snapshots will always be created in its native zone - %x, here you can select additional zone(s) where it will be copied to at creation time", "message.sourcenatip.change.warning": "WARNING: Changing the sourcenat IP address of the network will cause connectivity downtime for the Instances with NICs in the Network.", "message.sourcenatip.change.inhibited": "Changing the sourcenat to this IP of the Network to this address is inhibited as firewall rules are defined for it. This can include port forwarding or load balancing rules.\n - If this is an Isolated Network, please use updateNetwork/click the edit button.\n - If this is a VPC, first clear all other rules for this address.", @@ -3595,7 +3603,7 @@ "message.tooltip.reserved.system.netmask": "The Network prefix that defines the pod subnet. Uses CIDR notation.", "message.traffic.type.deleted": "Successfully deleted traffic type", "message.traffic.type.to.basic.zone": "traffic type to basic zone", -"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management server. 
It will not accept any new Async Jobs and will terminate after there are no pending jobs.", +"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management Server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.", "message.type.values.to.add": "Please add additional values by typing them in", "message.update.autoscale.policy.failed": "Failed to update autoscale policy", "message.update.autoscale.vmgroup.failed": "Failed to update autoscale group", diff --git a/ui/src/components/page/GlobalLayout.vue b/ui/src/components/page/GlobalLayout.vue index 6dd5c530fa5..2002ca3bfc8 100644 --- a/ui/src/components/page/GlobalLayout.vue +++ b/ui/src/components/page/GlobalLayout.vue @@ -17,11 +17,14 @@