Support for Management Server Maintenance Mode (#9854)

* Support for Management Server Maintenance

- New APIs: prepareForMaintenance and cancelMaintenance, with required parameter - managementserverid.

- New management server states for maintenance: PreparingForMaintenance, Maintenance.

- listHosts API with optional parameter – managementserverid, to list the hosts connected to the management server.

- Support management server maintenance when more than one active management server is available.

- Triggers transfer of agents to other available management servers for maintenance; new agent command MigrateAgentConnectionCommand to initiate transfer of indirect agents.

- New global config 'management.server.maintenance.timeout', to set the timeout (in mins) for the management server maintenance window, default: 60 mins.

- UI changes: Prepare and Cancel Maintenance in Management Server section, Connected Agents tab, New fields for hosts and management servers.

* Updated pending jobs check timer task with ScheduledExecutorService

* keep maintenance state on trigger shutdown call when ms is in maintenance

* add pending jobs count to ms response

* during ms heartbeat, update state to up only when it's down

* allow vm work jobs of async job created before prepare for maintenance

* Revert "keep maintenance state on trigger shutdown call when ms is in maintenance"

This reverts commit 607e13364679eac897f4d146bb3325ea7a61ba17.

* skip maintenance test when multiple management servers are not available, and not configured in host setting for kvm
This commit is contained in:
Suresh Kumar Anaparti 2025-01-29 13:31:15 +05:30 committed by GitHub
parent 048649d351
commit 3b108b968f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
105 changed files with 2673 additions and 714 deletions

View File

@ -27,6 +27,7 @@ import java.net.UnknownHostException;
import java.nio.channels.ClosedChannelException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -40,6 +41,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import javax.naming.ConfigurationException;
import com.cloud.agent.api.MigrateAgentConnectionAnswer;
import com.cloud.agent.api.MigrateAgentConnectionCommand;
import com.cloud.resource.AgentStatusUpdater;
import com.cloud.resource.ResourceStatusUpdater;
import com.cloud.agent.api.PingAnswer;
@ -313,7 +316,6 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
}
_shell.updateConnectedHost();
scavengeOldAgentObjects();
}
public void stop(final String reason, final String detail) {
@ -477,6 +479,10 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
}
public void sendStartup(final Link link) {
sendStartup(link, false);
}
public void sendStartup(final Link link, boolean transfer) {
final StartupCommand[] startup = _resource.initialize();
if (startup != null) {
final String msHostList = _shell.getPersistentProperty(null, "host");
@ -484,6 +490,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
for (int i = 0; i < startup.length; i++) {
setupStartupCommand(startup[i]);
startup[i].setMSHostList(msHostList);
startup[i].setConnectionTransferred(transfer);
commands[i] = startup[i];
}
final Request request = new Request(_id != null ? _id : -1, -1, commands, false, false);
@ -541,9 +548,14 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
}
protected void reconnect(final Link link) {
if (!_reconnectAllowed) {
reconnect(link, null, null, false);
}
protected void reconnect(final Link link, String preferredHost, List<String> avoidHostList, boolean forTransfer) {
if (!(forTransfer || _reconnectAllowed)) {
return;
}
synchronized (this) {
if (_startup != null) {
_startup.cancel();
@ -575,22 +587,29 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
_shell.getBackoffAlgorithm().waitBeforeRetry();
}
String host = preferredHost;
if (StringUtils.isEmpty(host)) {
host = _shell.getNextHost();
}
do {
final String host = _shell.getNextHost();
_connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this);
logger.info("Reconnecting to host:{}", host);
try {
_connection.start();
} catch (final NioConnectionException e) {
logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e);
_connection.stop();
if (CollectionUtils.isEmpty(avoidHostList) || !avoidHostList.contains(host)) {
_connection = new NioClient("Agent", host, _shell.getPort(), _shell.getWorkers(), this);
logger.info("Reconnecting to host:{}", host);
try {
_connection.cleanUp();
} catch (final IOException ex) {
logger.warn("Fail to clean up old connection. {}", ex);
_connection.start();
} catch (final NioConnectionException e) {
logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e);
_connection.stop();
try {
_connection.cleanUp();
} catch (final IOException ex) {
logger.warn("Fail to clean up old connection. {}", ex);
}
}
}
_shell.getBackoffAlgorithm().waitBeforeRetry();
host = _shell.getNextHost();
} while (!_connection.isStartup());
_shell.updateConnectedHost();
logger.info("Connected to the host: {}", _shell.getConnectedHost());
@ -703,6 +722,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
}
} else if (cmd instanceof SetupMSListCommand) {
answer = setupManagementServerList((SetupMSListCommand) cmd);
} else if (cmd instanceof MigrateAgentConnectionCommand) {
answer = migrateAgentToOtherMS((MigrateAgentConnectionCommand) cmd);
} else {
if (cmd instanceof ReadyCommand) {
processReadyCommand(cmd);
@ -858,6 +879,53 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
return new SetupMSListAnswer(true);
}
/**
 * Handles a {@link MigrateAgentConnectionCommand} received from the management server.
 *
 * When the command carries a non-empty management server list, that list (together with the
 * load-balancing algorithm and check interval from the command) is applied first; afterwards
 * the agent connection is migrated, avoiding the hosts named in the command's avoid list.
 *
 * @param cmd the migrate command to process
 * @return a successful {@link MigrateAgentConnectionAnswer}, or a failed one carrying the
 *         error message when any step throws
 */
private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) {
    try {
        final List<String> newMsList = cmd.getMsList();
        if (!CollectionUtils.isEmpty(newMsList)) {
            processManagementServerList(newMsList, cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
        }
        migrateAgentConnection(cmd.getAvoidMsList());
    } catch (Exception e) {
        // Any failure is reported back to the sender as an unsuccessful answer instead of propagating.
        final String failure = "Migrate agent connection failed, due to " + e.getMessage();
        logger.debug(failure, e);
        return new MigrateAgentConnectionAnswer(failure);
    }

    return new MigrateAgentConnectionAnswer(true);
}
/**
 * Migrates this agent's connection to another management server.
 *
 * The candidate hosts are the agent's configured management server hosts minus those in
 * {@code avoidMsList}. Candidates are probed in order with a 5 second TCP connect on the
 * agent port; the first reachable one becomes the preferred host for the reconnect.
 *
 * @param avoidMsList management server hosts that must not be used as the new target;
 *                    may be {@code null} or empty, in which case no host is excluded
 * @throws CloudRuntimeException when no hosts are configured, when no candidate remains
 *                               after removing the avoided hosts, or when none of the
 *                               candidates is reachable
 */
private void migrateAgentConnection(List<String> avoidMsList) {
    final String[] msHosts = _shell.getHosts();
    if (msHosts == null || msHosts.length < 1) {
        throw new CloudRuntimeException("Management Server hosts empty, not properly configured in agent");
    }

    List<String> msHostsList = new ArrayList<>(Arrays.asList(msHosts));
    // The avoid list is optional (the command's no-arg constructor leaves it null);
    // ArrayList.removeAll(null) would throw a NullPointerException.
    if (CollectionUtils.isNotEmpty(avoidMsList)) {
        msHostsList.removeAll(avoidMsList);
    }
    if (msHostsList.isEmpty() || StringUtils.isEmpty(msHostsList.get(0))) {
        throw new CloudRuntimeException("No other Management Server hosts to migrate");
    }

    String preferredHost = null;
    for (String msHost : msHostsList) {
        // The probe socket is only used to test reachability and is closed right away.
        try (final Socket socket = new Socket()) {
            socket.connect(new InetSocketAddress(msHost, _shell.getPort()), 5000);
            preferredHost = msHost;
            break;
        } catch (final IOException e) {
            // An unreachable candidate must not abort the migration: keep probing the
            // remaining hosts and fail only when none of them can be reached.
            logger.debug("Management server host: {} is not reachable, to migrate connection", msHost, e);
        }
    }

    if (preferredHost == null) {
        throw new CloudRuntimeException("Management server host(s) are not reachable, to migrate connection");
    }

    logger.debug("Management server host " + preferredHost + " is found to be reachable, trying to reconnect");
    _shell.resetHostCounter();
    // Flag the upcoming connect as a transfer so the startup command sent on connect carries it.
    _shell.setConnectionTransfer(true);
    reconnect(_link, preferredHost, avoidMsList, true);
}
public void processResponse(final Response response, final Link link) {
final Answer answer = response.getAnswer();
logger.debug("Received response: {}", response.toString());
@ -1153,7 +1221,8 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
if (task.getType() == Task.Type.CONNECT) {
_shell.getBackoffAlgorithm().reset();
setLink(task.getLink());
sendStartup(task.getLink());
sendStartup(task.getLink(), _shell.isConnectionTransfer());
_shell.setConnectionTransfer(false);
} else if (task.getType() == Task.Type.DATA) {
Request request;
try {
@ -1178,6 +1247,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
Thread.sleep(5000);
} catch (InterruptedException e) {
}
_shell.setConnectionTransfer(false);
reconnect(task.getLink());
return;
} else if (task.getType() == Task.Type.OTHER) {

View File

@ -77,6 +77,7 @@ public class AgentShell implements IAgentShell, Daemon {
private String hostToConnect;
private String connectedHost;
private Long preferredHostCheckInterval;
private boolean connectionTransfer = false;
protected AgentProperties agentProperties = new AgentProperties();
public AgentShell() {
@ -215,6 +216,14 @@ public class AgentShell implements IAgentShell, Daemon {
_storage.persist(name, value);
}
public boolean isConnectionTransfer() {
return connectionTransfer;
}
public void setConnectionTransfer(boolean connectionTransfer) {
this.connectionTransfer = connectionTransfer;
}
void loadProperties() throws ConfigurationException {
final File file = PropertiesUtil.findConfigFile("agent.properties");

View File

@ -70,4 +70,8 @@ public interface IAgentShell {
String getConnectedHost();
void launchNewAgent(ServerResource resource) throws ConfigurationException;
boolean isConnectionTransfer();
void setConnectionTransfer(boolean connectionTransfer);
}

View File

@ -177,6 +177,8 @@ public interface Host extends StateObject<Status>, Identity, Partition, HAResour
*/
Long getManagementServerId();
Long getLastManagementServerId();
/*
*@return removal date
*/

View File

@ -127,6 +127,7 @@ public enum Status {
s_fsm.addTransition(Status.Connecting, Event.HostDown, Status.Down);
s_fsm.addTransition(Status.Connecting, Event.Ping, Status.Connecting);
s_fsm.addTransition(Status.Connecting, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Connecting, Event.StartAgentRebalance, Status.Rebalancing);
s_fsm.addTransition(Status.Connecting, Event.AgentDisconnected, Status.Alert);
s_fsm.addTransition(Status.Up, Event.PingTimeout, Status.Alert);
s_fsm.addTransition(Status.Up, Event.AgentDisconnected, Status.Alert);

View File

@ -23,11 +23,11 @@ import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd;
import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd;
import org.apache.cloudstack.api.command.admin.host.AddHostCmd;
import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd;
import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd;
@ -51,7 +51,7 @@ public interface ResourceService {
Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException;
Host cancelMaintenance(CancelMaintenanceCmd cmd);
Host cancelMaintenance(CancelHostMaintenanceCmd cmd);
Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException;
@ -69,7 +69,7 @@ public interface ResourceService {
List<? extends Host> discoverHosts(AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException;
Host maintain(PrepareForMaintenanceCmd cmd);
Host maintain(PrepareForHostMaintenanceCmd cmd);
Host declareHostAsDegraded(DeclareHostAsDegradedCmd cmd) throws NoTransitionException;

View File

@ -19,6 +19,7 @@
package com.cloud.server;
import java.util.Date;
import java.util.List;
/**
* management server related stats
@ -70,6 +71,10 @@ public interface ManagementServerHostStats {
String getOsDistribution();
List<String> getLastAgents();
List<String> getAgents();
int getAgentCount();
long getHeapMemoryUsed();

View File

@ -1136,9 +1136,12 @@ public class ApiConstants {
public static final String LOGOUT = "logout";
public static final String LIST_IDPS = "listIdps";
public static final String READY_FOR_SHUTDOWN = "readyforshutdown";
public static final String MAINTENANCE_INITIATED = "maintenanceinitiated";
public static final String SHUTDOWN_TRIGGERED = "shutdowntriggered";
public static final String READY_FOR_SHUTDOWN = "readyforshutdown";
public static final String PENDING_JOBS_COUNT = "pendingjobscount";
public static final String AGENTS_COUNT = "agentscount";
public static final String AGENTS = "agents";
public static final String PUBLIC_MTU = "publicmtu";
public static final String PRIVATE_MTU = "privatemtu";

View File

@ -33,7 +33,7 @@ import com.cloud.user.Account;
@APICommand(name = "cancelHostMaintenance", description = "Cancels host maintenance.", responseObject = HostResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false)
public class CancelMaintenanceCmd extends BaseAsyncCmd {
public class CancelHostMaintenanceCmd extends BaseAsyncCmd {
/////////////////////////////////////////////////////

View File

@ -31,6 +31,7 @@ import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.response.ClusterResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import org.apache.cloudstack.api.response.PodResponse;
import org.apache.cloudstack.api.response.UserVmResponse;
import org.apache.cloudstack.api.response.ZoneResponse;
@ -105,6 +106,9 @@ public class ListHostsCmd extends BaseListCmd {
@Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, description = "hypervisor type of host: XenServer,KVM,VMware,Hyperv,BareMetal,Simulator")
private String hypervisor;
@Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", since="4.21.0")
private Long managementServerId;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
@ -189,6 +193,10 @@ public class ListHostsCmd extends BaseListCmd {
return outOfBandManagementPowerState;
}
public Long getManagementServerId() {
return managementServerId;
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////

View File

@ -33,7 +33,7 @@ import com.cloud.utils.exception.CloudRuntimeException;
@APICommand(name = "prepareHostForMaintenance", description = "Prepares a host for maintenance.", responseObject = HostResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false)
public class PrepareForMaintenanceCmd extends BaseAsyncCmd {
public class PrepareForHostMaintenanceCmd extends BaseAsyncCmd {
/////////////////////////////////////////////////////

View File

@ -83,9 +83,13 @@ public class AsyncJobResponse extends BaseResponse {
@Param(description = "the unique ID of the instance/entity object related to the job")
private String jobInstanceId;
@SerializedName("managementserverid")
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
@Param(description = "the msid of the management server on which the job is running", since = "4.19")
private Long msid;
private String managementServerId;
@SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME)
@Param(description = "the management server name of the host", since = "4.21.0")
private String managementServerName;
@SerializedName(ApiConstants.CREATED)
@Param(description = " the created date of the job")
@ -156,7 +160,11 @@ public class AsyncJobResponse extends BaseResponse {
this.removed = removed;
}
public void setMsid(Long msid) {
this.msid = msid;
public void setManagementServerId(String managementServerId) {
this.managementServerId = managementServerId;
}
public void setManagementServerName(String managementServerName) {
this.managementServerName = managementServerName;
}
}

View File

@ -186,10 +186,18 @@ public class HostResponse extends BaseResponseWithAnnotations {
@Param(description = "the date and time the host was last pinged")
private Date lastPinged;
@SerializedName("managementserverid")
@SerializedName(ApiConstants.VIRTUAL_MACHINE_ID)
@Param(description = "the virtual machine id for host type ConsoleProxy and SecondaryStorageVM", since = "4.21.0")
private String virtualMachineId;
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
@Param(description = "the management server ID of the host")
private String managementServerId;
@SerializedName(ApiConstants.MANAGEMENT_SERVER_NAME)
@Param(description = "the management server name of the host", since = "4.21.0")
private String managementServerName;
@SerializedName("clusterid")
@Param(description = "the cluster ID of the host")
private String clusterId;
@ -435,10 +443,18 @@ public class HostResponse extends BaseResponseWithAnnotations {
this.lastPinged = lastPinged;
}
public void setVirtualMachineId(String virtualMachineId) {
this.virtualMachineId = virtualMachineId;
}
public void setManagementServerId(String managementServerId) {
this.managementServerId = managementServerId;
}
public void setManagementServerName(String managementServerName) {
this.managementServerName = managementServerName;
}
public void setClusterId(String clusterId) {
this.clusterId = clusterId;
}
@ -723,10 +739,18 @@ public class HostResponse extends BaseResponseWithAnnotations {
return lastPinged;
}
public String getVirtualMachineId() {
return virtualMachineId;
}
public String getManagementServerId() {
return managementServerId;
}
public String getManagementServerName() {
return managementServerName;
}
public String getClusterId() {
return clusterId;
}

View File

@ -86,6 +86,10 @@ public class LoginCmdResponse extends AuthenticationCmdResponse {
@Param(description = "Two factor authentication issuer", since = "4.18.0.0")
private String issuerFor2FA;
@SerializedName(value = ApiConstants.MANAGEMENT_SERVER_ID)
@Param(description = "Management Server ID that the user logged to", since = "4.21.0.0")
private String managementServerId;
public String getUsername() {
return username;
}
@ -211,4 +215,12 @@ public class LoginCmdResponse extends AuthenticationCmdResponse {
public void setIssuerFor2FA(String issuerFor2FA) {
this.issuerFor2FA = issuerFor2FA;
}
public String getManagementServerId() {
return managementServerId;
}
public void setManagementServerId(String managementServerId) {
this.managementServerId = managementServerId;
}
}

View File

@ -82,6 +82,14 @@ public class ManagementServerResponse extends BaseResponse {
@Param(description = "the Management Server Peers")
private List<PeerManagementServerNodeResponse> peers;
@SerializedName(ApiConstants.AGENTS_COUNT)
@Param(description = "the number of host agents this Management Server is responsible for", since = "4.21.0.0")
private Long agentsCount;
@SerializedName(ApiConstants.PENDING_JOBS_COUNT)
@Param(description = "the number of pending jobs in this Management Server", since = "4.21.0.0")
private Long pendingJobsCount;
public String getId() {
return this.id;
}
@ -126,6 +134,14 @@ public class ManagementServerResponse extends BaseResponse {
return serviceIp;
}
public Long getAgentsCount() {
return this.agentsCount;
}
public Long getPendingJobsCount() {
return this.pendingJobsCount;
}
public void setId(String id) {
this.id = id;
}
@ -174,6 +190,14 @@ public class ManagementServerResponse extends BaseResponse {
this.serviceIp = serviceIp;
}
public void setAgentsCount(Long agentsCount) {
this.agentsCount = agentsCount;
}
public void setPendingJobsCount(Long pendingJobsCount) {
this.pendingJobsCount = pendingJobsCount;
}
public String getKernelVersion() {
return kernelVersion;
}

View File

@ -22,7 +22,7 @@ import org.apache.cloudstack.api.InternalIdentity;
public interface ManagementServerHost extends InternalIdentity, Identity, ControlledEntity {
enum State {
Up, Down, PreparingToShutDown, ReadyToShutDown, ShuttingDown
Up, Down, PreparingForMaintenance, Maintenance, PreparingForShutDown, ReadyToShutDown, ShuttingDown
}
long getMsid();

View File

@ -624,7 +624,7 @@
</dependency>
<dependency>
<groupId>org.apache.cloudstack</groupId>
<artifactId>cloud-plugin-shutdown</artifactId>
<artifactId>cloud-plugin-maintenance</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>

View File

@ -0,0 +1,38 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.agent.api;
/**
 * Answer sent in reply to a {@link MigrateAgentConnectionCommand}.
 */
public class MigrateAgentConnectionAnswer extends Answer {

    /**
     * Creates an answer without setting any field in this class.
     */
    public MigrateAgentConnectionAnswer() {
    }

    /**
     * Creates an answer with the given outcome.
     *
     * @param result the outcome to store in the answer
     */
    public MigrateAgentConnectionAnswer(boolean result) {
        this.result = result;
    }

    /**
     * Creates a failed answer.
     *
     * @param details text stored as the answer details; the result is set to {@code false}
     */
    public MigrateAgentConnectionAnswer(String details) {
        this.details = details;
        this.result = false;
    }

    /**
     * Creates an answer for the given command with the given outcome and no details.
     *
     * @param cmd    the command being answered
     * @param result the outcome to store in the answer
     */
    public MigrateAgentConnectionAnswer(MigrateAgentConnectionCommand cmd, boolean result) {
        super(cmd, result, null);
    }
}

View File

@ -0,0 +1,61 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.agent.api;
import java.util.List;
/**
 * Command carrying the data an agent needs to move its connection to another
 * management server: an optional new management server list with its load-balancing
 * settings, and the list of management servers to avoid.
 */
public class MigrateAgentConnectionCommand extends Command {

    /** Management server list to apply on the agent; may be unset. */
    private List<String> msList;

    /** Management servers that must not be chosen as the new connection target; may be unset. */
    private List<String> avoidMsList;

    /** Load-balancing algorithm passed along with {@link #msList}. */
    private String lbAlgorithm;

    /** Load-balancing check interval passed along with {@link #msList}. */
    private Long lbCheckInterval;

    /**
     * Creates an empty command; all fields stay unset.
     */
    public MigrateAgentConnectionCommand() {
    }

    /**
     * Creates a fully populated command.
     *
     * @param msList          management server list to apply on the agent
     * @param avoidMsList     management servers to avoid when reconnecting
     * @param lbAlgorithm     load-balancing algorithm for the management server list
     * @param lbCheckInterval load-balancing check interval for the management server list
     */
    public MigrateAgentConnectionCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) {
        this.msList = msList;
        this.avoidMsList = avoidMsList;
        this.lbAlgorithm = lbAlgorithm;
        this.lbCheckInterval = lbCheckInterval;
    }

    /** @return the management server list, possibly {@code null} */
    public List<String> getMsList() {
        return this.msList;
    }

    /** @return the management servers to avoid, possibly {@code null} */
    public List<String> getAvoidMsList() {
        return this.avoidMsList;
    }

    /** @return the load-balancing algorithm, possibly {@code null} */
    public String getLbAlgorithm() {
        return this.lbAlgorithm;
    }

    /** @return the load-balancing check interval, possibly {@code null} */
    public Long getLbCheckInterval() {
        return this.lbCheckInterval;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean executeInSequence() {
        return false;
    }
}

View File

@ -47,6 +47,7 @@ public class StartupCommand extends Command {
String resourceName;
String gatewayIpAddress;
String msHostList;
boolean connectionTransferred;
String arch;
public StartupCommand(Host.Type type) {
@ -291,6 +292,14 @@ public class StartupCommand extends Command {
this.msHostList = msHostList;
}
public boolean isConnectionTransferred() {
return connectionTransferred;
}
public void setConnectionTransferred(boolean connectionTransferred) {
this.connectionTransferred = connectionTransferred;
}
public String getArch() {
return arch;
}

View File

@ -25,6 +25,7 @@ public class TransferAgentCommand extends Command {
protected long agentId;
protected long futureOwner;
protected long currentOwner;
protected boolean isConnectionTransfer;
Event event;
protected TransferAgentCommand() {
@ -37,6 +38,11 @@ public class TransferAgentCommand extends Command {
this.event = event;
}
public TransferAgentCommand(long agentId, long currentOwner, long futureOwner, Event event, boolean isConnectionTransfer) {
this(agentId, currentOwner, futureOwner, event);
this.isConnectionTransfer = isConnectionTransfer;
}
public long getAgentId() {
return agentId;
}
@ -53,6 +59,10 @@ public class TransferAgentCommand extends Command {
return currentOwner;
}
public boolean isConnectionTransfer() {
return isConnectionTransfer;
}
@Override
public boolean executeInSequence() {
return false;

View File

@ -50,6 +50,10 @@ public interface ServerResource extends Manager {
*/
StartupCommand[] initialize();
/**
 * Variant of {@link #initialize()} that additionally receives a transferred-connection flag.
 * The default implementation ignores the flag and simply delegates to {@link #initialize()};
 * implementations may override it to act on the flag.
 *
 * @param isTransferredConnection flag describing whether the connection was transferred
 *                                (presumably from another management server — confirm with callers)
 * @return the result of {@link #initialize()}
 */
default StartupCommand[] initialize(boolean isTransferredConnection) {
return initialize();
}
/**
* @param id id of the server to put in the PingCommand
* @return PingCommand

View File

@ -189,6 +189,11 @@ public class CheckOnHostCommandTest {
return 2L;
};
@Override
public Long getLastManagementServerId() {
return null;
};
@Override
public Date getRemoved() {
Date date = null;

View File

@ -16,6 +16,7 @@
// under the License.
package com.cloud.agent;
import java.util.List;
import java.util.Map;
import org.apache.cloudstack.framework.config.ConfigKey;
@ -170,4 +171,10 @@ public interface AgentManager {
void notifyMonitorsOfRemovedHost(long hostId, long clusterId);
void propagateChangeToAgents(Map<String, String> params);
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs);
List<String> getLastAgents();
void setLastAgents(List<String> lastAgents);
}

View File

@ -85,6 +85,8 @@ public interface ResourceManager extends ResourceService, Configurable {
public Host createHostAndAgent(Long hostId, ServerResource resource, Map<String, String> details, boolean old, List<String> hostTags, boolean forRebalance);
public Host createHostAndAgent(Long hostId, ServerResource resource, Map<String, String> details, boolean old, List<String> hostTags, boolean forRebalance, boolean isTransferredConnection);
public Host addHost(long zoneId, ServerResource resource, Type hostType, Map<String, String> hostDetails);
public HostVO createHostVOForConnectedAgent(StartupCommand[] cmds);

View File

@ -70,7 +70,7 @@
</dependency>
<dependency>
<groupId>org.apache.cloudstack</groupId>
<artifactId>cloud-plugin-shutdown</artifactId>
<artifactId>cloud-plugin-maintenance</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>

View File

@ -16,6 +16,7 @@
// under the License.
package com.cloud.agent.manager;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.channels.ClosedChannelException;
@ -38,6 +39,8 @@ import java.util.concurrent.locks.ReentrantLock;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.configuration.Config;
import com.cloud.org.Cluster;
import com.cloud.utils.NumbersUtil;
@ -50,7 +53,10 @@ import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.jobs.AsyncJob;
import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext;
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceListener;
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.collections.MapUtils;
@ -130,7 +136,7 @@ import org.apache.logging.log4j.ThreadContext;
/**
* Implementation of the Agent Manager. This class controls the connection to the agents.
**/
public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, Configurable {
public class AgentManagerImpl extends ManagerBase implements AgentManager, HandlerFactory, ManagementServerMaintenanceListener, Configurable {
/**
* _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. Maybe a
@ -154,6 +160,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Inject
protected HostDao _hostDao = null;
@Inject
private ManagementServerHostDao _mshostDao;
@Inject
protected OutOfBandManagementDao outOfBandManagementDao;
@Inject
protected DataCenterDao _dcDao = null;
@ -175,6 +183,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Inject
protected IndirectAgentLB indirectAgentLB;
@Inject
private ManagementServerMaintenanceManager managementServerMaintenanceManager;
protected int _retry = 2;
protected long _nodeId = -1;
@ -187,6 +198,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
private int _directAgentThreadCap;
private List<String> lastAgents = null;
protected StateMachine2<Status, Status.Event, Host> _statusStateMachine = Status.getStateMachine();
private final ConcurrentHashMap<Long, Long> _pingMap = new ConcurrentHashMap<Long, Long>(10007);
@ -226,6 +239,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
registerForHostEvents(new SetHostParamsListener(), true, true, false);
managementServerMaintenanceManager.registerListener(this);
_executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentTaskPool"));
_connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool"));
@ -296,6 +311,45 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
_hostMonitors.remove(id);
}
/**
 * Maintenance listener callback: logs that maintenance was enabled, stops the monitor
 * executor, stops and cleans up the agent connection when one exists, and finally shuts
 * down the connect executor.
 */
@Override
public void onManagementServerMaintenance() {
logger.debug("Management server maintenance enabled");
// Stop the monitor executor immediately.
_monitorExecutor.shutdownNow();
if (_connection != null) {
_connection.stop();
try {
_connection.cleanUp();
} catch (final IOException e) {
// A clean-up failure is only logged; the connect executor below is still shut down.
logger.warn("Fail to clean up old connection", e);
}
}
_connectExecutor.shutdownNow();
}
/**
 * Maintenance listener callback: logs that maintenance was disabled, recreates the connect
 * executor if it was shut down, starts the directly connected hosts, restarts the agent
 * connection when one exists, and recreates and reschedules the monitor executor if it
 * was shut down.
 */
@Override
public void onManagementServerCancelMaintenance() {
logger.debug("Management server maintenance disabled");
// A shut-down executor cannot be reused, so a fresh pool is created in its place.
if (_connectExecutor.isShutdown()) {
_connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool"));
_connectExecutor.allowCoreThreadTimeOut(true);
}
// NOTE(review): the meaning of the boolean argument is not visible here — confirm against startDirectlyConnectedHosts.
startDirectlyConnectedHosts(true);
if (_connection != null) {
try {
_connection.start();
} catch (final NioConnectionException e) {
// A failed start is only logged; the monitor executor below is still restored.
logger.error("Error when connecting to the NioServer!", e);
}
}
// Recreate the monitor executor and schedule MonitorTask with the configured ping interval (in seconds).
if (_monitorExecutor.isShutdown()) {
_monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor"));
_monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS);
}
}
private AgentControlAnswer handleControlCommand(final AgentAttache attache, final AgentControlCommand cmd) {
AgentControlAnswer answer = null;
@ -332,6 +386,16 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
return attache;
}
/**
 * Returns the list stored by {@code setLastAgents}. The field itself is returned,
 * not a copy, so changes made by the caller are visible to this manager.
 */
@Override
public List<String> getLastAgents() {
return lastAgents;
}
/**
 * Replaces the stored list of last agents. The given list is kept by reference
 * (no defensive copy).
 *
 * @param lastAgents list to store; presumably agent/host identifiers - verify against callers
 */
@Override
public void setLastAgents(List<String> lastAgents) {
this.lastAgents = lastAgents;
}
@Override
public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) {
final List<ClusterVO> clusters = _clusterDao.listByDcHyType(dcId, type.toString());
@ -616,10 +680,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
final long hostId = attache.getId();
final HostVO host = _hostDao.findById(hostId);
for (final Pair<Integer, Listener> monitor : _hostMonitors) {
logger.debug("Sending Connect to listener: {}", monitor.second().getClass().getSimpleName());
logger.debug("Sending Connect to listener: {}, for rebalance: {}", monitor.second().getClass().getSimpleName(), forRebalance);
for (int i = 0; i < cmd.length; i++) {
try {
logger.debug("process connection to issue {} forRebalance == {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), forRebalance);
logger.debug("process connection to issue: {} for host: {}, forRebalance: {}, connection transferred: {}", ReflectionToStringBuilderUtils.reflectCollection(cmd[i]), hostId, forRebalance, cmd[i].isConnectionTransferred());
monitor.second().processConnect(host, cmd[i], forRebalance);
} catch (final ConnectionException ce) {
if (ce.isSetupError()) {
@ -675,7 +739,13 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Override
public boolean start() {
startDirectlyConnectedHosts();
ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId);
if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) {
_monitorExecutor.shutdownNow();
return true;
}
startDirectlyConnectedHosts(false);
if (_connection != null) {
try {
@ -690,10 +760,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
return true;
}
public void startDirectlyConnectedHosts() {
public void startDirectlyConnectedHosts(final boolean forRebalance) {
final List<HostVO> hosts = _resourceMgr.findDirectlyConnectedHosts();
for (final HostVO host : hosts) {
loadDirectlyConnectedHost(host, false);
loadDirectlyConnectedHost(host, forRebalance);
}
}
@ -768,6 +838,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
}
/**
 * Convenience overload that loads a directly connected host without treating it as a
 * transferred connection; it delegates to the three-argument overload with
 * {@code isTransferredConnection = false}.
 *
 * @param host host to load
 * @param forRebalance passed through unchanged to the three-argument overload
 * @return the result of the three-argument overload
 */
protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance) {
return loadDirectlyConnectedHost(host, forRebalance, false);
}
protected boolean loadDirectlyConnectedHost(final HostVO host, final boolean forRebalance, final boolean isTransferredConnection) {
boolean initialized = false;
ServerResource resource = null;
try {
@ -796,7 +870,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
if (forRebalance) {
tapLoadingAgents(host.getId(), TapAgentsAction.Add);
final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true);
final Host h = _resourceMgr.createHostAndAgent(host.getId(), resource, host.getDetails(), false, null, true, isTransferredConnection);
tapLoadingAgents(host.getId(), TapAgentsAction.Del);
return h == null ? false : true;
@ -1918,12 +1992,15 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Override
public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) {
if (cmd instanceof StartupRoutingCommand) {
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
Map<String, String> params = new HashMap<String, String>();
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId())));
if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) {
return;
}
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
Map<String, String> params = new HashMap<String, String>();
params.put(Config.RouterAggregationCommandEachTimeout.toString(), _configDao.getValue(Config.RouterAggregationCommandEachTimeout.toString()));
params.put(Config.MigrateWait.toString(), _configDao.getValue(Config.MigrateWait.toString()));
params.put(NetworkOrchestrationService.TUNGSTEN_ENABLED.key(), String.valueOf(NetworkOrchestrationService.TUNGSTEN_ENABLED.valueIn(host.getDataCenterId())));
try {
SetHostParamsCommand cmds = new SetHostParamsCommand(params);
@ -1935,8 +2012,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
}
}
}
@Override
public boolean processDisconnect(final long agentId, final Status state) {
return true;
@ -2004,6 +2079,11 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
}
}
/**
 * Base implementation: performs no transfer and always reports success.
 * None of the arguments are used here.
 *
 * @param fromMsUuid UUID of the management server whose direct agents should be moved
 * @param fromMsId msid of that management server
 * @param timeoutDurationInMs time budget for the transfer, in milliseconds
 * @return always {@code true}
 */
@Override
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) {
return true;
}
/**
 * Looks up the interned global lock used for the given host; the lock name is
 * "Host-Join-" followed by the host id.
 *
 * @param hostId id of the host the lock belongs to
 * @return the lock returned by {@code GlobalLock.getInternLock} for that name
 */
private GlobalLock getHostJoinLock(Long hostId) {
    final String lockName = String.format("%s-%s", "Host-Join", hostId);
    return GlobalLock.getInternLock(lockName);
}

View File

@ -47,14 +47,16 @@ import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.ha.dao.HAConfigDao;
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager;
import org.apache.cloudstack.maintenance.command.BaseShutdownManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.cloudstack.shutdown.ShutdownManager;
import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand;
import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand;
import org.apache.cloudstack.shutdown.command.BaseShutdownManagementServerHostCommand;
import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.utils.security.SSLUtils;
@ -74,12 +76,17 @@ import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ClusterServicePdu;
import com.cloud.cluster.ClusteredAgentRebalanceService;
import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.commons.collections.CollectionUtils;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.agentlb.AgentLoadBalancerPlanner;
import com.cloud.cluster.agentlb.HostTransferMapVO;
import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState;
import com.cloud.cluster.agentlb.dao.HostTransferMapDao;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.cluster.dao.ManagementServerHostPeerDao;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.UnsupportedVersionException;
@ -101,7 +108,7 @@ import com.cloud.utils.nio.Task;
import com.google.gson.Gson;
public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService {
private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
private static ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list
public final static long STARTUP_DELAY = 5000;
@ -113,12 +120,15 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
protected HashMap<String, SSLEngine> _sslEngines;
private final Timer _timer = new Timer("ClusteredAgentManager Timer");
boolean _agentLbHappened = false;
private int _mshostCounter = 0;
@Inject
protected ClusterManager _clusterMgr = null;
@Inject
protected ManagementServerHostDao _mshostDao;
@Inject
protected ManagementServerHostPeerDao _mshostPeerDao;
@Inject
protected HostTransferMapDao _hostTransferDao;
@Inject
protected List<AgentLoadBalancerPlanner> _lbPlanners;
@ -133,7 +143,9 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Inject
private CAManager caService;
@Inject
private ShutdownManager shutdownManager;
private ManagementServerMaintenanceManager managementServerMaintenanceManager;
@Inject
private DataCenterDao dcDao;
protected ClusteredAgentManagerImpl() {
super();
@ -172,6 +184,13 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
_timer.schedule(new DirectAgentScanTimerTask(), STARTUP_DELAY, ScanInterval.value());
logger.debug("Scheduled direct agent scan task to run at an interval of {} seconds", ScanInterval.value());
ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId);
if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) {
s_transferExecutor.shutdownNow();
cleanupTransferMap(_nodeId);
return true;
}
// Schedule tasks for agent rebalancing
if (isAgentRebalanceEnabled()) {
cleanupTransferMap(_nodeId);
@ -585,7 +604,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
@Override
public void startDirectlyConnectedHosts() {
public void startDirectlyConnectedHosts(final boolean forRebalance) {
// override and let it be dummy for purpose, we will scan and load direct agents periodically.
// We may also pickup agents that have been left over from other crashed management server
}
@ -742,12 +761,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
/**
 * Executes a rebalance request that is not a connection transfer; it delegates to the
 * five-argument overload with {@code isConnectionTransfer = false}.
 *
 * @param agentId id of the agent (host) to rebalance
 * @param currentOwnerId msid of the management server currently owning the agent
 * @param futureOwnerId msid of the management server that should own the agent
 * @param event rebalance event to process
 * @return the result of the five-argument overload
 */
@Override
public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) throws AgentUnavailableException, OperationTimedoutException {
return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, false);
}
@Override
public boolean executeRebalanceRequest(final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException {
boolean result = false;
if (event == Event.RequestAgentRebalance) {
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
} else if (event == Event.StartAgentRebalance) {
try {
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId, isConnectionTransfer);
} catch (final Exception e) {
logger.warn("Unable to rebalance host id={} ({})", agentId, findAttache(agentId), e);
}
@ -871,7 +895,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
private Answer[] sendRebalanceCommand(final long peer, final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event) {
final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event);
return sendRebalanceCommand(peer, agentId, currentOwnerId, futureOwnerId, event, false);
}
private Answer[] sendRebalanceCommand(final long peer, final long agentId, final long currentOwnerId, final long futureOwnerId, final Event event, final boolean isConnectionTransfer) {
final TransferAgentCommand transfer = new TransferAgentCommand(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer);
final Commands commands = new Commands(Command.OnError.Stop);
commands.addCommand(transfer);
@ -1004,7 +1032,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
/**
 * Rebalances a host without marking it as a connection transfer; it delegates to the
 * four-argument overload with {@code isConnectionTransfer = false}.
 *
 * @param hostId id of the host to rebalance
 * @param currentOwnerId msid of the current owner
 * @param futureOwnerId msid of the future owner
 * @return the result of the four-argument overload
 */
protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId) throws AgentUnavailableException {
return rebalanceHost(hostId, currentOwnerId, futureOwnerId, false);
}
protected boolean rebalanceHost(final long hostId, final long currentOwnerId, final long futureOwnerId, final boolean isConnectionTransfer) throws AgentUnavailableException {
boolean result = true;
if (currentOwnerId == _nodeId) {
if (!startRebalance(hostId)) {
@ -1013,7 +1044,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return false;
}
try {
final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance);
final Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance, isConnectionTransfer);
if (answer == null || !answer[0].getResult()) {
result = false;
}
@ -1043,7 +1074,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (result) {
logger.debug("Loading directly connected host {} to the management server {} as a part of rebalance process", host, _nodeId);
result = loadDirectlyConnectedHost(host, true);
result = loadDirectlyConnectedHost(host, true, isConnectionTransfer);
} else {
logger.warn("Failed to disconnect {} as a part of rebalance process without notification", host);
}
@ -1253,10 +1284,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
} else if (cmds.length == 1 && cmds[0] instanceof TransferAgentCommand) {
final TransferAgentCommand cmd = (TransferAgentCommand)cmds[0];
logger.debug("Intercepting command for agent rebalancing: agent {} event: {}", cmd.getAgentId(), cmd.getEvent());
logger.debug("Intercepting command for agent rebalancing: agent: {}, event: {}, connection transfer: {}", cmd.getAgentId(), cmd.getEvent(), cmd.isConnectionTransfer());
boolean result = false;
try {
result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner());
result = rebalanceAgent(cmd.getAgentId(), cmd.getEvent(), cmd.getCurrentOwner(), cmd.getFutureOwner(), cmd.isConnectionTransfer());
logger.debug("Result is {}", result);
} catch (final AgentUnavailableException e) {
@ -1320,10 +1351,28 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
private String handleShutdownManagementServerHostCommand(BaseShutdownManagementServerHostCommand cmd) {
if (cmd instanceof PrepareForShutdownManagementServerHostCommand) {
logger.debug("Received BaseShutdownManagementServerHostCommand - preparing to shut down");
if (cmd instanceof PrepareForMaintenanceManagementServerHostCommand) {
logger.debug("Received PrepareForMaintenanceManagementServerHostCommand - preparing for maintenance");
try {
shutdownManager.prepareForShutdown();
managementServerMaintenanceManager.prepareForMaintenance(((PrepareForMaintenanceManagementServerHostCommand) cmd).getLbAlgorithm());
return "Successfully prepared for maintenance";
} catch(CloudRuntimeException e) {
return e.getMessage();
}
}
if (cmd instanceof CancelMaintenanceManagementServerHostCommand) {
logger.debug("Received CancelMaintenanceManagementServerHostCommand - cancelling maintenance");
try {
managementServerMaintenanceManager.cancelMaintenance();
return "Successfully cancelled maintenance";
} catch(CloudRuntimeException e) {
return e.getMessage();
}
}
if (cmd instanceof PrepareForShutdownManagementServerHostCommand) {
logger.debug("Received PrepareForShutdownManagementServerHostCommand - preparing to shut down");
try {
managementServerMaintenanceManager.prepareForShutdown();
return "Successfully prepared for shutdown";
} catch(CloudRuntimeException e) {
return e.getMessage();
@ -1332,7 +1381,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (cmd instanceof TriggerShutdownManagementServerHostCommand) {
logger.debug("Received TriggerShutdownManagementServerHostCommand - triggering a shut down");
try {
shutdownManager.triggerShutdown();
managementServerMaintenanceManager.triggerShutdown();
return "Successfully triggered shutdown";
} catch(CloudRuntimeException e) {
return e.getMessage();
@ -1341,8 +1390,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (cmd instanceof CancelShutdownManagementServerHostCommand) {
logger.debug("Received CancelShutdownManagementServerHostCommand - cancelling shut down");
try {
shutdownManager.cancelShutdown();
return "Successfully prepared for shutdown";
managementServerMaintenanceManager.cancelShutdown();
return "Successfully cancelled shutdown";
} catch(CloudRuntimeException e) {
return e.getMessage();
}
@ -1351,6 +1400,133 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
}
/**
 * Transfers the direct agents owned by the given management server node to the other
 * management server nodes that are Up, zone by zone, picking the target node in
 * round-robin order.
 *
 * @param fromMsUuid UUID of the source management server (used for logging only)
 * @param fromMsId msid of the source management server
 * @param timeoutDurationInMs time budget in milliseconds; must be positive
 * @return {@code true} when there was nothing to transfer or every agent was transferred;
 *         {@code false} when the timeout is not positive, no target node is available,
 *         the time budget ran out, or at least one transfer failed or threw
 */
@Override
public boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs) {
    if (timeoutDurationInMs <= 0) {
        logger.debug(String.format("Not transferring direct agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid));
        return false;
    }

    long transferStartTime = System.currentTimeMillis();
    if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) {
        logger.info(String.format("No direct agent hosts available on management server node %d (id: %s), to transfer", fromMsId, fromMsUuid));
        return true;
    }

    List<ManagementServerHostVO> msHosts = getUpMsHostsExcludingMs(fromMsId);
    if (msHosts.isEmpty()) {
        logger.warn(String.format("No management server nodes available to transfer agents from management server node %d (id: %s)", fromMsId, fromMsUuid));
        return false;
    }

    logger.debug(String.format("Transferring direct agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
    int agentTransferFailedCount = 0;
    List<DataCenterVO> dataCenterList = dcDao.listAll();
    for (DataCenterVO dc : dataCenterList) {
        List<HostVO> directAgentHostsInDc = getDirectAgentHostsInDc(fromMsId, dc.getId());
        if (CollectionUtils.isEmpty(directAgentHostsInDc)) {
            continue;
        }
        logger.debug(String.format("Transferring %d direct agents from management server node %d (id: %s) of zone %s", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc.toString()));
        for (HostVO host : directAgentHostsInDc) {
            // The time budget is checked before every host so that a slow transfer cannot overrun it by much.
            long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTime;
            if (transferElapsedTimeInMs >= timeoutDurationInMs) {
                logger.debug(String.format("Stop transferring remaining direct agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid));
                return false;
            }

            try {
                // Round-robin over the target nodes; the counter is an instance field, so the
                // rotation continues across calls and is wrapped whenever it runs past the list.
                if (_mshostCounter >= msHosts.size()) {
                    _mshostCounter = 0;
                }
                ManagementServerHostVO msHost = msHosts.get(_mshostCounter);
                _mshostCounter++;

                _hostTransferDao.startAgentTransfering(host.getId(), fromMsId, msHost.getMsid());
                if (!rebalanceAgent(host.getId(), Event.StartAgentRebalance, fromMsId, msHost.getMsid(), true)) {
                    agentTransferFailedCount++;
                } else {
                    // Remember which node the host came from.
                    updateLastManagementServer(host.getId(), fromMsId);
                }
            } catch (Exception e) {
                // A transfer that throws is a failed transfer: count it so the caller is not told
                // that everything succeeded, and keep the cause in the log.
                agentTransferFailedCount++;
                logger.warn(String.format("Failed to transfer direct agent of the host %s from management server node %d (id: %s), due to %s", host, fromMsId, fromMsUuid, e.getMessage()), e);
            }
        }
    }

    return (agentTransferFailedCount == 0);
}
/**
 * Collects the hosts of the given management server whose attache found on this node is a
 * {@code DirectAgentAttache}.
 *
 * @param msId msid of the management server whose hosts are inspected
 * @return a new list with the matching hosts; empty when none match
 */
private List<HostVO> getDirectAgentHosts(long msId) {
    final List<HostVO> result = new ArrayList<>();
    for (final HostVO candidate : _hostDao.listHostsByMs(msId)) {
        // instanceof is false for null, so hosts without an attache are skipped as well.
        if (!(findAttache(candidate.getId()) instanceof DirectAgentAttache)) {
            continue;
        }
        result.add(candidate);
    }
    return result;
}
/**
 * Collects the hosts of the given management server in the given zone whose attache found
 * on this node is a {@code DirectAgentAttache}.
 *
 * @param msId msid of the management server whose hosts are inspected
 * @param dcId id of the zone to restrict the lookup to
 * @return a new list with the matching hosts; empty when none match
 */
private List<HostVO> getDirectAgentHostsInDc(long msId, long dcId) {
    final List<HostVO> result = new ArrayList<>();
    for (final HostVO candidate : _hostDao.listHostsByMsAndDc(msId, dcId)) {
        // instanceof is false for null, so hosts without an attache are skipped as well.
        if (!(findAttache(candidate.getId()) instanceof DirectAgentAttache)) {
            continue;
        }
        result.add(candidate);
    }
    return result;
}
/**
 * Lists the management servers in Up state that can receive agents: the server to avoid is
 * dropped, and so is every server for which no peer record in Up state is found.
 *
 * @param avoidMsId msid of the management server to leave out
 * @return the list returned by the DAO with the unsuitable entries removed
 */
private List<ManagementServerHostVO> getUpMsHostsExcludingMs(long avoidMsId) {
    final List<ManagementServerHostVO> upMsHosts = _mshostDao.listBy(ManagementServerHost.State.Up);
    for (final Iterator<ManagementServerHostVO> it = upMsHosts.iterator(); it.hasNext();) {
        final ManagementServerHostVO candidate = it.next();
        if (candidate.getMsid() == avoidMsId) {
            it.remove();
            continue;
        }
        // The peer lookup runs only for servers other than the one being avoided.
        if (_mshostPeerDao.findByPeerMsAndState(candidate.getId(), ManagementServerHost.State.Up) == null) {
            it.remove();
        }
    }
    return upMsHosts;
}
/**
 * Stores the given management server id as the host's last management server and
 * persists the host. Does nothing when the host cannot be found.
 *
 * @param hostId id of the host to update
 * @param msId management server id to record
 */
private void updateLastManagementServer(long hostId, long msId) {
    final HostVO hostRecord = _hostDao.findById(hostId);
    if (hostRecord == null) {
        return;
    }
    hostRecord.setLastManagementServerId(msId);
    _hostDao.update(hostId, hostRecord);
}
/**
 * Clustered variant of the maintenance callback: shuts down the agent transfer executor,
 * cleans up the transfer map of this node and resets the agent load-balancing flag, then
 * runs the base class handling.
 */
@Override
public void onManagementServerMaintenance() {
// The base implementation logs the same message again when it is called below.
logger.debug("Management server maintenance enabled");
s_transferExecutor.shutdownNow();
cleanupTransferMap(_nodeId);
_agentLbHappened = false;
super.onManagementServerMaintenance();
}
/**
 * Clustered variant of the cancel-maintenance callback. After the base class handling it
 * cleans up the transfer map of this node when agent rebalancing is enabled and, if the
 * transfer executor was shut down, creates a new one and schedules the rebalance scan and
 * transfer scan tasks on it.
 */
@Override
public void onManagementServerCancelMaintenance() {
    logger.debug("Management server maintenance disabled");
    super.onManagementServerCancelMaintenance();

    if (!isAgentRebalanceEnabled()) {
        return;
    }
    cleanupTransferMap(_nodeId);

    if (!s_transferExecutor.isShutdown()) {
        // The executor is still alive, so its tasks are left as they are.
        return;
    }
    s_transferExecutor = Executors.newScheduledThreadPool(2, new NamedThreadFactory("Cluster-AgentRebalancingExecutor"));
    s_transferExecutor.scheduleAtFixedRate(getAgentRebalanceScanTask(), 60000, 60000, TimeUnit.MILLISECONDS);
    s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), 60000, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, TimeUnit.MILLISECONDS);
}
/**
 * Public entry point that forwards a user request for an agent to {@code executeUserRequest}.
 *
 * @param agentId id of the agent the request is for
 * @param event event to execute
 * @return the result of {@code executeUserRequest}
 */
public boolean executeAgentUserRequest(final long agentId, final Event event) throws AgentUnavailableException {
return executeUserRequest(agentId, event);
}
@ -1359,6 +1535,10 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event);
}
/**
 * Forwards a rebalance request to {@code executeRebalanceRequest}; note that the argument
 * order differs (the event comes second here and fourth there).
 *
 * @param agentId id of the agent (host) to rebalance
 * @param event rebalance event to process
 * @param currentOwnerId msid of the current owner
 * @param futureOwnerId msid of the future owner
 * @param isConnectionTransfer passed through unchanged to {@code executeRebalanceRequest}
 * @return the result of {@code executeRebalanceRequest}
 */
public boolean rebalanceAgent(final long agentId, final Event event, final long currentOwnerId, final long futureOwnerId, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException {
return executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event, isConnectionTransfer);
}
/**
 * Tells whether agent rebalancing is enabled.
 *
 * @return the current value of the {@code EnableLB} setting
 */
public boolean isAgentRebalanceEnabled() {
return EnableLB.value();
}

View File

@ -27,4 +27,5 @@ public interface ClusteredAgentRebalanceService {
boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException;
boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event, boolean isConnectionTransfer) throws AgentUnavailableException, OperationTimedoutException;
}

View File

@ -372,6 +372,9 @@ public class EngineHostVO implements EngineHost, Identity {
@Column(name = "mgmt_server_id")
private Long managementServerId;
@Column(name = "last_mgmt_server_id")
private Long lastManagementServerId;
@Column(name = "dom0_memory")
private long dom0MinMemory;
@ -556,6 +559,10 @@ public class EngineHostVO implements EngineHost, Identity {
this.managementServerId = managementServerId;
}
/**
 * Sets the value persisted in the {@code last_mgmt_server_id} column.
 *
 * @param lastManagementServerId management server id to store; the field is a nullable {@code Long}
 */
public void setLastManagementServerId(Long lastManagementServerId) {
this.lastManagementServerId = lastManagementServerId;
}
@Override
public long getLastPinged() {
return lastPinged;
@ -625,6 +632,11 @@ public class EngineHostVO implements EngineHost, Identity {
return managementServerId;
}
/**
 * Returns the value of the {@code last_mgmt_server_id} column.
 *
 * @return the stored management server id, or {@code null} when none is set
 */
@Override
public Long getLastManagementServerId() {
return lastManagementServerId;
}
@Override
public Date getDisconnectedOn() {
return disconnectedOn;

View File

@ -4263,7 +4263,7 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
@Override
public void processConnect(final Host host, final StartupCommand cmd, final boolean forRebalance) throws ConnectionException {
if (!(cmd instanceof StartupRoutingCommand)) {
if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) {
return;
}
final long hostId = host.getId();

View File

@ -404,6 +404,9 @@ public class HostVO implements Host {
@Column(name = "mgmt_server_id")
private Long managementServerId;
@Column(name = "last_mgmt_server_id")
private Long lastManagementServerId;
@Column(name = "dom0_memory")
private long dom0MinMemory;
@ -570,6 +573,10 @@ public class HostVO implements Host {
this.managementServerId = managementServerId;
}
/**
 * Sets the value persisted in the {@code last_mgmt_server_id} column of the host.
 *
 * @param lastManagementServerId management server id to store; the field is a nullable {@code Long}
 */
public void setLastManagementServerId(Long lastManagementServerId) {
this.lastManagementServerId = lastManagementServerId;
}
@Override
public long getLastPinged() {
return lastPinged;
@ -639,6 +646,11 @@ public class HostVO implements Host {
return managementServerId;
}
/**
 * Returns the value of the {@code last_mgmt_server_id} column of the host.
 *
 * @return the stored management server id, or {@code null} when none is set
 */
@Override
public Long getLastManagementServerId() {
return lastManagementServerId;
}
@Override
public Date getDisconnectedOn() {
return disconnectedOn;

View File

@ -151,12 +151,23 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
List<HostVO> listHostsWithActiveVMs(long offeringId);
/**
 * Lists the hosts whose management server id equals {@code msId} and whose zone id
 * equals {@code dcId}.
 * @param msId the management server id to match
 * @param dcId the zone (data center) id to match
 * @return the matching hosts
 */
List<HostVO> listHostsByMsAndDc(long msId, long dcId);
/**
 * Lists the hosts whose management server id equals {@code msId}.
 * @param msId the management server id to match
 * @return the matching hosts
 */
List<HostVO> listHostsByMs(long msId);
/**
* Retrieves the number of hosts/agents this {@see ManagementServer} has responsibility over.
* @param msid the id of the {@see ManagementServer}
* @param msId the id of the {@see ManagementServer}
* @return the number of hosts/agents this {@see ManagementServer} has responsibility over
*/
int countByMs(long msid);
int countByMs(long msId);
/**
 * Retrieves the UUIDs of the hosts/agents this {@code ManagementServer} has responsibility over.
 * @param msId the id of the {@code ManagementServer}
 * @return the UUIDs of the hosts/agents this {@code ManagementServer} has responsibility over
 */
List<String> listByMs(long msId);
/**
* Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order

View File

@ -124,7 +124,9 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
protected SearchBuilder<HostVO> UnmanagedApplianceSearch;
protected SearchBuilder<HostVO> MaintenanceCountSearch;
protected SearchBuilder<HostVO> HostTypeCountSearch;
protected SearchBuilder<HostVO> ResponsibleMsCountSearch;
protected SearchBuilder<HostVO> ResponsibleMsSearch;
protected SearchBuilder<HostVO> ResponsibleMsDcSearch;
protected GenericSearchBuilder<HostVO, String> ResponsibleMsIdSearch;
protected SearchBuilder<HostVO> HostTypeZoneCountSearch;
protected SearchBuilder<HostVO> ClusterStatusSearch;
protected SearchBuilder<HostVO> TypeNameZoneSearch;
@ -189,9 +191,19 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
HostTypeCountSearch.and("type", HostTypeCountSearch.entity().getType(), SearchCriteria.Op.EQ);
HostTypeCountSearch.done();
ResponsibleMsCountSearch = createSearchBuilder();
ResponsibleMsCountSearch.and("managementServerId", ResponsibleMsCountSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
ResponsibleMsCountSearch.done();
ResponsibleMsSearch = createSearchBuilder();
ResponsibleMsSearch.and("managementServerId", ResponsibleMsSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
ResponsibleMsSearch.done();
ResponsibleMsDcSearch = createSearchBuilder();
ResponsibleMsDcSearch.and("managementServerId", ResponsibleMsDcSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
ResponsibleMsDcSearch.and("dcId", ResponsibleMsDcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ);
ResponsibleMsDcSearch.done();
ResponsibleMsIdSearch = createSearchBuilder(String.class);
ResponsibleMsIdSearch.selectFields(ResponsibleMsIdSearch.entity().getUuid());
ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
ResponsibleMsIdSearch.done();
HostTypeZoneCountSearch = createSearchBuilder();
HostTypeZoneCountSearch.and("type", HostTypeZoneCountSearch.entity().getType(), SearchCriteria.Op.EQ);
@ -1424,12 +1436,34 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
}
@Override
public int countByMs(long msid) {
SearchCriteria<HostVO> sc = ResponsibleMsCountSearch.create();
sc.setParameters("managementServerId", msid);
public List<HostVO> listHostsByMsAndDc(long msId, long dcId) {
SearchCriteria<HostVO> sc = ResponsibleMsDcSearch.create();
sc.setParameters("managementServerId", msId);
sc.setParameters("dcId", dcId);
return listBy(sc);
}
/**
 * Lists the hosts whose management server id equals {@code msId}, using
 * {@code ResponsibleMsSearch}.
 */
@Override
public List<HostVO> listHostsByMs(long msId) {
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create();
sc.setParameters("managementServerId", msId);
return listBy(sc);
}
/**
 * Counts the hosts whose management server id equals {@code msId}; it uses the same
 * {@code ResponsibleMsSearch} criteria as {@code listHostsByMs}.
 */
@Override
public int countByMs(long msId) {
SearchCriteria<HostVO> sc = ResponsibleMsSearch.create();
sc.setParameters("managementServerId", msId);
return getCount(sc);
}
/**
 * Lists the UUIDs of the hosts whose management server id equals {@code msId}.
 * The value is bound to the "managementServerId" condition that
 * {@code ResponsibleMsIdSearch} already declares, in the same way as the other
 * management-server lookups in this DAO, instead of adding a second ad-hoc condition.
 *
 * @param msId the management server id to match
 * @return the UUIDs of the matching hosts
 */
@Override
public List<String> listByMs(long msId) {
    SearchCriteria<String> sc = ResponsibleMsIdSearch.create();
    sc.setParameters("managementServerId", msId);
    return customSearch(sc, null);
}
@Override
public List<String> listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) {
PreparedStatement pstmt = null;

View File

@ -31,3 +31,6 @@ SELECT uuid(), role_id, 'quotaCreditsList', permission, sort_order
FROM `cloud`.`role_permissions` rp
WHERE rp.rule = 'quotaStatement'
AND NOT EXISTS(SELECT 1 FROM cloud.role_permissions rp_ WHERE rp.role_id = rp_.role_id AND rp_.rule = 'quotaCreditsList');
CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.host', 'last_mgmt_server_id', 'bigint unsigned DEFAULT NULL COMMENT "last management server this host is connected to" AFTER `mgmt_server_id`');

View File

@ -20,6 +20,12 @@ import java.util.List;
public interface IndirectAgentLB {
/**
* Return list of management server addresses from host setting
* @return management servers string list
*/
List<String> getManagementServerList();
/**
* Return list of management server addresses after applying configured lb algorithm
* for a host in a zone.
@ -30,6 +36,17 @@ public interface IndirectAgentLB {
*/
List<String> getManagementServerList(Long hostId, Long dcId, List<Long> orderedHostIdList);
/**
* Return list of management server addresses after applying the lb algorithm
* for a host in a zone.
* @param hostId host id (if present)
* @param dcId zone id
* @param orderedHostIdList (optional) list of ordered host id list
* @param lbAlgorithm lb algorithm
* @return management servers string list
*/
List<String> getManagementServerList(Long hostId, Long dcId, List<Long> orderedHostIdList, String lbAlgorithm);
/**
* Compares received management server list against expected list for a host in a zone.
* @param hostId host id
@ -45,6 +62,8 @@ public interface IndirectAgentLB {
*/
String getLBAlgorithmName();
void checkLBAlgorithmName(String lbAlgorithm);
/**
* Returns the configured LB preferred host check interval (if applicable at cluster scope)
* @return returns interval in seconds
@ -53,4 +72,7 @@ public interface IndirectAgentLB {
void propagateMSListToAgents();
boolean haveAgentBasedHosts(long msId);
boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs);
}

View File

@ -941,7 +941,7 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
try {
JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId());
} catch (final Exception e) {
logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString());
logger.warn("Unable to deregister cluster node from JMX monitoring due to exception " + e.toString());
}
}
@ -1063,8 +1063,12 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
logger.info("New instance of management server {}, runId {} is being started", mshost, _runId);
}
} else {
ManagementServerHost.State msHostState = ManagementServerHost.State.Up;
if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) {
msHostState = ManagementServerHost.State.Maintenance;
}
_mshostDao.update(mshost.getId(), _runId, NetUtils.getCanonicalHostName(), version, _clusterNodeIP, _currentServiceAdapter.getServicePort(),
DateUtil.currentGMTTime());
DateUtil.currentGMTTime(), msHostState);
if (logger.isInfoEnabled()) {
logger.info("Management server {}, runId {} is being started", mshost, _runId);
}
@ -1102,11 +1106,17 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
if (_mshostId != null) {
final ManagementServerHostVO mshost = _mshostDao.findByMsid(_msId);
final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid());
mshost.setState(ManagementServerHost.State.Down);
mshostStatus.setLastJvmStop(new Date());
_mshostDao.update(_mshostId, mshost);
mshostStatusDao.update(mshostStatus.getId(), mshostStatus);
if (mshost != null) {
final ManagementServerStatusVO mshostStatus = mshostStatusDao.findByMsId(mshost.getUuid());
mshostStatus.setLastJvmStop(new Date());
mshostStatusDao.update(mshostStatus.getId(), mshostStatus);
ManagementServerHost.State msHostState = ManagementServerHost.State.Down;
if (ManagementServerHost.State.Maintenance.equals(mshost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(mshost.getState())) {
msHostState = ManagementServerHost.State.Maintenance;
}
_mshostDao.updateState(mshost.getId(), msHostState);
}
}
_heartbeatScheduler.shutdownNow();

View File

@ -33,10 +33,12 @@ public interface ManagementServerHostDao extends GenericDao<ManagementServerHost
int increaseAlertCount(long id);
void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate);
void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate, ManagementServerHost.State state);
void update(long id, long runid, Date lastUpdate);
boolean updateState(long id, ManagementServerHost.State newState);
List<ManagementServerHostVO> getActiveList(Date cutTime);
List<ManagementServerHostVO> getInactiveList(Date cutTime);
@ -47,6 +49,8 @@ public interface ManagementServerHostDao extends GenericDao<ManagementServerHost
List<ManagementServerHostVO> listBy(ManagementServerHost.State... states);
List<String> listNonUpStateMsIPs();
/**
* Lists msids for which hosts are orphaned, i.e. msids that hosts refer as their owning ms whilst no mshost entry exists with those msids
*

View File

@ -35,6 +35,7 @@ import com.cloud.utils.DateUtil;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.Filter;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.GenericSearchBuilder;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.TransactionLegacy;
@ -46,6 +47,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
private final SearchBuilder<ManagementServerHostVO> ActiveSearch;
private final SearchBuilder<ManagementServerHostVO> InactiveSearch;
private final SearchBuilder<ManagementServerHostVO> StateSearch;
protected GenericSearchBuilder<ManagementServerHostVO, String> NonUpStateMsSearch;
@Override
public void invalidateRunSession(long id, long runid) {
@ -77,7 +79,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
@Override
@DB
public void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate) {
public void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate, ManagementServerHost.State state) {
TransactionLegacy txn = TransactionLegacy.currentTxn();
PreparedStatement pstmt = null;
try {
@ -91,7 +93,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
pstmt.setInt(4, servicePort);
pstmt.setString(5, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate));
pstmt.setLong(6, runid);
pstmt.setString(7, ManagementServerHost.State.Up.toString());
pstmt.setString(7, state.toString());
pstmt.setLong(8, id);
pstmt.executeUpdate();
@ -130,7 +132,17 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
try {
txn.start();
pstmt = txn.prepareAutoCloseStatement("update mshost set last_update=?, removed=null, alert_count=0, state='Up' where id=? and runid=?");
boolean updateStatetoUp = false;
ManagementServerHostVO msHost = findById(id);
if (msHost != null && State.Down.equals(msHost.getState())) {
updateStatetoUp = true;
}
if (updateStatetoUp) {
pstmt = txn.prepareAutoCloseStatement("update mshost set last_update=?, removed=null, alert_count=0, state='Up' where id=? and runid=?");
} else {
pstmt = txn.prepareAutoCloseStatement("update mshost set last_update=?, removed=null, alert_count=0 where id=? and runid=?");
}
pstmt.setString(1, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate));
pstmt.setLong(2, id);
pstmt.setLong(3, runid);
@ -148,6 +160,18 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
/**
 * Persists a new state for the management server record with the given id
 * and stamps the record with the current GMT time as its last update.
 *
 * @param id       id of the management server host record
 * @param newState state to store on the record
 * @return {@code false} when no record is found for {@code id}; otherwise
 *         whatever the underlying {@code update(id, vo)} call returns
 */
@Override
public boolean updateState(long id, ManagementServerHost.State newState) {
    final ManagementServerHostVO target = findById(id);
    if (target != null) {
        target.setState(newState);
        target.setLastUpdateTime(DateUtil.currentGMTTime());
        return update(id, target);
    }
    return false;
}
@Override
public List<ManagementServerHostVO> getActiveList(Date cutTime) {
SearchCriteria<ManagementServerHostVO> sc = ActiveSearch.create();
@ -205,6 +229,11 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
StateSearch.and("state", StateSearch.entity().getState(), SearchCriteria.Op.IN);
StateSearch.and("runid", StateSearch.entity().getRunid(), SearchCriteria.Op.GT);
StateSearch.done();
NonUpStateMsSearch = createSearchBuilder(String.class);
NonUpStateMsSearch.selectFields(NonUpStateMsSearch.entity().getServiceIP());
NonUpStateMsSearch.and("state", NonUpStateMsSearch.entity().getState(), SearchCriteria.Op.NLIKE);
NonUpStateMsSearch.done();
}
@Override
@ -238,6 +267,13 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
return listBy(sc);
}
/**
 * Lists the service IPs of all management servers whose state is not {@code Up}.
 *
 * The {@code NonUpStateMsSearch} builder already declares a named "state"
 * condition with the NLIKE operator, so the value is bound to that condition
 * instead of appending a second, ad-hoc condition to the criteria.
 *
 * @return service IP strings selected by {@code NonUpStateMsSearch}
 */
@Override
public List<String> listNonUpStateMsIPs() {
    SearchCriteria<String> sc = NonUpStateMsSearch.create();
    // Bind the builder-defined condition; keeps the query shape in one place.
    sc.setParameters("state", State.Up);
    return customSearch(sc, null);
}
@Override
public List<Long> listOrphanMsids() {
List<Long> orphanList = new ArrayList<Long>();

View File

@ -33,4 +33,7 @@ public interface ManagementServerHostPeerDao extends GenericDao<ManagementServer
boolean isPeerUpState(long ownerMshost, long peerMshost, Date cutTime);
/**
 * Finds a peer record for the given owner management server, peer management server and peer state.
 *
 * @param ownerMshost owner management server host value to match
 * @param peerMshost  peer management server host value to match
 * @param peerState   peer state to match
 * @return one matching peer record; presumably {@code null} when nothing matches (confirm against the DAO base class)
 */
ManagementServerHostPeerVO findByOwnerAndPeerMsHost(long ownerMshost, long peerMshost, ManagementServerHost.State peerState);
/**
 * Finds a peer record for the given peer management server and peer state, regardless of owner.
 *
 * @param peerMshost peer management server host value to match
 * @param peerState  peer state to match
 * @return one matching peer record; presumably {@code null} when nothing matches (confirm against the DAO base class)
 */
ManagementServerHostPeerVO findByPeerMsAndState(long peerMshost, ManagementServerHost.State peerState);
}

View File

@ -34,6 +34,9 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
private final SearchBuilder<ManagementServerHostPeerVO> FindForUpdateSearch;
private final SearchBuilder<ManagementServerHostPeerVO> CountSearch;
private final SearchBuilder<ManagementServerHostPeerVO> ActiveSearch;
private final SearchBuilder<ManagementServerHostPeerVO> FindByOwnerAndPeerMsSearch;
private final SearchBuilder<ManagementServerHostPeerVO> FindByPeerMsAndStateSearch;
public ManagementServerHostPeerDaoImpl() {
ClearPeerSearch = createSearchBuilder();
@ -59,6 +62,17 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
ActiveSearch.and("peerState", ActiveSearch.entity().getPeerState(), SearchCriteria.Op.EQ);
ActiveSearch.and("lastUpdateTime", ActiveSearch.entity().getLastUpdateTime(), SearchCriteria.Op.GT);
ActiveSearch.done();
FindByOwnerAndPeerMsSearch = createSearchBuilder();
FindByOwnerAndPeerMsSearch.and("ownerMshost", FindByOwnerAndPeerMsSearch.entity().getOwnerMshost(), SearchCriteria.Op.EQ);
FindByOwnerAndPeerMsSearch.and("peerMshost", FindByOwnerAndPeerMsSearch.entity().getPeerMshost(), SearchCriteria.Op.EQ);
FindByOwnerAndPeerMsSearch.and("peerState", FindByOwnerAndPeerMsSearch.entity().getPeerState(), SearchCriteria.Op.EQ);
FindByOwnerAndPeerMsSearch.done();
FindByPeerMsAndStateSearch = createSearchBuilder();
FindByPeerMsAndStateSearch.and("peerMshost", FindByPeerMsAndStateSearch.entity().getPeerMshost(), SearchCriteria.Op.EQ);
FindByPeerMsAndStateSearch.and("peerState", FindByPeerMsAndStateSearch.entity().getPeerState(), SearchCriteria.Op.EQ);
FindByPeerMsAndStateSearch.done();
}
@Override
@ -133,4 +147,23 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
return listBy(sc).size() > 0;
}
/**
 * Returns one peer record matching the owner, the peer and the peer's state,
 * using the prebuilt {@code FindByOwnerAndPeerMsSearch} criteria.
 *
 * @param ownerMshost owner management server host value
 * @param peerMshost  peer management server host value
 * @param peerState   state the peer entry must have
 * @return the record returned by {@code findOneBy} for these criteria
 */
@Override
public ManagementServerHostPeerVO findByOwnerAndPeerMsHost(long ownerMshost, long peerMshost, ManagementServerHost.State peerState) {
    final SearchCriteria<ManagementServerHostPeerVO> criteria = FindByOwnerAndPeerMsSearch.create();

    criteria.setParameters("ownerMshost", ownerMshost);
    criteria.setParameters("peerMshost", peerMshost);
    criteria.setParameters("peerState", peerState);

    final ManagementServerHostPeerVO match = findOneBy(criteria);
    return match;
}
/**
 * Returns one peer record matching the peer and the peer's state, using the
 * prebuilt {@code FindByPeerMsAndStateSearch} criteria.
 *
 * @param peerMshost peer management server host value
 * @param peerState  state the peer entry must have
 * @return the record returned by {@code findOneBy} for these criteria
 */
@Override
public ManagementServerHostPeerVO findByPeerMsAndState(long peerMshost, ManagementServerHost.State peerState) {
    final SearchCriteria<ManagementServerHostPeerVO> criteria = FindByPeerMsAndStateSearch.create();

    criteria.setParameters("peerMshost", peerMshost);
    criteria.setParameters("peerState", peerState);

    final ManagementServerHostPeerVO match = findOneBy(criteria);
    return match;
}
}

View File

@ -174,7 +174,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
private ExecutorService _apiJobExecutor;
private ExecutorService _workerJobExecutor;
private boolean asyncJobsEnabled = true;
private boolean asyncJobsDisabled = false;
private long asyncJobsDisabledTime = 0;
@Override
public String getConfigComponentName() {
@ -218,16 +219,48 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
return submitAsyncJob(job, false);
}
private void checkShutdown() {
if (!isAsyncJobsEnabled()) {
throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs");
/**
 * Rejects a job submission while async jobs are disabled on this management server.
 *
 * One exception is allowed through: a {@code VmWorkJobVO} whose related job
 * was created at or before the moment async jobs were disabled.
 *
 * @param job the job about to be submitted
 * @throws CloudRuntimeException when async jobs are disabled and the job does not qualify for the exception above
 */
private void checkAsyncJobAllowed(AsyncJob job) {
    if (isAsyncJobsEnabled()) {
        return;
    }

    if (job instanceof VmWorkJobVO) {
        final String parentJobId = job.getRelated();
        final AsyncJob parentJob = StringUtils.isNotBlank(parentJobId)
                ? _jobDao.findByIdIncludingRemoved(Long.parseLong(parentJobId))
                : null;
        // Work belonging to a job that predates the disable timestamp may still proceed.
        if (parentJob != null && (asyncJobsDisabledTime - parentJob.getCreated().getTime()) >= 0) {
            return;
        }
    }

    throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new jobs");
}
/**
 * Decides whether a sync queue item may be processed.
 *
 * While async jobs are enabled every item is allowed. Otherwise an item is
 * allowed only when the job referenced by its content id exists and was
 * created at or before the moment async jobs were disabled.
 *
 * @param item the sync queue item to check
 * @return {@code true} when the item may be processed, {@code false} otherwise
 */
private boolean checkSyncQueueItemAllowed(SyncQueueItemVO item) {
    if (isAsyncJobsEnabled()) {
        return true;
    }

    final Long queuedJobId = item.getContentId();
    final AsyncJob queuedJob = _jobDao.findByIdIncludingRemoved(queuedJobId);
    if (queuedJob == null) {
        return false;
    }

    final long createdAt = queuedJob.getCreated().getTime();
    return (asyncJobsDisabledTime - createdAt) >= 0;
}
@SuppressWarnings("unchecked")
@DB
public long submitAsyncJob(AsyncJob job, boolean scheduleJobExecutionInContext) {
checkShutdown();
checkAsyncJobAllowed(job);
@SuppressWarnings("rawtypes")
GenericDao dao = GenericDaoBase.getDao(job.getClass());
@ -248,7 +281,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
@Override
@DB
public long submitAsyncJob(final AsyncJob job, final String syncObjType, final long syncObjId) {
checkShutdown();
checkAsyncJobAllowed(job);
try {
@SuppressWarnings("rawtypes")
@ -860,7 +893,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
protected void reallyRun() {
try {
if (!isAsyncJobsEnabled()) {
logger.info("A shutdown has been triggered. Not executing any async job");
logger.info("Maintenance or Shutdown has been initiated on this management server. Not executing any async jobs");
return;
}
@ -1301,16 +1334,18 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
@Override
public void enableAsyncJobs() {
this.asyncJobsEnabled = true;
this.asyncJobsDisabled = false;
this.asyncJobsDisabledTime = 0;
}
@Override
public void disableAsyncJobs() {
this.asyncJobsEnabled = false;
this.asyncJobsDisabled = true;
this.asyncJobsDisabledTime = System.currentTimeMillis();
}
@Override
public boolean isAsyncJobsEnabled() {
return asyncJobsEnabled;
return !asyncJobsDisabled;
}
}

View File

@ -6036,6 +6036,11 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes
/**
 * Initializes this resource by delegating to {@link #initialize(boolean)} with
 * {@code false}, i.e. the connection is not flagged as transferred.
 *
 * @return the startup commands produced by {@code initialize(false)}
 */
@Override
public StartupCommand[] initialize() {
return initialize(false);
}
@Override
public StartupCommand[] initialize(boolean isTransferredConnection) {
try {
String hostApiVersion = "4.1";
VmwareContext context = getServiceContext();
@ -6064,6 +6069,7 @@ public class VmwareResource extends ServerResourceBase implements StoragePoolRes
cmd.setHypervisorType(HypervisorType.VMware);
cmd.setCluster(_cluster);
cmd.setHypervisorVersion(hostApiVersion);
cmd.setConnectionTransferred(isTransferredConnection);
List<StartupStorageCommand> storageCmds = initializeLocalStorage();
StartupCommand[] answerCmds = new StartupCommand[1 + storageCmds.size()];

View File

@ -3723,6 +3723,11 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S
/**
 * Initializes this resource by delegating to {@link #initialize(boolean)} with
 * {@code false}, i.e. the connection is not flagged as transferred.
 *
 * @return the startup commands produced by {@code initialize(false)}
 * @throws IllegalArgumentException as declared by {@code initialize(boolean)}
 */
@Override
public StartupCommand[] initialize() throws IllegalArgumentException {
return initialize(false);
}
@Override
public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException {
final Connection conn = getConnection();
if (!getHostInfo(conn)) {
logger.warn("Unable to get host information for " + _host.getIp());
@ -3733,6 +3738,7 @@ public abstract class CitrixResourceBase extends ServerResourceBase implements S
cmd.setHypervisorType(HypervisorType.XenServer);
cmd.setCluster(_cluster);
cmd.setPoolSync(false);
cmd.setConnectionTransferred(isTransferredConnection);
try {
final Pool pool = Pool.getByUuid(conn, _host.getPool());

View File

@ -128,7 +128,12 @@ public class XenServer56Resource extends CitrixResourceBase {
/**
 * Initializes this resource by delegating to {@link #initialize(boolean)} with
 * {@code false}, i.e. the connection is not flagged as transferred.
 *
 * @return the startup commands produced by {@code initialize(false)}
 */
@Override
public StartupCommand[] initialize() {
return initialize(false);
}
@Override
public StartupCommand[] initialize(boolean isTransferredConnection) {
pingXAPI();
return super.initialize();
return super.initialize(isTransferredConnection);
}
}

View File

@ -64,7 +64,12 @@ public class XenServerResourceNewBase extends XenServer620SP1Resource {
@Override
public StartupCommand[] initialize() throws IllegalArgumentException {
final StartupCommand[] cmds = super.initialize();
return initialize(false);
}
@Override
public StartupCommand[] initialize(boolean isTransferredConnection) throws IllegalArgumentException {
final StartupCommand[] cmds = super.initialize(isTransferredConnection);
final Connection conn = getConnection();
Pool pool;

View File

@ -21,8 +21,8 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>cloud-plugin-shutdown</artifactId>
<name>Apache CloudStack Plugin - Safe Shutdown</name>
<artifactId>cloud-plugin-maintenance</artifactId>
<name>Apache CloudStack Plugin - MS Maintenance and Safe Shutdown</name>
<parent>
<groupId>org.apache.cloudstack</groupId>
<artifactId>cloudstack-plugins</artifactId>

View File

@ -25,18 +25,18 @@ import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import org.apache.cloudstack.shutdown.ShutdownManager;
import org.apache.cloudstack.maintenance.ManagementServerMaintenanceManager;
public abstract class BaseShutdownActionCmd extends BaseCmd {
public abstract class BaseMSMaintenanceActionCmd extends BaseCmd {
@Inject
protected ShutdownManager shutdownManager;
protected ManagementServerMaintenanceManager managementServerMaintenanceManager;
/////////////////////////////////////////////////////
//////////////// API parameters /////////////////////
/////////////////////////////////////////////////////
@Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server", required = true)
@Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the id of the management server", required = true)
private Long managementServerId;
/////////////////////////////////////////////////////

View File

@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.BaseCmd;
import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType;
/**
 * API command {@code cancelMaintenance}: asks the maintenance manager to cancel
 * maintenance of a management server. Authorized for the Admin role only.
 */
@APICommand(name = CancelMaintenanceCmd.APINAME,
        description = "Cancels maintenance of the management server",
        since = "4.21.0",
        responseObject = ManagementServerMaintenanceResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        authorized = {RoleType.Admin})
public class CancelMaintenanceCmd extends BaseMSMaintenanceActionCmd {

    public static final String APINAME = "cancelMaintenance";

    /** Reports the system account as the owner of this command. */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /** Builds the response name from the lower-cased API name and the common response suffix. */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /** Delegates to the maintenance manager and publishes its response as this command's response object. */
    @Override
    public void execute() {
        final ManagementServerMaintenanceResponse cancelResult = managementServerMaintenanceManager.cancelMaintenance(this);
        cancelResult.setResponseName(getCommandName());
        cancelResult.setObjectName("cancelmaintenance");
        setResponseObject(cancelResult);
    }
}

View File

@ -15,24 +15,24 @@
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command;
package org.apache.cloudstack.api.command;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.BaseCmd;
import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType;
@APICommand(name = CancelShutdownCmd.APINAME,
description = "Cancels a triggered shutdown",
since = "4.19.0",
responseObject = ReadyForShutdownResponse.class,
responseObject = ManagementServerMaintenanceResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
authorized = {RoleType.Admin})
public class CancelShutdownCmd extends BaseShutdownActionCmd {
public class CancelShutdownCmd extends BaseMSMaintenanceActionCmd {
public static final String APINAME = "cancelShutdown";
@ -52,7 +52,7 @@ public class CancelShutdownCmd extends BaseShutdownActionCmd {
@Override
public void execute() {
final ReadyForShutdownResponse response = shutdownManager.cancelShutdown(this);
final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.cancelShutdown(this);
response.setResponseName(getCommandName());
response.setObjectName("cancelshutdown");
setResponseObject(response);

View File

@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType;
/**
 * API command {@code prepareForMaintenance}: asks the maintenance manager to
 * prepare a management server for maintenance. Authorized for the Admin role only.
 * An optional load balancer algorithm for indirect agents can be supplied.
 */
@APICommand(name = PrepareForMaintenanceCmd.APINAME,
        description = "Prepares management server for maintenance by preventing new jobs from being accepted after completion of active jobs and migrating the agents",
        since = "4.21.0",
        responseObject = ManagementServerMaintenanceResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        authorized = {RoleType.Admin})
public class PrepareForMaintenanceCmd extends BaseMSMaintenanceActionCmd {

    public static final String APINAME = "prepareForMaintenance";

    @Parameter(name = ApiConstants.ALGORITHM, type = CommandType.STRING, description = "indirect agents load balancer algorithm (static, roundrobin, shuffle);" +
            " when this is not set, already configured algorithm from setting 'indirect.agent.lb.algorithm' is considered")
    private String algorithm;

    /** @return the requested algorithm value, as supplied through the API parameter or the setter */
    public String getAlgorithm() {
        return this.algorithm;
    }

    /** @param algorithm the algorithm value to store on this command */
    public void setAlgorithm(String algorithm) {
        this.algorithm = algorithm;
    }

    /** Reports the system account as the owner of this command. */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /** Builds the response name from the lower-cased API name and the common response suffix. */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /** Delegates to the maintenance manager and publishes its response as this command's response object. */
    @Override
    public void execute() {
        final ManagementServerMaintenanceResponse prepareResult = managementServerMaintenanceManager.prepareForMaintenance(this);
        prepareResult.setResponseName(getCommandName());
        prepareResult.setObjectName("prepareformaintenance");
        setResponseObject(prepareResult);
    }
}

View File

@ -17,22 +17,21 @@
package org.apache.cloudstack.api.command;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.BaseCmd;
import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType;
@APICommand(name = PrepareForShutdownCmd.APINAME,
description = "Prepares CloudStack for a safe manual shutdown by preventing new jobs from being accepted",
since = "4.19.0",
responseObject = ReadyForShutdownResponse.class,
responseObject = ManagementServerMaintenanceResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
authorized = {RoleType.Admin})
public class PrepareForShutdownCmd extends BaseShutdownActionCmd {
public class PrepareForShutdownCmd extends BaseMSMaintenanceActionCmd {
public static final String APINAME = "prepareForShutdown";
@Override
@ -51,7 +50,7 @@ public class PrepareForShutdownCmd extends BaseShutdownActionCmd {
@Override
public void execute() {
final ReadyForShutdownResponse response = shutdownManager.prepareForShutdown(this);
final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.prepareForShutdown(this);
response.setResponseName(getCommandName());
response.setObjectName("prepareforshutdown");
setResponseObject(response);

View File

@ -17,55 +17,23 @@
package org.apache.cloudstack.api.command;
import javax.inject.Inject;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
import org.apache.cloudstack.shutdown.ShutdownManager;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import com.cloud.user.Account;
@APICommand(name = ReadyForShutdownCmd.APINAME,
description = "Returns the status of CloudStack, whether a shutdown has been triggered and if ready to shutdown",
since = "4.19.0",
responseObject = ReadyForShutdownResponse.class,
responseObject = ManagementServerMaintenanceResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false)
public class ReadyForShutdownCmd extends BaseCmd {
public class ReadyForShutdownCmd extends BaseMSMaintenanceActionCmd {
public static final String APINAME = "readyForShutdown";
@Inject
private ShutdownManager shutdownManager;
/////////////////////////////////////////////////////
//////////////// API parameters /////////////////////
/////////////////////////////////////////////////////
@Parameter(name = ApiConstants.MANAGEMENT_SERVER_ID, type = CommandType.UUID, entityType = ManagementServerResponse.class, description = "the uuid of the management server")
private Long managementServerId;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
public Long getManagementServerId() {
return managementServerId;
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////
@Override
public void execute() {
final ReadyForShutdownResponse response = shutdownManager.readyForShutdown(this);
response.setResponseName(getCommandName());
response.setObjectName("readyforshutdown");
setResponseObject(response);
}
@Override
public String getCommandName() {
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
@ -75,4 +43,16 @@ public class ReadyForShutdownCmd extends BaseCmd {
public long getEntityOwnerId() {
return Account.ACCOUNT_ID_SYSTEM;
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////
@Override
public void execute() {
final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.readyForShutdown(this);
response.setResponseName(getCommandName());
response.setObjectName("readyforshutdown");
setResponseObject(response);
}
}

View File

@ -22,16 +22,16 @@ import org.apache.cloudstack.api.BaseCmd;
import com.cloud.user.Account;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.acl.RoleType;
@APICommand(name = TriggerShutdownCmd.APINAME,
description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobbs have been completed. Triggers an immediate shutdown if forced",
description = "Triggers an automatic safe shutdown of CloudStack by not accepting new jobs and shutting down when all pending jobs have been completed.",
since = "4.19.0",
responseObject = ReadyForShutdownResponse.class,
responseObject = ManagementServerMaintenanceResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
authorized = {RoleType.Admin})
public class TriggerShutdownCmd extends BaseShutdownActionCmd {
public class TriggerShutdownCmd extends BaseMSMaintenanceActionCmd {
public static final String APINAME = "triggerShutdown";
/////////////////////////////////////////////////////
@ -54,7 +54,7 @@ public class TriggerShutdownCmd extends BaseShutdownActionCmd {
@Override
public void execute() {
final ReadyForShutdownResponse response = shutdownManager.triggerShutdown(this);
final ManagementServerMaintenanceResponse response = managementServerMaintenanceManager.triggerShutdown(this);
response.setResponseName(getCommandName());
response.setObjectName("triggershutdown");
setResponseObject(response);

View File

@ -16,35 +16,81 @@
// under the License.
package org.apache.cloudstack.api.response;
import java.util.List;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.management.ManagementServerHost.State;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
public class ReadyForShutdownResponse extends BaseResponse {
@SerializedName(ApiConstants.READY_FOR_SHUTDOWN)
@Param(description = "Indicates whether CloudStack is ready to shutdown")
private Boolean readyForShutdown;
public class ManagementServerMaintenanceResponse extends BaseResponse {
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
@Param(description = "The id of the management server")
private String managementServerId;
@SerializedName(ApiConstants.STATE)
@Param(description = "the state of the management server")
private State state;
@SerializedName(ApiConstants.MAINTENANCE_INITIATED)
@Param(description = "Indicates whether maintenance has been initiated")
private Boolean maintenanceInitiated;
@SerializedName(ApiConstants.SHUTDOWN_TRIGGERED)
@Param(description = "Indicates whether a shutdown has been triggered")
private Boolean shutdownTriggered;
@SerializedName(ApiConstants.READY_FOR_SHUTDOWN)
@Param(description = "Indicates whether CloudStack is ready to shutdown")
private Boolean readyForShutdown;
@SerializedName(ApiConstants.PENDING_JOBS_COUNT)
@Param(description = "The number of jobs in progress")
private Long pendingJobsCount;
@SerializedName(ApiConstants.MANAGEMENT_SERVER_ID)
@Param(description = "The id of the management server")
private Long msId;
@SerializedName(ApiConstants.AGENTS_COUNT)
@Param(description = "The number of host agents this management server is responsible for")
private Long agentsCount;
public ReadyForShutdownResponse(Long msId, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount) {
this.msId = msId;
@SerializedName(ApiConstants.AGENTS)
@Param(description = "The host agents this management server is responsible for")
private List<String> agents;
/**
 * Creates a fully populated maintenance/shutdown status response.
 *
 * @param managementServerId   value serialized as the management server id
 * @param state                current state of the management server
 * @param maintenanceInitiated whether maintenance has been initiated
 * @param shutdownTriggered    whether a shutdown has been triggered
 * @param readyForShutdown     whether the server is ready to shut down
 * @param pendingJobsCount     number of jobs still in progress
 * @param agentsCount          number of host agents owned by the server
 * @param agents               the host agents owned by the server
 */
public ManagementServerMaintenanceResponse(String managementServerId, State state, Boolean maintenanceInitiated, Boolean shutdownTriggered, Boolean readyForShutdown, long pendingJobsCount, long agentsCount, List<String> agents) {
    // Identity and state.
    this.state = state;
    this.managementServerId = managementServerId;

    // Progress flags.
    this.readyForShutdown = readyForShutdown;
    this.shutdownTriggered = shutdownTriggered;
    this.maintenanceInitiated = maintenanceInitiated;

    // Workload still attached to the server.
    this.agents = agents;
    this.agentsCount = agentsCount;
    this.pendingJobsCount = pendingJobsCount;
}
/** Returns the management server id carried by this response. */
public String getManagementServerId() {
    return this.managementServerId;
}

/** Replaces the management server id carried by this response. */
public void setManagementServerId(String managementServerId) {
    this.managementServerId = managementServerId;
}
/** Returns the management server state carried by this response. */
public State getState() {
    return this.state;
}

/** Replaces the management server state carried by this response. */
public void setState(State state) {
    this.state = state;
}
/** Returns whether maintenance has been initiated. */
public Boolean getMaintenanceInitiated() {
    return maintenanceInitiated;
}

/** Sets whether maintenance has been initiated. */
public void setMaintenanceInitiated(Boolean maintenanceInitiated) {
    this.maintenanceInitiated = maintenanceInitiated;
}
public Boolean getShutdownTriggered() {
@ -71,11 +117,19 @@ public class ReadyForShutdownResponse extends BaseResponse {
this.pendingJobsCount = pendingJobsCount;
}
public Long getMsId() {
return msId;
public Long getAgentsCount() {
return this.agentsCount;
}
public void setMsId(Long msId) {
this.msId = msId;
public void setAgentsCount(Long agentsCount) {
this.agentsCount = agentsCount;
}
/** Returns the host agents listed in this response (the stored list itself, not a copy). */
public List<String> getAgents() {
    return this.agents;
}

/** Replaces the host agents listed in this response. */
public void setAgents(List<String> agents) {
    this.agents = agents;
}
}

View File

@ -0,0 +1,24 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.maintenance;
// Callback interface for components that need to react when the management
// server they are registered with enters or leaves maintenance. Listeners are
// registered through ManagementServerMaintenanceManager#registerListener.
public interface ManagementServerMaintenanceListener {
// Invoked by ManagementServerMaintenanceManager#onMaintenance() for every registered listener.
void onManagementServerMaintenance();
// Invoked by ManagementServerMaintenanceManager#onCancelMaintenance() for every registered listener.
void onManagementServerCancelMaintenance();
}

View File

@ -0,0 +1,108 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.maintenance;
import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.CancelShutdownCmd;
import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
import org.apache.cloudstack.api.command.TriggerShutdownCmd;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.framework.config.ConfigKey;
import com.cloud.cluster.ManagementServerHostVO;
// Coordinates shutdown and maintenance of management servers: the no-argument
// operations act on the current management server, the *Cmd overloads act on
// the management server identified by the API command.
public interface ManagementServerMaintenanceManager {
// Default value (in minutes) of the 'management.server.maintenance.timeout' setting.
int DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS = 60;
// Global setting holding the length (in minutes) of the maintenance window.
ConfigKey<Integer> ManagementServerMaintenanceTimeoutInMins = new ConfigKey<>(Integer.class,
"management.server.maintenance.timeout",
"Advanced",
String.valueOf(DEFAULT_MS_MAINTENANCE_TIMEOUT_IN_MINS),
"Timeout (in mins) for the maintenance window for the management server, default: 60 mins.",
true,
ConfigKey.Scope.Global,
null);
// Adds a listener to be notified by onMaintenance() / onCancelMaintenance().
void registerListener(ManagementServerMaintenanceListener listener);
// Removes a previously registered listener.
void unregisterListener(ManagementServerMaintenanceListener listener);
// Notifies every registered listener that the management server is in maintenance.
void onMaintenance();
// Notifies every registered listener that maintenance has been cancelled.
void onCancelMaintenance();
// Returns the number of pending jobs for the given management servers.
// NOTE: the arguments are msids (node ids) and NOT database ids.
long countPendingJobs(Long... msIds);
// Indicates whether async jobs are currently enabled.
boolean isAsyncJobsEnabled();
// Indicates whether a shutdown has been triggered on the current management server
boolean isShutdownTriggered();
// Indicates whether the current management server is preparing to shut down
boolean isPreparingForShutdown();
// Triggers a shutdown on the current management server by not accepting any more async jobs and shutting down when there are no pending jobs
void triggerShutdown();
// Prepares the current management server to shut down by not accepting any more async jobs
void prepareForShutdown();
// Cancels the shutdown on the current management server
void cancelShutdown();
// Indicates whether the current management server is preparing for maintenance
boolean isPreparingForMaintenance();
// Clears the preparing-for-maintenance state (flag, start time and LB algorithm).
void resetPreparingForMaintenance();
// Returns the time (System.currentTimeMillis()) at which maintenance preparation started, or 0 when not set.
long getMaintenanceStartTime();
// Returns the LB algorithm given to prepareForMaintenance, or null when not set.
String getLbAlgorithm();
// Prepares the current management server for maintenance by migrating the agents and not accepting any more async jobs
// NOTE(review): the parameter name 'lbAlorithm' is a typo of 'lbAlgorithm'.
void prepareForMaintenance(String lbAlorithm);
// Cancels maintenance of the current management server
void cancelMaintenance();
// Aborts an in-progress maintenance preparation and puts the given management server
// (or the current one when msHost is null) back to the Up state.
void cancelPreparingForMaintenance(ManagementServerHostVO msHost);
// Stops the periodic pending-jobs check, if one is running.
void cancelWaitForPendingJobs();
// Returns the shutdown/maintenance status of the specified management server
// (the current one when the command carries no id), including whether it is ready to be shut down
ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd);
// Prepares the specified management server to shut down by not accepting any more async jobs
ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd);
// Cancels the shutdown on the specified management server
ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd);
// Triggers a shutdown on the specified management server by not accepting any more async jobs and shutting down when there are no pending jobs
ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd);
// Prepares the specified management server for maintenance by migrating the agents and not accepting any more async jobs
ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd);
// Cancels maintenance of the specified management server
ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd);
}

View File

@ -0,0 +1,598 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.maintenance;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.CancelShutdownCmd;
import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
import org.apache.cloudstack.api.command.TriggerShutdownCmd;
import org.apache.cloudstack.api.response.ManagementServerMaintenanceResponse;
import org.apache.cloudstack.config.ApiServiceConfiguration;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.management.ManagementServerHost.State;
import org.apache.cloudstack.maintenance.command.CancelMaintenanceManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.CancelShutdownManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.PrepareForMaintenanceManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.PrepareForShutdownManagementServerHostCommand;
import org.apache.cloudstack.maintenance.command.TriggerShutdownManagementServerHostCommand;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.collections.CollectionUtils;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Command;
import com.cloud.cluster.ClusterManager;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.host.dao.HostDao;
import com.cloud.serializer.GsonHelper;
import com.cloud.utils.StringUtils;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.component.PluggableService;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.exception.CloudRuntimeException;
import com.google.gson.Gson;
public class ManagementServerMaintenanceManagerImpl extends ManagerBase implements ManagementServerMaintenanceManager, PluggableService, Configurable {
// JSON serializer used to encode the commands sent to peer management servers.
Gson gson;
@Inject
private AsyncJobManager jobManager;
@Inject
private ClusterManager clusterManager;
@Inject
private AgentManager agentMgr;
@Inject
private IndirectAgentLB indirectAgentLB;
@Inject
private ManagementServerHostDao msHostDao;
@Inject
private HostDao hostDao;
// Registered maintenance listeners; every access synchronizes on the list itself.
private final List<ManagementServerMaintenanceListener> _listeners = new ArrayList<>();
// The fields below are written by callers of the prepare/trigger/cancel methods and
// read by CheckPendingJobsTask on the "PendingJobsCheck" executor thread. They are
// volatile so that a cancellation is guaranteed to become visible to that thread.
private volatile boolean shutdownTriggered = false;
private volatile boolean preparingForShutdown = false;
private volatile boolean preparingForMaintenance = false;
// System.currentTimeMillis() at which maintenance preparation started; 0 when not set.
private volatile long maintenanceStartTime = 0;
// LB algorithm requested for the agent migration; null when not set.
private volatile String lbAlgorithm;
// Executor running the periodic pending-jobs check; null when no check is scheduled.
private volatile ScheduledExecutorService pendingJobsCheckTask;
/** Creates the manager and obtains the shared Gson instance from {@link GsonHelper}. */
protected ManagementServerMaintenanceManagerImpl() {
    // The superclass no-arg constructor is invoked implicitly.
    this.gson = GsonHelper.getGson();
}
/**
 * Restores the maintenance state on startup: when the record of the current
 * management server is in PreparingForMaintenance or Maintenance, async jobs
 * are disabled and the record is set to Maintenance.
 *
 * @return always {@code true}
 */
@Override
public boolean start() {
    final ManagementServerHostVO currentMs = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
    if (currentMs == null) {
        return true;
    }

    final State recordedState = currentMs.getState();
    final boolean wasUnderMaintenance = State.PreparingForMaintenance.equals(recordedState)
            || State.Maintenance.equals(recordedState);
    if (wasUnderMaintenance) {
        this.preparingForMaintenance = true;
        jobManager.disableAsyncJobs();
        msHostDao.updateState(currentMs.getId(), State.Maintenance);
    }
    return true;
}
/**
 * Adds a listener that is notified on maintenance and cancel-maintenance.
 *
 * @param listener the listener to add
 */
@Override
public void registerListener(ManagementServerMaintenanceListener listener) {
    synchronized (_listeners) {
        final String logLine = "Register management server maintenance listener " + listener.getClass();
        logger.info(logLine);
        _listeners.add(listener);
    }
}
/**
 * Removes a previously registered listener.
 *
 * @param listener the listener to remove
 */
@Override
public void unregisterListener(ManagementServerMaintenanceListener listener) {
    synchronized (_listeners) {
        final String logLine = "Unregister management server maintenance listener " + listener.getClass();
        logger.info(logLine);
        _listeners.remove(listener);
    }
}
/**
 * Calls {@code onManagementServerMaintenance()} on every registered listener,
 * in registration order, while holding the listeners lock.
 */
@Override
public void onMaintenance() {
    synchronized (_listeners) {
        for (final ManagementServerMaintenanceListener registered : _listeners) {
            final String logLine = "Invoke, on maintenance for listener " + registered.getClass();
            logger.info(logLine);
            registered.onManagementServerMaintenance();
        }
    }
}
/**
 * Calls {@code onManagementServerCancelMaintenance()} on every registered
 * listener, in registration order, while holding the listeners lock.
 */
@Override
public void onCancelMaintenance() {
    synchronized (_listeners) {
        for (final ManagementServerMaintenanceListener registered : _listeners) {
            final String logLine = "Invoke, on cancel maintenance for listener " + registered.getClass();
            logger.info(logLine);
            registered.onManagementServerCancelMaintenance();
        }
    }
}
/** Returns whether a shutdown has been triggered on this management server. */
@Override
public boolean isShutdownTriggered() {
    return this.shutdownTriggered;
}
/** Returns whether this management server is preparing to shut down. */
@Override
public boolean isPreparingForShutdown() {
    return this.preparingForShutdown;
}
/** Returns whether this management server is preparing for (or in) maintenance. */
@Override
public boolean isPreparingForMaintenance() {
    return this.preparingForMaintenance;
}
/** Clears the maintenance preparation state: flag, start time and LB algorithm. */
@Override
public void resetPreparingForMaintenance() {
    this.preparingForMaintenance = false;
    this.maintenanceStartTime = 0;
    this.lbAlgorithm = null;
}
/**
 * Returns the {@code System.currentTimeMillis()} value recorded when maintenance
 * preparation started, or 0 when none is recorded.
 */
@Override
public long getMaintenanceStartTime() {
    return this.maintenanceStartTime;
}
/** Returns the LB algorithm recorded by {@code prepareForMaintenance}, or null when not set. */
@Override
public String getLbAlgorithm() {
    return this.lbAlgorithm;
}
/**
 * Counts the pending non-pseudo jobs of the given management servers by
 * delegating to the async job manager.
 *
 * @param msIds management server msids (not database ids)
 * @return the number of pending jobs
 */
@Override
public long countPendingJobs(Long... msIds) {
    final long pendingJobs = jobManager.countPendingNonPseudoJobs(msIds);
    return pendingJobs;
}
/** Returns whether the async job manager currently has async jobs enabled. */
@Override
public boolean isAsyncJobsEnabled() {
    final boolean enabled = jobManager.isAsyncJobsEnabled();
    return enabled;
}
/**
 * Marks a shutdown as triggered on this management server and starts the
 * shutdown preparation.
 *
 * @throws CloudRuntimeException if a shutdown has already been triggered
 */
@Override
public void triggerShutdown() {
    final boolean alreadyTriggered = this.shutdownTriggered;
    if (alreadyTriggered) {
        throw new CloudRuntimeException("Shutdown has already been triggered");
    }

    this.shutdownTriggered = true;
    prepareForShutdown(true);
}
/**
 * Puts this management server into the preparing-for-shutdown state: disables
 * async jobs and starts the periodic pending-jobs check.
 *
 * @param postTrigger {@code true} when called as part of triggering a shutdown; the
 *                    "already in progress" validations are skipped in that case so that
 *                    a shutdown can be triggered right after preparing for it
 * @throws CloudRuntimeException if {@code postTrigger} is false and maintenance or
 *                               shutdown preparation is already in progress
 */
private void prepareForShutdown(boolean postTrigger) {
    final boolean validate = !postTrigger;
    if (validate && this.preparingForMaintenance) {
        throw new CloudRuntimeException("Maintenance has already been initiated, cancel maintenance and try again");
    }
    if (validate && this.preparingForShutdown) {
        throw new CloudRuntimeException("Shutdown has already been triggered");
    }

    this.preparingForShutdown = true;
    jobManager.disableAsyncJobs();
    waitForPendingJobs();
}
/**
 * Prepares this management server for shutdown with the "already in progress"
 * validations enabled.
 */
@Override
public void prepareForShutdown() {
    final boolean postTrigger = false;
    prepareForShutdown(postTrigger);
}
/**
 * Cancels a pending shutdown on this management server: clears the shutdown and
 * maintenance flags, re-enables async jobs and stops the pending-jobs check.
 *
 * @throws CloudRuntimeException if no shutdown preparation is in progress
 */
@Override
public void cancelShutdown() {
    final boolean shutdownInProgress = this.preparingForShutdown;
    if (!shutdownInProgress) {
        throw new CloudRuntimeException("Shutdown has not been triggered");
    }

    preparingForShutdown = false;
    shutdownTriggered = false;
    resetPreparingForMaintenance();

    jobManager.enableAsyncJobs();
    cancelWaitForPendingJobs();
}
/**
 * Starts maintenance preparation on this management server: records the start
 * time and LB algorithm, disables async jobs and starts the pending-jobs check.
 *
 * @param lbAlorithm LB algorithm to record for the agent migration (may be null)
 * @throws CloudRuntimeException if shutdown or maintenance preparation is already in progress
 */
@Override
public void prepareForMaintenance(String lbAlorithm) {
    final boolean shutdownInProgress = this.preparingForShutdown;
    if (shutdownInProgress) {
        throw new CloudRuntimeException("Shutdown has already been triggered, cancel shutdown and try again");
    }
    final boolean maintenanceInProgress = this.preparingForMaintenance;
    if (maintenanceInProgress) {
        throw new CloudRuntimeException("Maintenance has already been initiated");
    }

    preparingForMaintenance = true;
    maintenanceStartTime = System.currentTimeMillis();
    lbAlgorithm = lbAlorithm;

    jobManager.disableAsyncJobs();
    waitForPendingJobs();
}
/**
 * Cancels maintenance on this management server: clears the maintenance and
 * shutdown flags, re-enables async jobs, stops the pending-jobs check and, when
 * the server record is in the Maintenance state, notifies the listeners.
 *
 * @throws CloudRuntimeException if maintenance has not been initiated
 */
@Override
public void cancelMaintenance() {
    final boolean maintenanceInProgress = this.preparingForMaintenance;
    if (!maintenanceInProgress) {
        throw new CloudRuntimeException("Maintenance has not been initiated");
    }

    resetPreparingForMaintenance();
    preparingForShutdown = false;
    shutdownTriggered = false;

    jobManager.enableAsyncJobs();
    cancelWaitForPendingJobs();

    final ManagementServerHostVO currentMs = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
    final boolean recordedInMaintenance = currentMs != null && State.Maintenance.equals(currentMs.getState());
    if (recordedInMaintenance) {
        onCancelMaintenance();
    }
}
/**
 * (Re)starts the periodic pending-jobs check: stops any running check, then
 * schedules a new CheckPendingJobsTask on a single-threaded "PendingJobsCheck"
 * executor, first run after 1 second and then every 3 seconds.
 */
private void waitForPendingJobs() {
    cancelWaitForPendingJobs();

    final long initialDelaySecs = 1L;
    final long periodSecs = 3L; // short period so that pending jobs are re-checked frequently
    final NamedThreadFactory threadFactory = new NamedThreadFactory("PendingJobsCheck");

    pendingJobsCheckTask = Executors.newScheduledThreadPool(1, threadFactory);
    pendingJobsCheckTask.scheduleAtFixedRate(new CheckPendingJobsTask(this), initialDelaySecs, periodSecs, TimeUnit.SECONDS);
}
/**
 * Stops the periodic pending-jobs check, if one is scheduled, by shutting down
 * its executor and dropping the reference. Does nothing otherwise.
 */
@Override
public void cancelWaitForPendingJobs() {
    if (pendingJobsCheckTask == null) {
        return;
    }
    pendingJobsCheckTask.shutdown();
    pendingJobsCheckTask = null;
}
/**
 * Builds the shutdown/maintenance status of the management server referenced
 * by the command.
 *
 * @param cmd the API command carrying the (optional) management server id
 * @return the status response
 */
@Override
public ManagementServerMaintenanceResponse readyForShutdown(ReadyForShutdownCmd cmd) {
    final Long requestedMsId = cmd.getManagementServerId();
    return prepareMaintenanceResponse(requestedMsId);
}
/**
 * Asks the specified management server to prepare for shutdown and, on success,
 * records the PreparingForShutDown state for it.
 *
 * @param cmd the API command identifying the management server
 * @return the resulting status response
 * @throws CloudRuntimeException if the server is unknown, is not Up, or the
 *                               cluster command does not report success
 */
@Override
public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) {
    final ManagementServerHostVO target = msHostDao.findById(cmd.getManagementServerId());
    if (target == null) {
        throw new CloudRuntimeException("Unable to find the management server, cannot prepare for shutdown");
    }
    final boolean isUp = State.Up.equals(target.getState());
    if (!isUp) {
        throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown");
    }

    // Send the command to the target management server through the cluster manager.
    final Command[] clusterCmds = {new PrepareForShutdownManagementServerHostCommand(target.getMsid())};
    final String outcome = clusterManager.execute(String.valueOf(target.getMsid()), 0, gson.toJson(clusterCmds), true);
    logger.info("PrepareForShutdownCmd result : " + outcome);
    if (!outcome.startsWith("Success")) {
        throw new CloudRuntimeException(outcome);
    }

    msHostDao.updateState(target.getId(), State.PreparingForShutDown);
    return prepareMaintenanceResponse(cmd.getManagementServerId());
}
/**
 * Asks the specified management server to shut down once it has no pending jobs
 * and, on success, records the ShuttingDown state for it. A server that is Up is
 * first recorded as PreparingForShutDown before the command is sent.
 *
 * @param cmd the API command identifying the management server
 * @return the resulting status response
 * @throws CloudRuntimeException if the server is unknown, is not in one of the
 *                               Up/Maintenance/PreparingForShutDown/ReadyToShutDown
 *                               states, or the cluster command does not report success
 */
@Override
public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) {
    final ManagementServerHostVO target = msHostDao.findById(cmd.getManagementServerId());
    if (target == null) {
        throw new CloudRuntimeException("Unable to find the management server, cannot trigger shutdown");
    }

    final boolean triggerAllowed = State.Up.equals(target.getState())
            || State.Maintenance.equals(target.getState())
            || State.PreparingForShutDown.equals(target.getState())
            || State.ReadyToShutDown.equals(target.getState());
    if (!triggerAllowed) {
        throw new CloudRuntimeException("Management server is not in the right state to trigger shutdown");
    }
    if (State.Up.equals(target.getState())) {
        msHostDao.updateState(target.getId(), State.PreparingForShutDown);
    }

    // Send the command to the target management server through the cluster manager.
    final Command[] clusterCmds = {new TriggerShutdownManagementServerHostCommand(target.getMsid())};
    final String outcome = clusterManager.execute(String.valueOf(target.getMsid()), 0, gson.toJson(clusterCmds), true);
    logger.info("TriggerShutdownCmd result : " + outcome);
    if (!outcome.startsWith("Success")) {
        throw new CloudRuntimeException(outcome);
    }

    msHostDao.updateState(target.getId(), State.ShuttingDown);
    return prepareMaintenanceResponse(cmd.getManagementServerId());
}
/**
 * Asks the specified management server to cancel its shutdown and, on success,
 * records the Up state for it.
 *
 * @param cmd the API command identifying the management server
 * @return the resulting status response
 * @throws CloudRuntimeException if the server is unknown, is neither
 *                               PreparingForShutDown nor ReadyToShutDown, or the
 *                               cluster command does not report success
 */
@Override
public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) {
    final ManagementServerHostVO target = msHostDao.findById(cmd.getManagementServerId());
    if (target == null) {
        throw new CloudRuntimeException("Unable to find the management server, cannot cancel shutdown");
    }

    final boolean cancellable = State.PreparingForShutDown.equals(target.getState())
            || State.ReadyToShutDown.equals(target.getState());
    if (!cancellable) {
        throw new CloudRuntimeException("Management server is not in the right state to cancel shutdown");
    }

    // Send the command to the target management server through the cluster manager.
    final Command[] clusterCmds = {new CancelShutdownManagementServerHostCommand(target.getMsid())};
    final String outcome = clusterManager.execute(String.valueOf(target.getMsid()), 0, gson.toJson(clusterCmds), true);
    logger.info("CancelShutdownCmd result : " + outcome);
    if (!outcome.startsWith("Success")) {
        throw new CloudRuntimeException(outcome);
    }

    msHostDao.updateState(target.getId(), State.Up);
    return prepareMaintenanceResponse(cmd.getManagementServerId());
}
/**
 * Validates that the specified management server can go into maintenance, asks it
 * to prepare for maintenance and, on success, records the PreparingForMaintenance
 * state for it.
 *
 * Preconditions checked: the optional LB algorithm name is valid, more than one
 * management server is Up, the target exists and is Up, no management server is
 * already preparing for maintenance and, when the target has agent based hosts,
 * at least one other configured management server address belongs to an Up server.
 *
 * @param cmd the API command identifying the management server and the optional LB algorithm
 * @return the resulting status response
 * @throws CloudRuntimeException if a precondition fails or the cluster command
 *                               does not report success
 */
@Override
public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) {
    if (StringUtils.isNotBlank(cmd.getAlgorithm())) {
        indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm());
    }

    final List<ManagementServerHostVO> activeMsList = msHostDao.listBy(State.Up);
    if (CollectionUtils.isEmpty(activeMsList)) {
        throw new CloudRuntimeException("Cannot prepare for maintenance, no active management servers found");
    }
    if (activeMsList.size() == 1) {
        throw new CloudRuntimeException("Prepare for maintenance not supported, there is only one active management server");
    }

    ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
    if (msHost == null) {
        throw new CloudRuntimeException("Cannot prepare for maintenance, unable to find the management server");
    }
    if (!State.Up.equals(msHost.getState())) {
        throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance");
    }

    final List<ManagementServerHostVO> preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance);
    if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) {
        throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance");
    }

    if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) {
        // The agents need another Up management server, from the configured list, to move to.
        List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
        indirectAgentMsList.remove(msHost.getServiceIP());
        List<String> nonUpMsList = msHostDao.listNonUpStateMsIPs();
        indirectAgentMsList.removeAll(nonUpMsList);
        if (CollectionUtils.isEmpty(indirectAgentMsList)) {
            throw new CloudRuntimeException(String.format("Cannot prepare for maintenance, no other active management servers found from '%s' setting", ApiServiceConfiguration.ManagementServerAddresses.key()));
        }
    }

    // Look the agents up by msid (node id), consistent with the other host lookups in
    // this class; cmd.getManagementServerId() is the database id used with findById.
    List<String> lastAgents = hostDao.listByMs(msHost.getMsid());
    agentMgr.setLastAgents(lastAgents);

    final Command[] cmds = new Command[1];
    cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm());
    String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
    logger.info("PrepareForMaintenanceCmd result : " + result);
    // A missing result is treated as a failure too, so that the remembered agents are
    // always cleared instead of being skipped by a NullPointerException.
    if (result == null || !result.startsWith("Success")) {
        agentMgr.setLastAgents(null);
        throw new CloudRuntimeException(result != null ? result : "Cannot prepare for maintenance, no result received from the management server");
    }

    msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
    return prepareMaintenanceResponse(cmd.getManagementServerId());
}
/**
 * Asks the specified management server to cancel maintenance and, on success,
 * records the Up state for it and clears the remembered agents.
 *
 * @param cmd the API command identifying the management server
 * @return the resulting status response
 * @throws CloudRuntimeException if the server is unknown, is neither Maintenance
 *                               nor PreparingForMaintenance, or the cluster command
 *                               does not report success
 */
@Override
public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) {
    final ManagementServerHostVO target = msHostDao.findById(cmd.getManagementServerId());
    if (target == null) {
        throw new CloudRuntimeException("Unable to find the management server, cannot cancel maintenance");
    }

    final boolean cancellable = State.Maintenance.equals(target.getState())
            || State.PreparingForMaintenance.equals(target.getState());
    if (!cancellable) {
        throw new CloudRuntimeException("Management server is not in the right state to cancel maintenance");
    }

    // Send the command to the target management server through the cluster manager.
    final Command[] clusterCmds = {new CancelMaintenanceManagementServerHostCommand(target.getMsid())};
    final String outcome = clusterManager.execute(String.valueOf(target.getMsid()), 0, gson.toJson(clusterCmds), true);
    logger.info("CancelMaintenanceCmd result : " + outcome);
    if (!outcome.startsWith("Success")) {
        throw new CloudRuntimeException(outcome);
    }

    msHostDao.updateState(target.getId(), State.Up);
    agentMgr.setLastAgents(null);
    return prepareMaintenanceResponse(cmd.getManagementServerId());
}
/**
 * Aborts an in-progress maintenance preparation: clears the maintenance and
 * shutdown flags, re-enables async jobs and records the Up state.
 *
 * @param msHost the management server record to put back to Up; when null, the
 *               record of the current management server is looked up. If that
 *               lookup finds nothing, the state update is skipped and a warning
 *               is logged instead of failing with a NullPointerException.
 */
@Override
public void cancelPreparingForMaintenance(ManagementServerHostVO msHost) {
    resetPreparingForMaintenance();
    this.preparingForShutdown = false;
    this.shutdownTriggered = false;
    jobManager.enableAsyncJobs();

    if (msHost == null) {
        msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
    }
    if (msHost == null) {
        // start() tolerates a missing record for the same lookup, so do the same here.
        logger.warn("Unable to find the management server, cannot update its state after cancelling the maintenance preparation");
        return;
    }
    msHostDao.updateState(msHost.getId(), State.Up);
}
/**
 * Builds the shutdown/maintenance status of a management server: its state,
 * whether maintenance or shutdown is in progress, its pending job count and
 * the host agents it owns.
 *
 * @param managementServerId database id of the management server, or null for
 *                           the management server running this code
 * @return the status response; it reports "ready for shutdown" when there are no pending jobs
 * @throws CloudRuntimeException if the management server cannot be found
 */
private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) {
    final ManagementServerHostVO msHost = (managementServerId == null)
            ? msHostDao.findByMsid(ManagementServerNode.getManagementServerId())
            : msHostDao.findById(managementServerId);
    if (msHost == null) {
        throw new CloudRuntimeException("Unable to find the management server");
    }

    State[] maintenanceStates = {State.PreparingForMaintenance, State.Maintenance};
    State[] shutdownStates = {State.ShuttingDown, State.PreparingForShutDown, State.ReadyToShutDown};
    boolean maintenanceInitiatedForMS = Arrays.asList(maintenanceStates).contains(msHost.getState());
    boolean shutdownTriggeredForMS = Arrays.asList(shutdownStates).contains(msHost.getState());

    // Always use the msid of the resolved record. The incoming id is null when the
    // caller did not specify a server, and it is a database id otherwise, whereas
    // the host and job lookups in this class are keyed by msid.
    final long msid = msHost.getMsid();
    List<String> agents = hostDao.listByMs(msid);
    long agentsCount = hostDao.countByMs(msid);
    long pendingJobCount = countPendingJobs(msid);

    return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, agents);
}
/**
 * Returns the API command classes contributed by this service.
 *
 * @return a new mutable list with the maintenance and shutdown command classes
 */
@Override
public List<Class<?>> getCommands() {
    return new ArrayList<>(Arrays.<Class<?>>asList(
            PrepareForMaintenanceCmd.class,
            CancelMaintenanceCmd.class,
            PrepareForShutdownCmd.class,
            CancelShutdownCmd.class,
            ReadyForShutdownCmd.class,
            TriggerShutdownCmd.class));
}
/** Returns the simple name of {@link ManagementServerMaintenanceManager} as the config component name. */
@Override
public String getConfigComponentName() {
    final Class<?> component = ManagementServerMaintenanceManager.class;
    return component.getSimpleName();
}
/** Returns the configuration keys owned by this component: the maintenance timeout. */
@Override
public ConfigKey<?>[] getConfigKeys() {
    final ConfigKey<?>[] keys = {ManagementServerMaintenanceTimeoutInMins};
    return keys;
}
private final class CheckPendingJobsTask extends ManagedContextRunnable {
// Manager whose maintenance/shutdown state this task polls and updates.
private final ManagementServerMaintenanceManager managementServerMaintenanceManager;
// Set once the agent transfer has been started, so that it is started only once per task.
private boolean agentsTransferTriggered = false;

/**
 * @param managementServerMaintenanceManager the manager to poll and update
 */
public CheckPendingJobsTask(ManagementServerMaintenanceManager managementServerMaintenanceManager) {
    super();
    this.managementServerMaintenanceManager = managementServerMaintenanceManager;
}
// One run of the periodic check. In order, it:
//  1. stops the check when neither maintenance nor shutdown is being prepared any more;
//  2. aborts the maintenance preparation when the maintenance window has expired;
//  3. returns (to retry on the next run) while there are pending jobs;
//  4. exits the JVM when a shutdown was triggered;
//  5. for maintenance: switches to Maintenance once no agents are left, otherwise
//     starts the agent transfer once and keeps polling until the agents are gone;
//  6. for shutdown preparation: records ReadyToShutDown and stops the check.
// Any exception is logged and swallowed, so the check runs again at the next period.
@Override
protected void runInContext() {
try {
// If the maintenance or shutdown has been cancelled
if (!(managementServerMaintenanceManager.isPreparingForMaintenance() || managementServerMaintenanceManager.isPreparingForShutdown())) {
logger.info("Maintenance/Shutdown cancelled, terminating the pending jobs check timer task");
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
// The maintenance window is over: put the current management server back to Up (null = current server)
if (managementServerMaintenanceManager.isPreparingForMaintenance() && isMaintenanceWindowExpired()) {
logger.debug("Maintenance window timeout, terminating the pending jobs check timer task");
managementServerMaintenanceManager.cancelPreparingForMaintenance(null);
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
// Both counts are for the current management server, looked up by its msid
long totalPendingJobs = managementServerMaintenanceManager.countPendingJobs(ManagementServerNode.getManagementServerId());
int totalAgents = hostDao.countByMs(ManagementServerNode.getManagementServerId());
String msg = String.format("Checking for triggered maintenance or shutdown... shutdownTriggered [%b] AllowAsyncJobs [%b] PendingJobCount [%d] AgentsCount [%d]",
managementServerMaintenanceManager.isShutdownTriggered(), managementServerMaintenanceManager.isAsyncJobsEnabled(), totalPendingJobs, totalAgents);
logger.debug(msg);
if (totalPendingJobs > 0) {
logger.info(String.format("There are %d pending jobs, trying again later", totalPendingJobs));
return;
}
// No more pending jobs. Good to terminate
if (managementServerMaintenanceManager.isShutdownTriggered()) {
logger.info("MS is Shutting Down Now");
// NOTE(review): the state of the management server record is not updated before exiting (open question: update state to down?)
System.exit(0);
}
if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
// All agents are gone: maintenance is reached, notify the listeners and stop the check
if (totalAgents == 0) {
logger.info("MS is in Maintenance Mode");
msHostDao.updateState(msHost.getId(), State.Maintenance);
managementServerMaintenanceManager.onMaintenance();
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
// The transfer was already started by an earlier run: keep polling until no agents are left
if (agentsTransferTriggered) {
logger.info(String.format("There are %d agents, trying again later", totalAgents));
return;
}
agentsTransferTriggered = true;
logger.info(String.format("Preparing for maintenance - migrating agents from management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
// Indirect agents first; on failure the maintenance preparation is aborted
boolean agentsMigrated = indirectAgentLB.migrateAgents(msHost.getUuid(), ManagementServerNode.getManagementServerId(), managementServerMaintenanceManager.getLbAlgorithm(), remainingMaintenanceWindowInMs());
if (!agentsMigrated) {
logger.warn(String.format("Unable to prepare for maintenance, cannot migrate indirect agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
// Then the direct agents; on failure the maintenance preparation is aborted as well
if(!agentMgr.transferDirectAgentsFromMS(msHost.getUuid(), ManagementServerNode.getManagementServerId(), remainingMaintenanceWindowInMs())) {
logger.warn(String.format("Unable to prepare for maintenance, cannot transfer direct agents on this management server node %d (id: %s)", ManagementServerNode.getManagementServerId(), msHost.getUuid()));
managementServerMaintenanceManager.cancelPreparingForMaintenance(msHost);
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
} else if (managementServerMaintenanceManager.isPreparingForShutdown()) {
logger.info("MS is Ready To Shutdown");
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
msHostDao.updateState(msHost.getId(), State.ReadyToShutDown);
managementServerMaintenanceManager.cancelWaitForPendingJobs();
return;
}
} catch (final Exception e) {
logger.error("Error trying to check/run pending jobs task", e);
}
}
/**
 * Tells whether the time elapsed since the recorded maintenance start time has
 * reached the configured maintenance timeout.
 *
 * @return {@code true} when the elapsed time is at least the timeout
 */
private boolean isMaintenanceWindowExpired() {
    final long elapsedMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime();
    final long windowMs = ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000;
    return elapsedMs >= windowMs;
}
/**
 * Computes how much of the configured maintenance window is left, measured from
 * the recorded maintenance start time.
 *
 * @return the remaining time in milliseconds, never negative
 */
private long remainingMaintenanceWindowInMs() {
    final long elapsedMs = System.currentTimeMillis() - managementServerMaintenanceManager.getMaintenanceStartTime();
    final long windowMs = ManagementServerMaintenanceTimeoutInMins.value().longValue() * 60 * 1000;
    return Math.max(windowMs - elapsedMs, 0L);
}
}
}

View File

@ -16,7 +16,7 @@
// under the License.
package org.apache.cloudstack.shutdown.command;
package org.apache.cloudstack.maintenance.command;
import com.cloud.agent.api.Command;

View File

@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.maintenance.command;
/**
 * Command asking a management server to cancel its maintenance.
 */
public class CancelMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {

    /**
     * @param msId msid of the management server the command is addressed to
     */
    public CancelMaintenanceManagementServerHostCommand(final long msId) {
        super(msId);
    }
}

View File

@ -16,7 +16,7 @@
// under the License.
package org.apache.cloudstack.shutdown.command;
package org.apache.cloudstack.maintenance.command;
public class CancelShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {

View File

@ -0,0 +1,36 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.maintenance.command;
/**
 * Command object named for preparing a management server host for maintenance.
 * On top of the base command it can optionally carry a load-balancing algorithm name.
 */
public class PrepareForMaintenanceManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {

    /** Optional algorithm name; stays {@code null} when the single-argument constructor is used. */
    String lbAlgorithm;

    /**
     * Builds the command without an algorithm.
     *
     * @param msId value forwarded to the base command constructor
     */
    public PrepareForMaintenanceManagementServerHostCommand(long msId) {
        this(msId, null);
    }

    /**
     * Builds the command with an explicit algorithm.
     *
     * @param msId        value forwarded to the base command constructor
     * @param lbAlgorithm algorithm name stored as-is (no validation is done here)
     */
    public PrepareForMaintenanceManagementServerHostCommand(long msId, String lbAlgorithm) {
        super(msId);
        this.lbAlgorithm = lbAlgorithm;
    }

    /**
     * @return the algorithm name given at construction, or {@code null} if none was given
     */
    public String getLbAlgorithm() {
        return this.lbAlgorithm;
    }
}

View File

@ -16,7 +16,7 @@
// under the License.
package org.apache.cloudstack.shutdown.command;
package org.apache.cloudstack.maintenance.command;
public class PrepareForShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {

View File

@ -16,7 +16,7 @@
// under the License.
package org.apache.cloudstack.shutdown.command;
package org.apache.cloudstack.maintenance.command;
public class TriggerShutdownManagementServerHostCommand extends BaseShutdownManagementServerHostCommand {

View File

@ -14,5 +14,5 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
name=shutdown
name=maintenance
parent=api

View File

@ -22,8 +22,8 @@
http://www.springframework.org/schema/beans/spring-beans.xsd"
>
<bean id="shutdownManager" class="org.apache.cloudstack.shutdown.ShutdownManagerImpl" >
<property name="name" value="shutdownManager" />
<bean id="managementServerMaintenanceManager" class="org.apache.cloudstack.maintenance.ManagementServerMaintenanceManagerImpl" >
<property name="name" value="managementServerMaintenanceManager" />
</bean>
</beans>

View File

@ -15,18 +15,15 @@
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.shutdown;
package org.apache.cloudstack.maintenance;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.mockito.Spy;
import org.mockito.junit.MockitoJUnitRunner;
@ -34,20 +31,14 @@ import com.cloud.utils.exception.CloudRuntimeException;
@RunWith(MockitoJUnitRunner.class)
public class ShutdownManagerImplTest {
public class ManagementServerMaintenanceManagerImplTest {
@Spy
@InjectMocks
ShutdownManagerImpl spy;
ManagementServerMaintenanceManagerImpl spy;
@Mock
AsyncJobManager jobManagerMock;
private AutoCloseable closeable;
@Before
public void setUp() throws Exception {
closeable = MockitoAnnotations.openMocks(this);
}
private long prepareCountPendingJobs() {
long expectedCount = 1L;
@ -79,14 +70,8 @@ public class ShutdownManagerImplTest {
spy.prepareForShutdown();
});
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
spy.cancelShutdown();
Mockito.verify(jobManagerMock).enableAsyncJobs();
}
@After
public void tearDown() throws Exception {
closeable.close();
}
}

View File

@ -20,6 +20,8 @@ package org.apache.cloudstack.api;
* metric local api constants
*/
public interface MetricConstants {
String LAST_AGENTS = "lastagents";
String AGENTS = "agents";
String AGENT_COUNT = "agentcount";
String AVAILABLE_PROCESSORS = "availableprocessors";
String CONNECTIONS = "connections";

View File

@ -895,6 +895,8 @@ public class MetricsServiceImpl extends MutualExclusiveIdsManagerBase implements
metricsResponse.setDbLocal(status.isDbLocal());
metricsResponse.setUsageLocal(status.isUsageLocal());
metricsResponse.setAvailableProcessors(status.getAvailableProcessors());
metricsResponse.setLastAgents(status.getLastAgents());
metricsResponse.setAgents(status.getAgents());
metricsResponse.setAgentCount(status.getAgentCount());
metricsResponse.setCollectionTime(status.getCollectionTime());
metricsResponse.setSessions(status.getSessions());

View File

@ -22,6 +22,7 @@ import org.apache.cloudstack.api.MetricConstants;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import java.util.Date;
import java.util.List;
public class ManagementServerMetricsResponse extends ManagementServerResponse {
@ -29,6 +30,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse {
@Param(description = "the number of processors available to the JVM")
private Integer availableProcessors;
@SerializedName(MetricConstants.LAST_AGENTS)
@Param(description = "the last agents this Management Server is responsible for, before preparing for maintenance", since = "4.18.1")
private List<String> lastAgents;
@SerializedName(MetricConstants.AGENTS)
@Param(description = "the agents this Management Server is responsible for", since = "4.18.1")
private List<String> agents;
@SerializedName(MetricConstants.AGENT_COUNT)
@Param(description = "the number of agents this Management Server is responsible for")
private Integer agentCount;
@ -121,6 +130,14 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse {
this.availableProcessors = availableProcessors;
}
/**
 * Stores the list serialized for this response's last-agents field, i.e. the agents this
 * management server was responsible for before preparing for maintenance.
 *
 * @param lastAgents list kept by reference (no copy is made)
 */
public void setLastAgents(List<String> lastAgents) {
this.lastAgents = lastAgents;
}
/**
 * Stores the list serialized for this response's agents field, i.e. the agents this
 * management server is responsible for.
 *
 * @param agents list kept by reference (no copy is made)
 */
public void setAgents(List<String> agents) {
this.agents = agents;
}
/**
 * Stores the number of agents this management server is responsible for.
 *
 * @param agentCount primitive count; it is boxed into the {@code Integer} field
 */
public void setAgentCount(int agentCount) {
this.agentCount = agentCount;
}

View File

@ -118,7 +118,7 @@
<module>outofbandmanagement-drivers/nested-cloudstack</module>
<module>outofbandmanagement-drivers/redfish</module>
<module>shutdown</module>
<module>maintenance</module>
<module>storage/sharedfs/storagevm</module>
<module>storage/image/default</module>

View File

@ -1,60 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.shutdown;
import org.apache.cloudstack.api.command.CancelShutdownCmd;
import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
import org.apache.cloudstack.api.command.TriggerShutdownCmd;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
/**
 * Operations for preparing, triggering, cancelling and querying the shutdown of
 * management servers. The no-argument methods act on the current management server;
 * the command-based methods act on the management server named by the command.
 */
public interface ShutdownManager {
/**
 * Returns the number of pending jobs for the given management server msids.
 * NOTE: This is the msid and NOT the id.
 */
long countPendingJobs(Long... msIds);
/** Indicates whether a shutdown has been triggered on the current management server. */
boolean isShutdownTriggered();
/** Indicates whether the current management server is preparing to shut down. */
boolean isPreparingForShutdown();
/**
 * Triggers a shutdown on the current management server by not accepting any more async jobs
 * and shutting down when there are no pending jobs.
 */
void triggerShutdown();
/** Prepares the current management server to shut down by not accepting any more async jobs. */
void prepareForShutdown();
/** Cancels the shutdown on the current management server. */
void cancelShutdown();
/** Returns whether the given management server can be shut down. */
ReadyForShutdownResponse readyForShutdown(Long managementserverid);
/**
 * Returns whether any of the management servers can be shut down and whether a shutdown
 * has been triggered on any running management server.
 */
ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd);
/** Prepares the specified management server to shut down by not accepting any more async jobs. */
ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd);
/** Cancels the shutdown on the specified management server. */
ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd);
/**
 * Triggers a shutdown on the specified management server by not accepting any more async jobs
 * and shutting down when there are no pending jobs.
 */
ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd);
}

View File

@ -1,265 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.shutdown;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import javax.inject.Inject;
import org.apache.cloudstack.api.command.CancelShutdownCmd;
import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
import org.apache.cloudstack.api.command.ReadyForShutdownCmd;
import org.apache.cloudstack.api.command.TriggerShutdownCmd;
import org.apache.cloudstack.api.response.ReadyForShutdownResponse;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.management.ManagementServerHost.State;
import org.apache.cloudstack.shutdown.command.CancelShutdownManagementServerHostCommand;
import org.apache.cloudstack.shutdown.command.PrepareForShutdownManagementServerHostCommand;
import org.apache.cloudstack.shutdown.command.TriggerShutdownManagementServerHostCommand;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import com.cloud.agent.api.Command;
import com.cloud.cluster.ClusterManager;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.serializer.GsonHelper;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.component.PluggableService;
import com.cloud.utils.exception.CloudRuntimeException;
import com.google.gson.Gson;
/**
 * {@link ShutdownManager} implementation.
 *
 * <p>Local operations flip two in-memory flags, disable or enable async jobs through the
 * {@link AsyncJobManager}, and schedule a periodic {@link ShutdownTask} that watches the
 * pending job count of this management server. Command-based operations send a cluster
 * command to the target management server and then persist that server's new state.
 */
public class ShutdownManagerImpl extends ManagerBase implements ShutdownManager, PluggableService {

    Gson gson;

    @Inject
    private AsyncJobManager jobManager;
    @Inject
    private ManagementServerHostDao msHostDao;
    @Inject
    private ClusterManager clusterManager;

    // Both flags are written by the calling (API) thread and read by the Timer thread that
    // runs ShutdownTask, so they are volatile to make updates visible across threads.
    private volatile boolean shutdownTriggered = false;
    private volatile boolean preparingForShutdown = false;

    private Timer timer = new Timer();
    private TimerTask shutdownTask;

    protected ShutdownManagerImpl() {
        super();
        gson = GsonHelper.getGson();
    }

    @Override
    public boolean isShutdownTriggered() {
        return shutdownTriggered;
    }

    @Override
    public boolean isPreparingForShutdown() {
        return preparingForShutdown;
    }

    @Override
    public long countPendingJobs(Long... msIds) {
        return jobManager.countPendingNonPseudoJobs(msIds);
    }

    /**
     * Marks the shutdown as triggered and starts (or restarts) the periodic shutdown task.
     *
     * @throws CloudRuntimeException if a shutdown has already been triggered
     */
    @Override
    public void triggerShutdown() {
        if (this.shutdownTriggered) {
            throw new CloudRuntimeException("A shutdown has already been triggered");
        }
        this.shutdownTriggered = true;
        prepareForShutdown(true);
    }

    /**
     * Disables async jobs and (re)schedules the shutdown task.
     *
     * @param postTrigger {@code true} when called from {@link #triggerShutdown()}; in that case an
     *                    earlier "prepare" is not treated as an error
     */
    private void prepareForShutdown(boolean postTrigger) {
        // Ensure we don't throw an error if triggering a shutdown after just preparing for it
        if (!postTrigger && this.preparingForShutdown) {
            throw new CloudRuntimeException("A shutdown has already been triggered");
        }
        this.preparingForShutdown = true;
        jobManager.disableAsyncJobs();

        // Replace any previously scheduled task so only one is active at a time.
        if (this.shutdownTask != null) {
            this.shutdownTask.cancel();
            this.shutdownTask = null;
        }
        this.shutdownTask = new ShutdownTask(this);

        long period = 30L * 1000;
        long delay = period / 2;
        logger.debug(String.format("Scheduling shutdown task with delay: %d and period: %d", delay, period));
        timer.scheduleAtFixedRate(shutdownTask, delay, period);
    }

    @Override
    public void prepareForShutdown() {
        prepareForShutdown(false);
    }

    /**
     * Clears both flags, re-enables async jobs and cancels the shutdown task.
     *
     * @throws CloudRuntimeException if no prepare/trigger is in progress
     */
    @Override
    public void cancelShutdown() {
        if (!this.preparingForShutdown) {
            throw new CloudRuntimeException("A shutdown has not been triggered");
        }
        this.preparingForShutdown = false;
        this.shutdownTriggered = false;
        jobManager.enableAsyncJobs();

        if (shutdownTask != null) {
            shutdownTask.cancel();
        }
        shutdownTask = null;
    }

    /**
     * Builds the readiness response for one management server, or for all management servers
     * that are in a shutdown-related state when {@code managementserverid} is {@code null}.
     *
     * @param managementserverid database id of the management server, or {@code null}
     * @return response carrying the id, whether a shutdown is in progress, whether the pending
     *         job count is zero, and the pending job count
     * @throws CloudRuntimeException if a non-null id does not match any management server
     */
    @Override
    public ReadyForShutdownResponse readyForShutdown(Long managementserverid) {
        Long[] msIds = null;
        boolean shutdownTriggeredAnywhere = false;
        State[] shutdownTriggeredStates = {State.ShuttingDown, State.PreparingToShutDown, State.ReadyToShutDown};
        if (managementserverid == null) {
            List<ManagementServerHostVO> msHosts = msHostDao.listBy(shutdownTriggeredStates);
            if (msHosts != null && !msHosts.isEmpty()) {
                msIds = new Long[msHosts.size()];
                for (int i = 0; i < msHosts.size(); i++) {
                    msIds[i] = msHosts.get(i).getMsid();
                }
                shutdownTriggeredAnywhere = true;
            }
        } else {
            ManagementServerHostVO msHost = findManagementServerHost(managementserverid);
            msIds = new Long[]{msHost.getMsid()};
            shutdownTriggeredAnywhere = Arrays.asList(shutdownTriggeredStates).contains(msHost.getState());
        }
        long pendingJobCount = countPendingJobs(msIds);
        return new ReadyForShutdownResponse(managementserverid, shutdownTriggeredAnywhere, pendingJobCount == 0, pendingJobCount);
    }

    @Override
    public ReadyForShutdownResponse readyForShutdown(ReadyForShutdownCmd cmd) {
        return readyForShutdown(cmd.getManagementServerId());
    }

    /**
     * Sends the prepare-for-shutdown command to the target management server and records the
     * {@code PreparingToShutDown} state for it.
     *
     * @throws CloudRuntimeException if the server is unknown or the command does not report success
     */
    @Override
    public ReadyForShutdownResponse prepareForShutdown(PrepareForShutdownCmd cmd) {
        ManagementServerHostVO msHost = findManagementServerHost(cmd.getManagementServerId());
        executeOnManagementServer(msHost, new PrepareForShutdownManagementServerHostCommand(msHost.getMsid()), "PrepareForShutdownCmd");
        msHost.setState(State.PreparingToShutDown);
        msHostDao.persist(msHost);
        return readyForShutdown(cmd.getManagementServerId());
    }

    /**
     * Sends the trigger-shutdown command to the target management server and records the
     * {@code ShuttingDown} state for it.
     *
     * @throws CloudRuntimeException if the server is unknown or the command does not report success
     */
    @Override
    public ReadyForShutdownResponse triggerShutdown(TriggerShutdownCmd cmd) {
        ManagementServerHostVO msHost = findManagementServerHost(cmd.getManagementServerId());
        executeOnManagementServer(msHost, new TriggerShutdownManagementServerHostCommand(msHost.getMsid()), "TriggerShutdownCmd");
        msHost.setState(State.ShuttingDown);
        msHostDao.persist(msHost);
        return readyForShutdown(cmd.getManagementServerId());
    }

    /**
     * Sends the cancel-shutdown command to the target management server and records the
     * {@code Up} state for it.
     *
     * @throws CloudRuntimeException if the server is unknown or the command does not report success
     */
    @Override
    public ReadyForShutdownResponse cancelShutdown(CancelShutdownCmd cmd) {
        ManagementServerHostVO msHost = findManagementServerHost(cmd.getManagementServerId());
        executeOnManagementServer(msHost, new CancelShutdownManagementServerHostCommand(msHost.getMsid()), "CancelShutdownCmd");
        msHost.setState(State.Up);
        msHostDao.persist(msHost);
        return readyForShutdown(cmd.getManagementServerId());
    }

    @Override
    public List<Class<?>> getCommands() {
        final List<Class<?>> cmdList = new ArrayList<>();
        cmdList.add(CancelShutdownCmd.class);
        cmdList.add(PrepareForShutdownCmd.class);
        cmdList.add(ReadyForShutdownCmd.class);
        cmdList.add(TriggerShutdownCmd.class);
        return cmdList;
    }

    /** Looks up a management server by database id, failing with a clear error instead of an NPE. */
    private ManagementServerHostVO findManagementServerHost(Long managementServerId) {
        ManagementServerHostVO msHost = msHostDao.findById(managementServerId);
        if (msHost == null) {
            throw new CloudRuntimeException("Unable to find the management server with id: " + managementServerId);
        }
        return msHost;
    }

    /** Sends one command to the given management server and fails unless the result contains "Success". */
    private void executeOnManagementServer(ManagementServerHostVO msHost, Command command, String commandLabel) {
        final Command[] cmds = new Command[1];
        cmds[0] = command;
        String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
        logger.info(commandLabel + " result : " + result);
        if (result == null) {
            throw new CloudRuntimeException(commandLabel + " returned no result");
        }
        if (!result.contains("Success")) {
            throw new CloudRuntimeException(result);
        }
    }

    /**
     * Periodic task that checks the pending job count of the current management server.
     * It stops itself once the shutdown is cancelled, exits the JVM when a shutdown was
     * triggered and no jobs remain, and otherwise marks the server as ready to shut down.
     */
    private final class ShutdownTask extends TimerTask {

        private ShutdownManager shutdownManager;

        public ShutdownTask(ShutdownManager shutdownManager) {
            this.shutdownManager = shutdownManager;
        }

        @Override
        public void run() {
            try {
                long totalPendingJobs = shutdownManager.countPendingJobs(ManagementServerNode.getManagementServerId());
                String msg = String.format("Checking for triggered shutdown... shutdownTriggered [%b] preparingForShutdown [%b] PendingJobCount [%d]",
                        shutdownManager.isShutdownTriggered(), shutdownManager.isPreparingForShutdown(), totalPendingJobs);
                logger.info(msg);

                // If the shutdown has been cancelled
                if (!shutdownManager.isPreparingForShutdown()) {
                    logger.info("Shutdown cancelled. Terminating the shutdown timer task");
                    this.cancel();
                    return;
                }

                // Only report pending jobs when there actually are some.
                if (totalPendingJobs != 0) {
                    logger.info("Pending jobs. Trying again later");
                    return;
                }

                // No more pending jobs. Good to terminate
                if (shutdownManager.isShutdownTriggered()) {
                    logger.info("Shutting down now");
                    System.exit(0);
                }
                if (shutdownManager.isPreparingForShutdown()) {
                    logger.info("Ready to shutdown");
                    ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
                    if (msHost == null) {
                        logger.warn("Unable to find the current management server to mark it as ready to shutdown");
                        return;
                    }
                    msHost.setState(State.ReadyToShutDown);
                    msHostDao.persist(msHost);
                }
            } catch (final Exception e) {
                logger.error("Error trying to run shutdown task", e);
            }
        }
    }
}

View File

@ -94,7 +94,7 @@ public class ApiDispatcher {
if (asyncJobManager.isAsyncJobsEnabled()) {
asyncCreationDispatchChain.dispatch(new DispatchTask(cmd, params));
} else {
throw new CloudRuntimeException("A shutdown has been triggered. Can not accept new jobs");
throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new jobs");
}
}

View File

@ -57,6 +57,8 @@ import javax.naming.ConfigurationException;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.http.HttpSession;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.user.Account;
import com.cloud.user.AccountManager;
import com.cloud.user.AccountManagerImpl;
@ -113,6 +115,7 @@ import org.apache.cloudstack.framework.messagebus.MessageDispatcher;
import org.apache.cloudstack.framework.messagebus.MessageHandler;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.user.UserPasswordResetManager;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.EnumUtils;
import org.apache.http.ConnectionClosedException;
@ -222,6 +225,8 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer
@Inject
private ProjectDao projectDao;
@Inject
private ManagementServerHostDao msHostDao;
@Inject
private UUIDManager uuidMgr;
@Inject
private UserPasswordResetManager userPasswordResetManager;
@ -471,7 +476,6 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer
s_apiNameCmdClassMap.put(apiName, apiCmdList);
}
apiCmdList.add(cmdClass);
}
setEncodeApiResponse(EncodeApiResponse.value());
@ -1172,6 +1176,9 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer
if (ApiConstants.ISSUER_FOR_2FA.equalsIgnoreCase(attrName)) {
response.setIssuerFor2FA(attrObj.toString());
}
if (ApiConstants.MANAGEMENT_SERVER_ID.equalsIgnoreCase(attrName)) {
response.setManagementServerId(attrObj.toString());
}
}
}
response.setResponseName("loginresponse");
@ -1249,6 +1256,13 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer
session.setAttribute(ApiConstants.PROVIDER_FOR_2FA, userAcct.getUser2faProvider());
session.setAttribute(ApiConstants.ISSUER_FOR_2FA, issuerFor2FA);
if (accountMgr.isRootAdmin(userAcct.getAccountId())) {
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
if (msHost != null && msHost.getUuid() != null) {
session.setAttribute(ApiConstants.MANAGEMENT_SERVER_ID, msHost.getUuid());
}
}
// (bug 5483) generate a session key that the user must submit on every request to prevent CSRF, add that
// to the login response so that session-based authenticators know to send the key back
final SecureRandom sesssionKeyRandom = new SecureRandom();

View File

@ -143,6 +143,7 @@ import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.TemplateState;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementVO;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
@ -613,6 +614,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
@Inject
private ManagementServerHostPeerJoinDao mshostPeerJoinDao;
@Inject
private AsyncJobManager jobManager;
private SearchCriteria<ServiceOfferingJoinVO> getMinimumCpuServiceOfferingJoinSearchCriteria(int cpu) {
SearchCriteria<ServiceOfferingJoinVO> sc = _srvOfferingJoinDao.createSearchCriteria();
@ -2353,6 +2356,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
Long startIndex = cmd.getStartIndex();
Long pageSize = cmd.getPageSizeVal();
Hypervisor.HypervisorType hypervisorType = cmd.getHypervisor();
Long msId = cmd.getManagementServerId();
Filter searchFilter = new Filter(HostVO.class, "id", Boolean.TRUE, startIndex, pageSize);
@ -2368,6 +2372,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
hostSearchBuilder.and("clusterId", hostSearchBuilder.entity().getClusterId(), SearchCriteria.Op.EQ);
hostSearchBuilder.and("resourceState", hostSearchBuilder.entity().getResourceState(), SearchCriteria.Op.EQ);
hostSearchBuilder.and("hypervisor_type", hostSearchBuilder.entity().getHypervisorType(), SearchCriteria.Op.EQ);
hostSearchBuilder.and("mgmt_server_id", hostSearchBuilder.entity().getManagementServerId(), SearchCriteria.Op.EQ);
if (keyword != null) {
hostSearchBuilder.and().op("keywordName", hostSearchBuilder.entity().getName(), SearchCriteria.Op.LIKE);
@ -2448,6 +2453,13 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
sc.setParameters("hypervisor_type", hypervisorType);
}
if (msId != null) {
ManagementServerHostVO msHost = msHostDao.findById(msId);
if (msHost != null) {
sc.setParameters("mgmt_server_id", msHost.getMsid());
}
}
Pair<List<HostVO>, Integer> uniqueHostPair = hostDao.searchAndCount(sc, searchFilter);
Integer count = uniqueHostPair.second();
List<Long> hostIds = uniqueHostPair.first().stream().map(HostVO::getId).collect(Collectors.toList());
@ -5426,6 +5438,8 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer));
}
}
mgmtResponse.setAgentsCount((long) hostDao.countByMs(mgmt.getMsid()));
mgmtResponse.setPendingJobsCount(jobManager.countPendingNonPseudoJobs(mgmt.getMsid()));
mgmtResponse.setObjectName("managementserver");
return mgmtResponse;
}

View File

@ -20,6 +20,8 @@ import java.util.Date;
import java.util.List;
import javax.inject.Inject;
import org.springframework.stereotype.Component;
import org.apache.cloudstack.api.ResponseObject;
@ -29,6 +31,8 @@ import org.apache.cloudstack.framework.jobs.AsyncJob;
import com.cloud.api.ApiSerializerHelper;
import com.cloud.api.SerializationContext;
import com.cloud.api.query.vo.AsyncJobJoinVO;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
@ -36,6 +40,9 @@ import com.cloud.utils.db.SearchCriteria;
@Component
public class AsyncJobJoinDaoImpl extends GenericDaoBase<AsyncJobJoinVO, Long> implements AsyncJobJoinDao {
@Inject
private ManagementServerHostDao managementServerHostDao;
private final SearchBuilder<AsyncJobJoinVO> jobIdSearch;
protected AsyncJobJoinDaoImpl() {
@ -63,7 +70,13 @@ public class AsyncJobJoinDaoImpl extends GenericDaoBase<AsyncJobJoinVO, Long> im
jobResponse.setJobId(job.getUuid());
jobResponse.setJobStatus(job.getStatus());
jobResponse.setJobProcStatus(job.getProcessStatus());
jobResponse.setMsid(job.getExecutingMsid());
if (job.getExecutingMsid() != null) {
ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(job.getExecutingMsid());
if (managementServer != null) {
jobResponse.setManagementServerId(managementServer.getUuid());
jobResponse.setManagementServerName(managementServer.getName());
}
}
if (job.getInstanceType() != null && job.getInstanceId() != null) {
jobResponse.setJobInstanceType(job.getInstanceType().toString());

View File

@ -58,6 +58,8 @@ import com.cloud.storage.StorageStats;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.dao.VMInstanceDao;
@Component
public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements HostJoinDao {
@ -73,6 +75,8 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
@Inject
private ManagementServerHostDao managementServerHostDao;
@Inject
private VMInstanceDao virtualMachineDao;
@Inject
private AnnotationDao annotationDao;
@Inject
private AccountManager accountManager;
@ -126,12 +130,19 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
hostResponse.setHypervisor(hypervisorType);
}
hostResponse.setHostType(host.getType());
if (host.getType().equals(Host.Type.ConsoleProxy) || host.getType().equals(Host.Type.SecondaryStorageVM)) {
VMInstanceVO vm = virtualMachineDao.findVMByInstanceNameIncludingRemoved(host.getName());
if (vm != null) {
hostResponse.setVirtualMachineId(vm.getUuid());
}
}
hostResponse.setLastPinged(new Date(host.getLastPinged()));
Long mshostId = host.getManagementServerId();
if (mshostId != null) {
ManagementServerHostVO managementServer = managementServerHostDao.findByMsid(host.getManagementServerId());
if (managementServer != null) {
hostResponse.setManagementServerId(managementServer.getUuid());
hostResponse.setManagementServerName(managementServer.getName());
}
}
hostResponse.setName(host.getName());

View File

@ -85,21 +85,23 @@ public class SshKeysDistriMonitor implements Listener {
@Override
public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException {
if (cmd instanceof StartupRoutingCommand) {
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer ||
if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) {
return;
}
if (((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.KVM || ((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.XenServer ||
((StartupRoutingCommand)cmd).getHypervisorType() == HypervisorType.LXC) {
/*TODO: Get the private/public keys here*/
/*TODO: Get the private/public keys here*/
String pubKey = _configDao.getValue("ssh.publickey");
String prvKey = _configDao.getValue("ssh.privatekey");
String pubKey = _configDao.getValue("ssh.publickey");
String prvKey = _configDao.getValue("ssh.privatekey");
try {
ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey);
Commands c = new Commands(cmds);
_agentMgr.send(host.getId(), c, this);
} catch (AgentUnavailableException e) {
logger.debug("Failed to send keys to agent: {}", host);
}
try {
ModifySshKeysCommand cmds = new ModifySshKeysCommand(pubKey, prvKey);
Commands c = new Commands(cmds);
_agentMgr.send(host.getId(), c, this);
} catch (AgentUnavailableException e) {
logger.debug("Failed to send keys to agent: {}", host);
}
}
}

View File

@ -164,22 +164,23 @@ public class SecurityGroupListener implements Listener {
if (logger.isInfoEnabled())
logger.info("Received a host startup notification");
if (cmd instanceof StartupRoutingCommand) {
//if (Boolean.toString(true).equals(host.getDetail("can_bridge_firewall"))) {
try {
int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2);
CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval);
Commands c = new Commands(cleanupCmd);
_agentMgr.send(host.getId(), c, this);
if (logger.isInfoEnabled())
logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval());
} catch (AgentUnavailableException e) {
//usually hypervisors that do not understand sec group rules.
logger.debug("Unable to schedule network rules cleanup for host {}", host, e);
}
if (_workTracker != null) {
_workTracker.processConnect(host.getId());
}
if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) {
return;
}
try {
int interval = MIN_TIME_BETWEEN_CLEANUPS + _cleanupRandom.nextInt(MIN_TIME_BETWEEN_CLEANUPS / 2);
CleanupNetworkRulesCmd cleanupCmd = new CleanupNetworkRulesCmd(interval);
Commands c = new Commands(cleanupCmd);
_agentMgr.send(host.getId(), c, this);
if (logger.isInfoEnabled())
logger.info("Scheduled network rules cleanup, interval=" + cleanupCmd.getInterval());
} catch (AgentUnavailableException e) {
//usually hypervisors that do not understand sec group rules.
logger.debug("Unable to schedule network rules cleanup for host {}", host, e);
}
if (_workTracker != null) {
_workTracker.processConnect(host.getId());
}
}

View File

@ -47,9 +47,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd;
import org.apache.cloudstack.api.command.admin.host.AddHostCmd;
import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd;
@ -1284,7 +1284,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
@Override
public Host cancelMaintenance(final CancelMaintenanceCmd cmd) {
public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) {
final Long hostId = cmd.getId();
// verify input parameters
@ -1501,7 +1501,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
@Override
public Host maintain(final PrepareForMaintenanceCmd cmd) {
public Host maintain(final PrepareForHostMaintenanceCmd cmd) {
final Long hostId = cmd.getId();
final HostVO host = _hostDao.findById(hostId);
@ -2508,13 +2508,17 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
}
// Convenience overload: delegates to the six-argument variant with isTransferredConnection = false.
private Host createHostAndAgent(final ServerResource resource, final Map<String, String> details, final boolean old, final List<String> hostTags, final boolean forRebalance) {
return createHostAndAgent(resource, details, old, hostTags, forRebalance, false);
}
private Host createHostAndAgent(final ServerResource resource, final Map<String, String> details, final boolean old, final List<String> hostTags, final boolean forRebalance, final boolean isTransferredConnection) {
HostVO host = null;
StartupCommand[] cmds = null;
boolean hostExists = false;
boolean created = false;
try {
cmds = resource.initialize();
cmds = resource.initialize(isTransferredConnection);
if (cmds == null) {
logger.info("Unable to fully initialize the agent because no StartupCommands are returned");
return null;
@ -2685,7 +2689,12 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
@Override
public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map<String, String> details, final boolean old, final List<String> hostTags, final boolean forRebalance) {
final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance);
return createHostAndAgent(hostId, resource, details, old, hostTags, forRebalance, false);
}
@Override
public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map<String, String> details, final boolean old, final List<String> hostTags, final boolean forRebalance, boolean isTransferredConnection) {
final Host host = createHostAndAgent(resource, details, old, hostTags, forRebalance, isTransferredConnection);
return host;
}

View File

@ -32,7 +32,7 @@ import javax.naming.ConfigurationException;
import org.apache.cloudstack.affinity.AffinityGroupProcessor;
import org.apache.cloudstack.api.ApiCommandResourceType;
import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.framework.config.ConfigKey;
@ -405,7 +405,7 @@ public class RollingMaintenanceManagerImpl extends ManagerBase implements Rollin
*/
private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException {
logger.debug(String.format("Trying to set %s into maintenance", host));
PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd();
PrepareForHostMaintenanceCmd cmd = new PrepareForHostMaintenanceCmd();
cmd.setId(host.getId());
resourceManager.maintain(cmd);
waitForHostInMaintenance(host.getId());

View File

@ -19,6 +19,7 @@
package com.cloud.server;
import java.util.Date;
import java.util.List;
public class ManagementServerHostStatsEntry implements ManagementServerHostStats {
@ -45,6 +46,8 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats
private String jvmVendor;
private String jvmVersion;
private String osDistribution;
private List<String> lastAgents;
private List<String> agents;
private int agentCount;
private long heapMemoryUsed;
@ -199,6 +202,16 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats
return osDistribution;
}
/**
 * Returns the list most recently stored through {@link #setLastAgents(List)}.
 * The backing field has no initializer in the visible code, so this may be {@code null}
 * when the setter was never called. The same list instance is returned (no copy is made).
 */
@Override
public List<String> getLastAgents() {
return lastAgents;
}
/**
 * Returns the list most recently stored through {@link #setAgents(List)}.
 * The backing field has no initializer in the visible code, so this may be {@code null}
 * when the setter was never called. The same list instance is returned (no copy is made).
 */
@Override
public List<String> getAgents() {
return agents;
}
@Override
public int getAgentCount() {
return agentCount;
@ -290,6 +303,14 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats
this.osDistribution = osDistribution;
}
/**
 * Stores the given list as the value returned by {@link #getLastAgents()}.
 * The reference is kept as-is; no defensive copy is made.
 *
 * @param lastAgents list to store
 */
public void setLastAgents(List<String> lastAgents) {
this.lastAgents = lastAgents;
}
/**
 * Stores the given list as the value returned by {@link #getAgents()}.
 * The reference is kept as-is; no defensive copy is made.
 *
 * @param agents list to store
 */
public void setAgents(List<String> agents) {
this.agents = agents;
}
public void setAgentCount(int agentCount) {
this.agentCount = agentCount;
}

View File

@ -101,13 +101,13 @@ import org.apache.cloudstack.api.command.admin.guest.UpdateGuestOsMappingCmd;
import org.apache.cloudstack.api.command.admin.host.AddHostCmd;
import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.DeleteHostCmd;
import org.apache.cloudstack.api.command.admin.host.FindHostsForMigrationCmd;
import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd;
import org.apache.cloudstack.api.command.admin.host.ListHostsCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd;
import org.apache.cloudstack.api.command.admin.host.ReleaseHostReservationCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd;
@ -3508,14 +3508,14 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe
cmdList.add(MoveDomainCmd.class);
cmdList.add(AddHostCmd.class);
cmdList.add(AddSecondaryStorageCmd.class);
cmdList.add(CancelMaintenanceCmd.class);
cmdList.add(CancelHostMaintenanceCmd.class);
cmdList.add(CancelHostAsDegradedCmd.class);
cmdList.add(DeclareHostAsDegradedCmd.class);
cmdList.add(DeleteHostCmd.class);
cmdList.add(ListHostsCmd.class);
cmdList.add(ListHostTagsCmd.class);
cmdList.add(FindHostsForMigrationCmd.class);
cmdList.add(PrepareForMaintenanceCmd.class);
cmdList.add(PrepareForHostMaintenanceCmd.class);
cmdList.add(ReconnectHostCmd.class);
cmdList.add(UpdateHostCmd.class);
cmdList.add(UpdateHostPasswordCmd.class);

View File

@ -829,6 +829,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
}
/**
 * Populates the agent-related fields of the given stats entry.
 *
 * @param newEntry stats entry to fill in
 * @param msid     management server id passed to the host DAO lookups
 */
private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) {
    // Last-agents list comes from the agent manager, not from the host table.
    final List<String> lastAgents = _agentMgr.getLastAgents();
    newEntry.setLastAgents(lastAgents);

    // Agents and their count are both looked up by management server id.
    newEntry.setAgents(_hostDao.listByMs(msid));
    newEntry.setAgentCount(_hostDao.countByMs(msid));
}

View File

@ -95,49 +95,51 @@ public class StoragePoolMonitor implements Listener {
@Override
public void processConnect(Host host, StartupCommand cmd, boolean forRebalance) throws ConnectionException {
if (cmd instanceof StartupRoutingCommand) {
StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd;
if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM ||
if (!(cmd instanceof StartupRoutingCommand) || cmd.isConnectionTransferred()) {
return;
}
StartupRoutingCommand scCmd = (StartupRoutingCommand)cmd;
if (scCmd.getHypervisorType() == HypervisorType.XenServer || scCmd.getHypervisorType() == HypervisorType.KVM ||
scCmd.getHypervisorType() == HypervisorType.VMware || scCmd.getHypervisorType() == HypervisorType.Simulator ||
scCmd.getHypervisorType() == HypervisorType.Ovm || scCmd.getHypervisorType() == HypervisorType.Hyperv ||
scCmd.getHypervisorType() == HypervisorType.LXC || scCmd.getHypervisorType() == HypervisorType.Ovm3) {
List<StoragePoolVO> pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER);
List<StoragePoolVO> zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false);
List<StoragePoolVO> zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType());
zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor);
pools.addAll(zoneStoragePoolsByTags);
List<StoragePoolVO> zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any);
pools.addAll(zoneStoragePoolsByAnyHypervisor);
List<StoragePoolVO> pools = _poolDao.listBy(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER);
List<StoragePoolVO> zoneStoragePoolsByTags = _poolDao.findZoneWideStoragePoolsByTags(host.getDataCenterId(), null, false);
List<StoragePoolVO> zoneStoragePoolsByHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), scCmd.getHypervisorType());
zoneStoragePoolsByTags.retainAll(zoneStoragePoolsByHypervisor);
pools.addAll(zoneStoragePoolsByTags);
List<StoragePoolVO> zoneStoragePoolsByAnyHypervisor = _poolDao.findZoneWideStoragePoolsByHypervisor(host.getDataCenterId(), HypervisorType.Any);
pools.addAll(zoneStoragePoolsByAnyHypervisor);
// get the zone wide disabled pools list if global setting is true.
if (StorageManager.MountDisabledStoragePool.value()) {
pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE));
// get the zone wide disabled pools list if global setting is true.
if (StorageManager.MountDisabledStoragePool.value()) {
pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), null, null, ScopeType.ZONE));
}
// get the cluster wide disabled pool list
if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) {
pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER));
}
for (StoragePoolVO pool : pools) {
if (!pool.isShared()) {
continue;
}
// get the cluster wide disabled pool list
if (StorageManager.MountDisabledStoragePool.valueIn(host.getClusterId())) {
pools.addAll(_poolDao.findDisabledPoolsByScope(host.getDataCenterId(), host.getPodId(), host.getClusterId(), ScopeType.CLUSTER));
if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) {
throw new ConnectionException(true, String.format("Unable to prepare OCFS2 nodes for pool %s", pool));
}
for (StoragePoolVO pool : pools) {
if (!pool.isShared()) {
continue;
}
if (pool.getPoolType() == StoragePoolType.OCFS2 && !_ocfs2Mgr.prepareNodes(pool.getClusterId())) {
throw new ConnectionException(true, String.format("Unable to prepare OCFS2 nodes for pool %s", pool));
}
Long hostId = host.getId();
if (logger.isDebugEnabled()) {
logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool);
}
try {
_storageManager.connectHostToSharedPool(host, pool.getId());
_storageManager.createCapacityEntry(pool.getId());
} catch (Exception e) {
throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e);
}
Long hostId = host.getId();
if (logger.isDebugEnabled()) {
logger.debug("Host {} connected, connecting host to shared pool {} and sending storage pool information ...", host, pool);
}
try {
_storageManager.connectHostToSharedPool(host, pool.getId());
_storageManager.createCapacityEntry(pool.getId());
} catch (Exception e) {
throw new ConnectionException(true, String.format("Unable to connect host %s to storage pool %s due to %s", host, pool, e.toString()), e);
}
}
}

View File

@ -37,6 +37,11 @@ import org.apache.cloudstack.framework.config.Configurable;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.MigrateAgentConnectionCommand;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.dao.HostDao;
@ -44,6 +49,8 @@ import com.cloud.hypervisor.Hypervisor;
import com.cloud.resource.ResourceState;
import com.cloud.utils.component.ComponentLifecycleBase;
import com.cloud.utils.exception.CloudRuntimeException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable {
@ -63,14 +70,35 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
@Inject
private HostDao hostDao;
@Inject
private DataCenterDao dcDao;
@Inject
private ManagementServerHostDao mshostDao;
@Inject
private AgentManager agentManager;
//////////////////////////////////////////////////////
/////////////// Agent MSLB Methods ///////////////////
//////////////////////////////////////////////////////
/**
 * Returns the management server addresses from the management-server-addresses setting,
 * in configured order, as a new mutable list.
 *
 * @return comma-separated setting value with all spaces removed, split into entries
 * @throws CloudRuntimeException when the setting is empty
 */
@Override
public List<String> getManagementServerList() {
    final String configuredAddresses = ApiServiceConfiguration.ManagementServerAddresses.value();
    if (!StringUtils.isEmpty(configuredAddresses)) {
        final String[] addresses = configuredAddresses.replace(" ", "").split(",");
        // Wrap in ArrayList so the caller receives a resizable list rather than the fixed-size array view.
        return new ArrayList<>(Arrays.asList(addresses));
    }
    throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting",
            ApiServiceConfiguration.ManagementServerAddresses.key()));
}
/**
 * Overload that does not name a load-balancing algorithm.
 * Delegates to the four-argument variant with a {@code null} algorithm name.
 */
@Override
public List<String> getManagementServerList(final Long hostId, final Long dcId, final List<Long> orderedHostIdList) {
return getManagementServerList(hostId, dcId, orderedHostIdList, null);
}
@Override
public List<String> getManagementServerList(final Long hostId, final Long dcId, final List<Long> orderedHostIdList, String lbAlgorithm) {
final String msServerAddresses = ApiServiceConfiguration.ManagementServerAddresses.value();
if (StringUtils.isEmpty(msServerAddresses)) {
throw new CloudRuntimeException(String.format("No management server addresses are defined in '%s' setting",
@ -90,7 +118,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
hostIdList.add(hostId);
}
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm();
final org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm algorithm = getAgentMSLBAlgorithm(lbAlgorithm);
final List<String> msList = Arrays.asList(msServerAddresses.replace(" ", "").split(","));
return algorithm.sort(msList, hostIdList, hostId);
}
@ -146,6 +174,30 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
return agentBasedHosts;
}
/**
 * Collects the hosts of the given management server that pass {@code conditionallyAddHost}.
 *
 * @param msId management server id used for the host lookup
 * @return a new list, never {@code null}; empty when the DAO returns {@code null} or no host qualifies
 */
private List<Host> getAllAgentBasedHosts(long msId) {
    final List<Host> result = new ArrayList<>();
    final List<HostVO> hostsOnMs = hostDao.listHostsByMs(msId);
    if (hostsOnMs != null) {
        for (final Host candidate : hostsOnMs) {
            conditionallyAddHost(result, candidate);
        }
    }
    return result;
}
/**
 * Collects the hosts of the given management server and zone that pass {@code conditionallyAddHost}.
 *
 * @param msId management server id used for the host lookup
 * @param dcId zone (data center) id used for the host lookup
 * @return a new list, never {@code null}; empty when the DAO returns {@code null} or no host qualifies
 */
private List<Host> getAllAgentBasedHostsInDc(long msId, long dcId) {
    final List<Host> result = new ArrayList<>();
    final List<HostVO> hostsOnMsInDc = hostDao.listHostsByMsAndDc(msId, dcId);
    if (hostsOnMsInDc != null) {
        for (final Host candidate : hostsOnMsInDc) {
            conditionallyAddHost(result, candidate);
        }
    }
    return result;
}
private void conditionallyAddHost(List<Host> agentBasedHosts, Host host) {
if (host == null) {
if (logger.isTraceEnabled()) {
@ -191,13 +243,33 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
agentBasedHosts.add(host);
}
/**
 * Tells whether the given management server has at least one agent-based host.
 *
 * @param msId management server id
 * @return {@code true} when {@code getAllAgentBasedHosts(msId)} yields a non-empty list
 */
@Override
public boolean haveAgentBasedHosts(long msId) {
    final List<Host> agentBasedHosts = getAllAgentBasedHosts(msId);
    return CollectionUtils.isNotEmpty(agentBasedHosts);
}
private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm() {
final String algorithm = getLBAlgorithmName();
if (algorithmMap.containsKey(algorithm)) {
return algorithmMap.get(algorithm);
return getAgentMSLBAlgorithm(null);
}
/**
 * Resolves a load-balancing algorithm implementation by name.
 *
 * @param lbAlgorithm algorithm name; when empty or {@code null} the name from {@code getLBAlgorithmName()} is used
 * @return the implementation registered in {@code algorithmMap} under the resolved name
 * @throws CloudRuntimeException when no implementation is registered under that name; the message
 *         mentions the config key only when the name was taken from configuration
 */
private org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm getAgentMSLBAlgorithm(String lbAlgorithm) {
    final boolean nameFromConfig = StringUtils.isEmpty(lbAlgorithm);
    final String algorithmName = nameFromConfig ? getLBAlgorithmName() : lbAlgorithm;

    if (!algorithmMap.containsKey(algorithmName)) {
        final String origin = nameFromConfig ? " configured for '" + IndirectAgentLBAlgorithm.key() + "'" : "";
        throw new CloudRuntimeException(String.format("Algorithm %s%s not found, valid values are: %s",
                algorithmName, origin, algorithmMap.keySet()));
    }
    return algorithmMap.get(algorithmName);
}
@Override
public void checkLBAlgorithmName(String lbAlgorithm) {
if (!algorithmMap.containsKey(lbAlgorithm)) {
throw new CloudRuntimeException(String.format("Invalid algorithm %s, valid values are: %s", lbAlgorithm, algorithmMap.keySet()));
}
throw new CloudRuntimeException(String.format("Algorithm configured for '%s' not found, valid values are: %s",
IndirectAgentLBAlgorithm.key(), algorithmMap.keySet()));
}
////////////////////////////////////////////////////////////
@ -224,6 +296,73 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
}
}
/**
 * Asks every indirect agent connected to the given management server to migrate its
 * connection to another management server, zone by zone.
 *
 * <p>For each agent-based host a {@code MigrateAgentConnectionCommand} is sent and the host's
 * last management server id is set to {@code fromMsId}. A new management server list and
 * check interval are only included in the command when {@code lbAlgorithm} differs from the
 * currently configured algorithm name.
 *
 * @param fromMsUuid          uuid of the management server being vacated (used for logging only)
 * @param fromMsId            id of the management server being vacated
 * @param lbAlgorithm         algorithm name to hand to the agents; blank means "unchanged"
 * @param timeoutDurationInMs overall time budget in milliseconds; must be positive
 * @return {@code true} when all agents were processed or there were none;
 *         {@code false} when the timeout is not positive or the budget ran out before all agents were processed
 */
@Override
public boolean migrateAgents(String fromMsUuid, long fromMsId, String lbAlgorithm, long timeoutDurationInMs) {
    if (timeoutDurationInMs <= 0) {
        logger.debug(String.format("Not migrating indirect agents from management server node %d (id: %s) to other nodes, invalid timeout duration", fromMsId, fromMsUuid));
        return false;
    }

    logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
    final long migrationStartTime = System.currentTimeMillis();
    if (!haveAgentBasedHosts(fromMsId)) {
        logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid));
        return true;
    }

    boolean lbAlgorithmChanged = false;
    if (StringUtils.isNotBlank(lbAlgorithm) && !lbAlgorithm.equalsIgnoreCase(getLBAlgorithmName())) {
        logger.debug(String.format("Indirect agent lb algorithm changed to %s", lbAlgorithm));
        lbAlgorithmChanged = true;
    }

    // Agents must avoid every management server that is not Up, plus the one being vacated.
    final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
    final ManagementServerHostVO ms = mshostDao.findByMsid(fromMsId);
    if (ms != null && !avoidMsList.contains(ms.getServiceIP())) {
        avoidMsList.add(ms.getServiceIP());
    }

    final List<DataCenterVO> dataCenterList = dcDao.listAll();
    for (DataCenterVO dc : dataCenterList) {
        final Long dcId = dc.getId();
        final List<Host> agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId);
        if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) {
            continue;
        }

        // The ordered host id list is only consumed when a new MS list has to be computed,
        // so skip the lookup for zones without agents and for runs with an unchanged algorithm.
        final List<Long> orderedHostIdList = lbAlgorithmChanged ? getOrderedHostIdList(dcId) : null;

        logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc.toString()));
        for (final Host host : agentBasedHostsOfMsInDc) {
            // The budget is checked before each host, so an in-flight send is never interrupted.
            final long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime;
            if (migrationElapsedTimeInMs >= timeoutDurationInMs) {
                logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid));
                return false;
            }

            List<String> msList = null;
            Long lbCheckInterval = 0L;
            if (lbAlgorithmChanged) {
                // send new MS list when there is change in lb algorithm
                msList = getManagementServerList(host.getId(), dcId, orderedHostIdList, lbAlgorithm);
                lbCheckInterval = getLBPreferredHostCheckInterval(host.getClusterId());
            }

            final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
            agentManager.easySend(host.getId(), cmd); //answer not received as the agent disconnects and reconnects to other ms
            updateLastManagementServer(host.getId(), fromMsId);
        }
    }

    return true;
}
/**
 * Records {@code msId} as the last management server of the given host and persists the change.
 * Does nothing when the host cannot be found.
 *
 * @param hostId id of the host to update
 * @param msId   management server id to store on the host
 */
private void updateLastManagementServer(long hostId, long msId) {
    final HostVO hostRecord = hostDao.findById(hostId);
    if (hostRecord == null) {
        return;
    }
    hostRecord.setLastManagementServerId(msId);
    hostDao.update(hostId, hostRecord);
}
private void configureAlgorithmMap() {
final List<org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm> algorithms = new ArrayList<>();
algorithms.add(new IndirectAgentLBStaticAlgorithm());

View File

@ -268,8 +268,8 @@
<property name="name" value="ApiAsyncJobDispatcher" />
</bean>
<bean id="shutdownManager" class="org.apache.cloudstack.shutdown.ShutdownManagerImpl" >
<property name="name" value="shutdownManager" />
<bean id="managementServerMaintenanceManager" class="org.apache.cloudstack.maintenance.ManagementServerMaintenanceManagerImpl" >
<property name="name" value="managementServerMaintenanceManager" />
</bean>
<bean id="statsCollector" class="com.cloud.server.StatsCollector" />

View File

@ -46,9 +46,9 @@ import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd;
import org.apache.cloudstack.api.command.admin.host.AddHostCmd;
import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd;
@ -79,7 +79,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
* @see com.cloud.resource.ResourceService#cancelMaintenance(org.apache.cloudstack.api.command.admin.host.CancelHostMaintenanceCmd)
*/
@Override
public Host cancelMaintenance(final CancelMaintenanceCmd cmd) {
public Host cancelMaintenance(final CancelHostMaintenanceCmd cmd) {
// TODO Auto-generated method stub
return null;
}
@ -142,7 +142,7 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
* @see com.cloud.resource.ResourceService#maintain(org.apache.cloudstack.api.command.admin.host.PrepareForHostMaintenanceCmd)
*/
@Override
public Host maintain(final PrepareForMaintenanceCmd cmd) {
public Host maintain(final PrepareForHostMaintenanceCmd cmd) {
// TODO Auto-generated method stub
return null;
}
@ -250,6 +250,15 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana
return null;
}
/* (non-Javadoc)
 * Mock implementation: ignores all arguments, performs no work and always returns null.
 * @see com.cloud.resource.ResourceManager#createHostAndAgent(java.lang.Long, com.cloud.resource.ServerResource, java.util.Map, boolean, java.util.List, boolean, boolean)
 */
@Override
public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map<String, String> details, final boolean old, final List<String> hostTags, final boolean forRebalance, boolean isTransferredConnection) {
// TODO Auto-generated method stub
return null;
}
/* (non-Javadoc)
* @see com.cloud.resource.ResourceManager#addHost(long, com.cloud.resource.ServerResource, com.cloud.host.Host.Type, java.util.Map)
*/

View File

@ -76,4 +76,5 @@
<bean id="vlanDetailsDao" class="com.cloud.dc.dao.VlanDetailsDaoImpl" />
<bean id="publicIpQuarantineDaoImpl" class="com.cloud.network.dao.PublicIpQuarantineDaoImpl" />
<bean id="reservationDao" class="org.apache.cloudstack.reservation.dao.ReservationDaoImpl" />
<bean id="managementServerHostDao" class="com.cloud.cluster.dao.ManagementServerHostDaoImpl" />
</beans>

View File

@ -22,19 +22,19 @@ from marvin.lib.utils import *
from marvin.lib.base import *
from marvin.lib.common import *
class TestSafeShutdown(cloudstackTestCase):
class TestMSMaintenanceAndSafeShutdown(cloudstackTestCase):
"""
Tests safely shutting down the Management Server
Tests MS maintenance and safe shutting down the Management Server
"""
def setUp(self):
self.apiclient = self.testClient.getApiClient()
self.hypervisor = self.testClient.getHypervisorInfo()
self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
self.cleanup = []
def tearDown(self):
self.startServer()
super(TestSafeShutdown, self).tearDown()
super(TestMSMaintenanceAndSafeShutdown, self).tearDown()
def isServerShutdown(self):
sshClient = SshClient(
@ -87,8 +87,61 @@ class TestSafeShutdown(cloudstackTestCase):
{"name": "test", "displaytext": "test"}
)
def getActiveManagementServers(self):
    """Return the service IPs of all management servers whose state is 'Up'.

    Returns an empty list when the list call yields no servers.
    """
    cmd = listManagementServers.listManagementServersCmd()
    # NOTE(review): the API client may return None rather than [] for an empty
    # result (confirm against Marvin); "or []" keeps the iteration below safe.
    servers = self.apiclient.listManagementServers(cmd) or []
    return [server.serviceip for server in servers if server.state == 'Up']
@attr(tags=["advanced", "smoke"])
def test_01_prepare_and_cancel_shutdown(self):
def test_01_prepare_and_cancel_maintenance(self):
active_management_servers = self.getActiveManagementServers()
if len(active_management_servers) <= 1:
self.skipTest("Skipping test case, this test is intended for only multiple management servers")
hypervisor = self.hypervisor.lower()
if hypervisor == 'kvm':
list_configurations_cmd = listConfigurations.listConfigurationsCmd()
list_configurations_cmd.name = "host"
list_configurations_response = self.apiclient.listConfigurations(list_configurations_cmd)
self.assertNotEqual(len(list_configurations_response), 0,
"Check if the list configurations API returns a non-empty response")
for item in list_configurations_response:
if item.name == list_configurations_cmd.name:
host_config = item
hosts = host_config.value.split(",")
if len(hosts) <= 1:
self.skipTest(
"Skipping test case, this test is intended for only multiple management server hosts configured on host setting for kvm")
try :
prepare_for_maintenance_cmd = prepareForMaintenance.prepareForMaintenanceCmd()
prepare_for_maintenance_cmd.managementserverid = 1
response = self.apiclient.prepareForMaintenance(prepare_for_maintenance_cmd)
self.assertEqual(
response.maintenanceinitiated,
True,
"Failed to prepare for maintenance"
)
try :
self.run_async_cmd()
except Exception as e:
self.debug("Prepare for maintenance check successful, API failure: %s" % e)
finally :
cancel_maintenance_cmd = cancelMaintenance.cancelMaintenanceCmd()
cancel_maintenance_cmd.managementserverid = 1
self.apiclient.cancelMaintenance(cancel_maintenance_cmd)
## Just to be sure, run another async command
project = self.run_async_cmd()
self.cleanup.append(project)
@attr(tags=["advanced", "smoke"])
def test_02_prepare_and_cancel_shutdown(self):
try :
prepare_for_shutdown_cmd = prepareForShutdown.prepareForShutdownCmd()
prepare_for_shutdown_cmd.managementserverid = 1
@ -111,7 +164,7 @@ class TestSafeShutdown(cloudstackTestCase):
self.cleanup.append(project)
@attr(tags=["advanced", "smoke"])
def test_02_trigger_shutdown(self):
def test_03_trigger_shutdown(self):
try :
cmd = triggerShutdown.triggerShutdownCmd()
cmd.managementserverid = 1

View File

@ -233,7 +233,8 @@ known_categories = {
'listQuarantinedIp': 'IP Quarantine',
'updateQuarantinedIp': 'IP Quarantine',
'removeQuarantinedIp': 'IP Quarantine',
'Shutdown': 'Management',
'Shutdown': 'Maintenance',
'Maintenance': 'Maintenance',
'addObjectStoragePool': 'Object Store',
'listObjectStoragePools': 'Object Store',
'deleteObjectStoragePool': 'Object Store',

View File

@ -349,6 +349,7 @@
"label.agent.username": "Agent username",
"label.agentport": "Agent port",
"label.agentstate": "Agent state",
"label.agentscount": "Number of connected agents",
"label.agree": "Agree",
"label.alert": "Alert",
"label.alert.details": "Alert details",
@ -470,7 +471,7 @@
"label.cachemode": "Write-cache type",
"label.cancel": "Cancel",
"label.cancel.shutdown": "Cancel Shutdown",
"label.cancelmaintenance": "Cancel maintenance",
"label.cancel.maintenance": "Cancel Maintenance",
"label.cancel.host.as.degraded": "Cancel host as degraded",
"label.capacity": "Capacity",
"label.capacitybytes": "Capacity bytes",
@ -569,6 +570,7 @@
"label.confirmdeclineinvitation": "Are you sure you want to decline this project invitation?",
"label.confirmpassword": "Confirm password",
"label.confirmpassword.description": "Please type the same password again.",
"label.connected.agents": "Connected Agents",
"label.connect": "Connect",
"label.connectiontimeout": "Connection timeout",
"label.conservemode": "Conserve mode",
@ -1385,6 +1387,7 @@
"label.management.server": "Management server",
"label.management.servers": "Management servers",
"label.management.server.peers": "Peers",
"label.managementservername": "Management Server",
"label.managementservers": "Number of management servers",
"label.matchall": "Match all",
"label.max": "Max.",
@ -1684,6 +1687,7 @@
"label.peerstate": "Peer State",
"label.peerstate.lastupdated": "Peer State Updated Time",
"label.pending.jobs": "Pending Jobs",
"label.pendingjobscount": "Number of pending jobs",
"label.per.account": "Per Account",
"label.per.zone": "Per zone",
"label.percentage": "Percentage",
@ -1723,7 +1727,7 @@
"label.prefix": "Prefix",
"label.prefix.type": "Prefix type",
"label.prepare.for.shutdown": "Prepare for Shutdown",
"label.prepareformaintenance": "Prepare for Maintenance",
"label.prepare.for.maintenance": "Prepare for Maintenance",
"label.presetup": "PreSetup",
"label.prev": "Prev",
"label.previous": "Previous",
@ -2052,6 +2056,7 @@
"label.sequence": "Sequence",
"label.server": "Server",
"label.server.certificate": "Server certificate",
"label.serviceip": "Service IP",
"label.service.connectivity.distributedroutercapabilitycheckbox": "Distributed router",
"label.service.connectivity.regionlevelvpccapabilitycheckbox": "Region level VPC",
"label.service.group": "Service group",
@ -2825,7 +2830,8 @@
"message.backup.create": "Are you sure you want create an Instance backup?",
"message.backup.offering.remove": "Are you sure you want to remove Instance from backup offering and delete the backup chain?",
"message.backup.restore": "Please confirm that you want to restore the Instance backup?",
"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management server. It will resume accepting any new Async Jobs.",
"message.cancel.shutdown": "Please confirm that you would like to cancel the shutdown on this Management Server. It will resume accepting any new Async Jobs.",
"message.cancel.maintenance": "Please confirm that you would like to cancel the maintenance on this Management Server. It will resume accepting any new Async Jobs.",
"message.certificate.upload.processing": "Certificate upload in progress",
"message.change.disk.offering.sharedfs.failed": "Failed to change disk offering for the Shared FileSystem.",
"message.change.disk.offering.sharedfs.processing": "Changing disk offering for the Shared FileSystem.",
@ -3341,7 +3347,8 @@
"message.please.wait.while.zone.is.being.created": "Please wait while your zone is being created; this may take a while...",
"message.pod.dedicated": "Pod dedicated.",
"message.pod.dedication.released": "Pod dedication released.",
"message.prepare.for.shutdown": "Please confirm that you would like to prep this Management server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.",
"message.prepare.for.shutdown": "Please confirm that you would like to prepare this Management Server for shutdown. It will not accept any new Async Jobs but will NOT terminate after there are no pending jobs.",
"message.prepare.for.maintenance": "Please confirm that you would like to prepare this Management Server for maintenance. It will not accept any new Async Jobs.",
"message.primary.storage.invalid.state": "Primary storage is not in Up state",
"message.processing.complete": "Processing complete!",
"message.protocol.description": "For XenServer, choose NFS, iSCSI, or PreSetup. For KVM, choose NFS, SharedMountPoint, RDB, CLVM or Gluster. For vSphere, choose NFS, PreSetup (VMFS or iSCSI or FiberChannel or vSAN or vVols) or DatastoreCluster. For Hyper-V, choose SMB/CIFS. For LXC, choose NFS or SharedMountPoint. For OVM, choose NFS or OCFS2.",
@ -3431,7 +3438,8 @@
"message.setup.physical.network.during.zone.creation.basic": "When adding a basic zone, you can set up one physical Network, which corresponds to a NIC on the hypervisor. The Network carries several types of traffic.<br/><br/>You may also <strong>add</strong> other traffic types onto the physical Network.",
"message.shared.network.offering.warning": "Domain admins and regular Users can only create shared Networks from Network offering with the setting specifyvlan=false. Please contact an administrator to create a Network offering if this list is empty.",
"message.shared.network.unsupported.for.nsx": "Shared networks aren't supported for NSX enabled zones",
"message.shutdown.triggered": "A shutdown has been triggered. CloudStack will not accept new jobs",
"message.shutdown.triggered": "Shutdown has been triggered. This Management Server will not accept new jobs",
"message.maintenance.initiated": "Maintenance has been initiated. This Management Server will not accept new jobs",
"message.snapshot.additional.zones": "Snapshots will always be created in its native zone - %x, here you can select additional zone(s) where it will be copied to at creation time",
"message.sourcenatip.change.warning": "WARNING: Changing the sourcenat IP address of the network will cause connectivity downtime for the Instances with NICs in the Network.",
"message.sourcenatip.change.inhibited": "Changing the sourcenat to this IP of the Network to this address is inhibited as firewall rules are defined for it. This can include port forwarding or load balancing rules.\n - If this is an Isolated Network, please use updateNetwork/click the edit button.\n - If this is a VPC, first clear all other rules for this address.",
@ -3595,7 +3603,7 @@
"message.tooltip.reserved.system.netmask": "The Network prefix that defines the pod subnet. Uses CIDR notation.",
"message.traffic.type.deleted": "Successfully deleted traffic type",
"message.traffic.type.to.basic.zone": "traffic type to basic zone",
"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.",
"message.trigger.shutdown": "Please confirm that you would like to trigger a shutdown on this Management Server. It will not accept any new Async Jobs and will terminate after there are no pending jobs.",
"message.type.values.to.add": "Please add additional values by typing them in",
"message.update.autoscale.policy.failed": "Failed to update autoscale policy",
"message.update.autoscale.vmgroup.failed": "Failed to update autoscale group",

View File

@ -17,11 +17,14 @@
<template>
<div>
<a-affix v-if="this.$store.getters.shutdownTriggered" >
<a-affix v-if="this.$store.getters.maintenanceInitiated" >
<a-alert :message="$t('message.maintenance.initiated')" type="error" banner :showIcon="false" class="maintenanceHeader" />
</a-affix>
<a-affix v-else-if="this.$store.getters.shutdownTriggered" >
<a-alert :message="$t('message.shutdown.triggered')" type="error" banner :showIcon="false" class="shutdownHeader" />
</a-affix>
<a-layout class="layout" :class="[device]">
<a-affix style="z-index: 200" :offsetTop="this.$store.getters.shutdownTriggered ? 25 : 0">
<a-affix style="z-index: 200" :offsetTop="this.$store.getters.maintenanceInitiated || this.$store.getters.shutdownTriggered ? 25 : 0">
<template v-if="isSideMenu()">
<a-drawer
v-if="isMobile()"
@ -84,7 +87,7 @@
<!-- layout header -->
<a-affix style="z-index: 100">
<global-header
:style="this.$store.getters.shutdownTriggered ? 'margin-top: 25px;' : null"
:style="this.$store.getters.maintenanceInitiated || this.$store.getters.shutdownTriggered ? 'margin-top: 25px;' : null"
:mode="layoutMode"
:menus="menus"
:theme="navTheme"
@ -257,8 +260,9 @@ export default {
this.$store.commit('SET_COUNT_NOTIFY', 0)
},
// Calls the readyForShutdown API, passing the management server id held in the
// store (getters.msId) as managementserverid, and dispatches the returned
// shutdowntriggered / maintenanceinitiated flags to the store.
// A missing or falsy flag in the response is dispatched as false.
checkShutdown () {
api('readyForShutdown', {}).then(json => {
api('readyForShutdown', { managementserverid: this.$store.getters.msId }).then(json => {
this.$store.dispatch('SetShutdownTriggered', json.readyforshutdownresponse.readyforshutdown.shutdowntriggered || false)
this.$store.dispatch('SetMaintenanceInitiated', json.readyforshutdownresponse.readyforshutdown.maintenanceinitiated || false)
})
}
}
@ -307,6 +311,16 @@ export default {
}
}
/* Banner shown while maintenance is initiated (class used by the maintenance a-alert).
   Bold, centered, full-viewport-width strip. Its 25px height matches the 25px
   offsetTop / margin-top the layout applies when the banner is visible. */
.maintenanceHeader {
font-weight: bold;
height: 25px;
text-align: center;
padding: 0px;
margin: 0px;
width: 100vw;
position: absolute;
}
.shutdownHeader {
font-weight: bold;
height: 25px;

View File

@ -725,7 +725,7 @@ export default {
'/zone', '/pod', '/cluster', '/host', '/storagepool', '/imagestore', '/systemvm', '/router', '/ilbvm', '/annotation',
'/computeoffering', '/systemoffering', '/diskoffering', '/backupoffering', '/networkoffering', '/vpcoffering',
'/tungstenfabric', '/oauthsetting', '/guestos', '/guestoshypervisormapping', '/webhook', 'webhookdeliveries', '/quotatariff', '/sharedfs',
'/ipv4subnets'].join('|'))
'/ipv4subnets', '/managementserver'].join('|'))
.test(this.$route.path)
},
enableGroupAction () {

View File

@ -39,9 +39,10 @@ export default {
}
fields.push('clustername')
fields.push('zonename')
fields.push('managementservername')
return fields
},
details: ['name', 'id', 'resourcestate', 'ipaddress', 'hypervisor', 'arch', 'type', 'clustername', 'podname', 'zonename', 'disconnected', 'created'],
details: ['name', 'id', 'resourcestate', 'ipaddress', 'hypervisor', 'arch', 'type', 'clustername', 'podname', 'zonename', 'managementservername', 'disconnected', 'created'],
tabs: [{
name: 'details',
component: shallowRef(defineAsyncComponent(() => import('@/components/view/DetailsTab.vue')))

View File

@ -26,26 +26,30 @@ export default {
permission: ['listManagementServersMetrics'],
resourceType: 'ManagementServer',
columns: () => {
const fields = ['name', 'state', 'serviceip', 'version', 'osdistribution', 'agentcount']
const fields = ['name', 'state', 'serviceip', 'version', 'osdistribution', 'pendingjobscount', 'agentscount']
const metricsFields = ['collectiontime', 'availableprocessors', 'cpuload', 'heapmemoryused']
if (store.getters.metrics) {
fields.push(...metricsFields)
}
return fields
},
details: ['collectiontime', 'usageislocal', 'dbislocal', 'lastserverstart', 'lastserverstop', 'lastboottime', 'version', 'loginfo', 'systemtotalcpucycles', 'systemloadaverages', 'systemcycleusage', 'systemmemorytotal', 'systemmemoryfree', 'systemmemoryvirtualsize', 'availableprocessors', 'javadistribution', 'javaversion', 'osdistribution', 'kernelversion', 'agentcount', 'sessions', 'heapmemoryused', 'heapmemorytotal', 'threadsblockedcount', 'threadsdeamoncount', 'threadsnewcount', 'threadsrunnablecount', 'threadsterminatedcount', 'threadstotalcount', 'threadswaitingcount'],
details: ['collectiontime', 'usageislocal', 'dbislocal', 'lastserverstart', 'lastserverstop', 'lastboottime', 'version', 'loginfo', 'systemtotalcpucycles', 'systemloadaverages', 'systemcycleusage', 'systemmemorytotal', 'systemmemoryfree', 'systemmemoryvirtualsize', 'availableprocessors', 'javadistribution', 'javaversion', 'osdistribution', 'kernelversion', 'pendingjobscount', 'agentscount', 'sessions', 'heapmemoryused', 'heapmemorytotal', 'threadsblockedcount', 'threadsdeamoncount', 'threadsnewcount', 'threadsrunnablecount', 'threadsterminatedcount', 'threadstotalcount', 'threadswaitingcount'],
tabs: [
{
name: 'details',
component: shallowRef(defineAsyncComponent(() => import('@/components/view/DetailsTab.vue')))
},
{
name: 'management.server.peers',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/ManagementServerPeerTab.vue')))
},
{
name: 'pending.jobs',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/AsyncJobsTab.vue')))
},
{
name: 'management.server.peers',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/ManagementServerPeerTab.vue')))
name: 'connected.agents',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/ConnectedAgentsTab.vue')))
},
{
name: 'comments',
@ -53,6 +57,31 @@ export default {
}
],
actions: [
{
// Management server action bound to the prepareForMaintenance API.
api: 'prepareForMaintenance',
icon: 'plus-square-outlined',
label: 'label.prepare.for.maintenance',
message: 'message.prepare.for.maintenance',
dataView: true,
popup: true,
// NOTE(review): presumably the text the user must type in the Confirmation popup - confirm against Confirmation.vue
confirmationText: 'MAINTENANCE',
// Offered only while the management server state is 'Up'.
show: (record, store) => { return record.state === 'Up' },
// No `mapping` here, unlike cancelMaintenance; NOTE(review): verify Confirmation.vue supplies managementserverid itself.
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/Confirmation.vue')))
},
{
// Management server action bound to the cancelMaintenance API.
api: 'cancelMaintenance',
icon: 'minus-square-outlined',
label: 'label.cancel.maintenance',
message: 'message.cancel.maintenance',
dataView: true,
popup: true,
// Offered only while the server is entering or already in maintenance.
show: (record, store) => { return ['PreparingForMaintenance', 'Maintenance'].includes(record.state) },
mapping: {
// The selected record's id is sent as the managementserverid parameter.
managementserverid: {
value: (record, params) => { return record.id }
}
}
},
{
api: 'prepareForShutdown',
icon: 'exclamation-circle-outlined',
@ -72,7 +101,7 @@ export default {
dataView: true,
popup: true,
confirmationText: 'SHUTDOWN',
show: (record, store) => { return ['Up', 'PreparingToShutDown', 'ReadyToShutDown'].includes(record.state) },
show: (record, store) => { return ['Up', 'Maintenance', 'PreparingForShutDown', 'ReadyToShutDown'].includes(record.state) },
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/Confirmation.vue')))
},
{
@ -83,7 +112,7 @@ export default {
docHelp: 'installguide/configuration.html#adding-a-zone',
dataView: true,
popup: true,
show: (record, store) => { return ['PreparingToShutDown', 'ReadyToShutDown', 'ShuttingDown'].includes(record.state) },
show: (record, store) => { return ['PreparingForShutDown', 'ReadyToShutDown', 'ShuttingDown'].includes(record.state) },
mapping: {
managementserverid: {
value: (record, params) => { return record.id }

View File

@ -46,6 +46,8 @@ const getters = {
countNotify: state => state.user.countNotify,
customColumns: state => state.user.customColumns,
logoutFlag: state => state.user.logoutFlag,
msId: state => state.user.msId,
maintenanceInitiated: state => state.user.maintenanceInitiated,
shutdownTriggered: state => state.user.shutdownTriggered,
twoFaEnabled: state => state.user.twoFaEnabled,
twoFaProvider: state => state.user.twoFaProvider,

View File

@ -128,6 +128,9 @@ const app = {
vueProps.$localStorage.set(RELOAD_ALL_PROJECTS, allProjects)
state.allProjects = allProjects
},
// Stores the maintenance-initiated flag on this (app) module's state.
// NOTE(review): the `maintenanceInitiated` getter reads state.user.maintenanceInitiated,
// and the user module defines a mutation with the same name - confirm this copy is needed.
SET_MAINTENANCE_INITIATED: (state, maintenanceInitiated) => {
state.maintenanceInitiated = maintenanceInitiated
},
SET_SHUTDOWN_TRIGGERED: (state, shutdownTriggered) => {
state.shutdownTriggered = shutdownTriggered
},
@ -193,6 +196,9 @@ const app = {
ReloadAllProjects ({ commit, allProjects }) {
commit('RELOAD_ALL_PROJECTS', allProjects)
},
// Action wrapper: commits SET_MAINTENANCE_INITIATED with the given flag.
// `bool` is the value produced by checkShutdown (the API flag or false).
SetMaintenanceInitiated ({ commit }, bool) {
commit('SET_MAINTENANCE_INITIATED', bool)
},
SetShutdownTriggered ({ commit }, bool) {
commit('SET_SHUTDOWN_TRIGGERED', bool)
},

View File

@ -41,6 +41,7 @@ import {
DOMAIN_STORE,
DARK_MODE,
CUSTOM_COLUMNS,
MS_ID,
OAUTH_DOMAIN,
OAUTH_PROVIDER,
LATEST_CS_VERSION
@ -68,6 +69,8 @@ const user = {
loginFlag: false,
logoutFlag: false,
customColumns: {},
msId: '',
maintenanceInitiated: false,
shutdownTriggered: false,
twoFaEnabled: false,
twoFaProvider: '',
@ -147,6 +150,13 @@ const user = {
vueProps.$localStorage.set(CUSTOM_COLUMNS, customColumns)
state.customColumns = customColumns
},
// Records the management server id in the user state and persists it to
// local storage under MS_ID so it can be restored on a fresh page load.
SET_MS_ID: (state, msId) => {
state.msId = msId
vueProps.$localStorage.set(MS_ID, msId)
},
// Stores the maintenance-initiated flag on the user state, which is the state
// the `maintenanceInitiated` getter reads. Not persisted to local storage.
SET_MAINTENANCE_INITIATED: (state, maintenanceInitiated) => {
state.maintenanceInitiated = maintenanceInitiated
},
SET_SHUTDOWN_TRIGGERED: (state, shutdownTriggered) => {
state.shutdownTriggered = shutdownTriggered
},
@ -227,6 +237,9 @@ const user = {
commit('SET_2FA_PROVIDER', result.providerfor2fa)
commit('SET_2FA_ISSUER', result.issuerfor2fa)
commit('SET_LOGIN_FLAG', false)
if (result && result.managementserverid) {
commit('SET_MS_ID', result.managementserverid)
}
const latestVersion = vueProps.$localStorage.get(LATEST_CS_VERSION, { version: '', fetchedTs: 0 })
commit('SET_LATEST_VERSION', latestVersion)
notification.destroy()
@ -276,6 +289,9 @@ const user = {
commit('SET_2FA_PROVIDER', result.providerfor2fa)
commit('SET_2FA_ISSUER', result.issuerfor2fa)
commit('SET_LOGIN_FLAG', false)
if (result && result.managementserverid) {
commit('SET_MS_ID', result.managementserverid)
}
const latestVersion = vueProps.$localStorage.get(LATEST_CS_VERSION, { version: '', fetchedTs: 0 })
commit('SET_LATEST_VERSION', latestVersion)
notification.destroy()
@ -297,6 +313,7 @@ const user = {
const domainStore = vueProps.$localStorage.get(DOMAIN_STORE, {})
const cachedShowSecurityGroups = vueProps.$localStorage.get(SHOW_SECURTIY_GROUPS, false)
const darkMode = vueProps.$localStorage.get(DARK_MODE, false)
// Restore the cached management server id. Default to '' (not false) so the value keeps
// the same string type as the initial state and the logout reset, and a boolean is never
// committed to the store or sent as the managementserverid API parameter.
const msId = vueProps.$localStorage.get(MS_ID, '')
const latestVersion = vueProps.$localStorage.get(LATEST_CS_VERSION, { version: '', fetchedTs: 0 })
const hasAuth = Object.keys(cachedApis).length > 0
@ -311,6 +328,7 @@ const user = {
commit('SET_TIMEZONE_OFFSET', cachedTimezoneOffset)
commit('SET_USE_BROWSER_TIMEZONE', cachedUseBrowserTimezone)
commit('SET_CUSTOM_COLUMNS', cachedCustomColumns)
commit('SET_MS_ID', msId)
// Ensuring we get the user info so that store.getters.user is never empty when the page is freshly loaded
api('listUsers', { username: Cookies.get('username'), listall: true }).then(response => {
@ -458,6 +476,7 @@ const user = {
commit('SET_2FA_PROVIDER', '')
commit('SET_2FA_ISSUER', '')
commit('SET_LOGIN_FLAG', false)
commit('SET_MS_ID', '')
vueProps.$localStorage.remove(CURRENT_PROJECT)
vueProps.$localStorage.remove(ACCESS_TOKEN)
vueProps.$localStorage.remove(HEADER_NOTICES)

Some files were not shown because too many files have changed in this diff Show More