mirror of
https://github.com/apache/cloudstack.git
synced 2025-11-03 04:12:31 +01:00
bug 12790: use processDisconnect() when disconnecting the agent during the agent LB process
status 12790: resolved fixed. Conflicts: api/src/com/cloud/host/Status.java, server/src/com/cloud/agent/manager/AgentManagerImpl.java, server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java
This commit is contained in:
parent
af50ecb7db
commit
d56d1f699d
BIN
.vmops.log.swp
BIN
.vmops.log.swp
Binary file not shown.
@ -21,7 +21,6 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import com.cloud.utils.fsm.NoTransitionException;
|
||||
import com.cloud.utils.fsm.StateMachine;
|
||||
import com.cloud.utils.fsm.StateMachine2;
|
||||
|
||||
public enum Status {
|
||||
@ -33,7 +32,7 @@ public enum Status {
|
||||
Alert(true, true, true),
|
||||
Removed(true, false, true),
|
||||
Error(true, false, true),
|
||||
Rebalancing(false, false, false);
|
||||
Rebalancing(true, false, true);
|
||||
|
||||
private final boolean updateManagementServer;
|
||||
private final boolean checkManagementServer;
|
||||
@ -157,7 +156,9 @@ public enum Status {
|
||||
s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.AgentConnected, Status.Connecting);
|
||||
s_fsm.addTransition(Status.Rebalancing, Event.AgentDisconnected, Status.Rebalancing);
|
||||
s_fsm.addTransition(Status.Error, Event.AgentConnected, Status.Connecting);
|
||||
}
|
||||
|
||||
|
||||
@ -557,6 +557,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
||||
return req.getSequence();
|
||||
}
|
||||
|
||||
|
||||
public void removeAgent(AgentAttache attache, Status nextState) {
|
||||
if (attache == null) {
|
||||
return;
|
||||
@ -581,6 +582,13 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
||||
if (removed != null) {
|
||||
removed.disconnect(nextState);
|
||||
}
|
||||
|
||||
for (Pair<Integer, Listener> monitor : _hostMonitors) {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
|
||||
}
|
||||
monitor.second().processDisconnect(hostId, nextState);
|
||||
}
|
||||
}
|
||||
|
||||
protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd, boolean forRebalance) throws ConnectionException {
|
||||
@ -848,12 +856,6 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
||||
removeAgent(attache, nextStatus);
|
||||
disconnectAgent(host, event, _nodeId);
|
||||
|
||||
for (Pair<Integer, Listener> monitor : _hostMonitors) {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
|
||||
}
|
||||
monitor.second().processDisconnect(hostId, nextStatus);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1014,7 +1016,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) {
|
||||
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) {
|
||||
s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus());
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -42,7 +42,6 @@ import com.cloud.agent.api.TransferAgentCommand;
|
||||
import com.cloud.agent.transport.Request;
|
||||
import com.cloud.agent.transport.Request.Version;
|
||||
import com.cloud.agent.transport.Response;
|
||||
import com.cloud.api.commands.UpdateHostPasswordCmd;
|
||||
import com.cloud.cluster.ClusterManager;
|
||||
import com.cloud.cluster.ClusterManagerListener;
|
||||
import com.cloud.cluster.ClusteredAgentRebalanceService;
|
||||
@ -64,18 +63,16 @@ import com.cloud.host.Status;
|
||||
import com.cloud.host.Status.Event;
|
||||
import com.cloud.resource.ServerResource;
|
||||
import com.cloud.storage.resource.DummySecondaryStorageResource;
|
||||
import com.cloud.user.User;
|
||||
import com.cloud.utils.DateUtil;
|
||||
import com.cloud.utils.NumbersUtil;
|
||||
import com.cloud.utils.component.Adapters;
|
||||
import com.cloud.utils.component.ComponentLocator;
|
||||
import com.cloud.utils.component.Inject;
|
||||
import com.cloud.utils.concurrency.NamedThreadFactory;
|
||||
import com.cloud.utils.db.DB;
|
||||
import com.cloud.utils.db.SearchCriteria.Op;
|
||||
import com.cloud.utils.db.SearchCriteria2;
|
||||
import com.cloud.utils.db.SearchCriteriaService;
|
||||
import com.cloud.utils.db.Transaction;
|
||||
import com.cloud.utils.db.SearchCriteria.Op;
|
||||
import com.cloud.utils.exception.CloudRuntimeException;
|
||||
import com.cloud.utils.nio.Link;
|
||||
import com.cloud.utils.nio.Task;
|
||||
@ -657,22 +654,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
|
||||
@Override
|
||||
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
|
||||
boolean result = false;
|
||||
if (event == Event.RequestAgentRebalance) {
|
||||
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
|
||||
} else if (event == Event.StartAgentRebalance) {
|
||||
boolean result = false;
|
||||
try {
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + agentId, e);
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(agentId);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -899,18 +891,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
if (currentOwnerId == _nodeId) {
|
||||
if (!startRebalance(hostId)) {
|
||||
s_logger.debug("Failed to start agent rebalancing");
|
||||
failRebalance(hostId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance);
|
||||
if (answer == null || !answer[0].getResult()) {
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process");
|
||||
result = false;
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
|
||||
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
|
||||
result = false;
|
||||
}
|
||||
|
||||
@ -918,7 +909,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
s_logger.debug("Successfully transfered host id=" + hostId + " to management server " + futureOwnerId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceCompleted);
|
||||
} else {
|
||||
s_logger.debug("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
|
||||
s_logger.warn("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
|
||||
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
|
||||
}
|
||||
|
||||
@ -926,13 +917,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
HostVO host = _hostDao.findById(hostId);
|
||||
try {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process");
|
||||
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
}
|
||||
result = loadDirectlyConnectedHost(host, true);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex);
|
||||
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process due to:", ex);
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
s_logger.debug("Successfully loaded directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
} else {
|
||||
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
@ -943,7 +940,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
|
||||
boolean success = (event == Event.RebalanceCompleted) ? true : false;
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with result " + success);
|
||||
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with event " + event);
|
||||
}
|
||||
|
||||
AgentAttache attache = findAttache(hostId);
|
||||
@ -986,13 +983,12 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
try {
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
reconnect(hostId);
|
||||
handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
|
||||
}
|
||||
}
|
||||
|
||||
@DB
|
||||
protected boolean startRebalance(final long hostId) {
|
||||
HostVO host = _hostDao.findById(hostId);
|
||||
|
||||
@ -1004,7 +1000,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
synchronized (_agents) {
|
||||
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
|
||||
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
|
||||
removeAgent(attache, Status.Rebalancing);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance);
|
||||
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
|
||||
if (forwardAttache == null) {
|
||||
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
|
||||
@ -1021,17 +1017,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Transaction txn = Transaction.currentTxn();
|
||||
txn.start();
|
||||
|
||||
s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalancing);
|
||||
host.setManagementServerId(null);
|
||||
_agentMgr.agentStatusTransitTo(host, Event.StartAgentRebalance, _nodeId);
|
||||
}
|
||||
_hostTransferDao.startAgentTransfer(hostId);
|
||||
txn.commit();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -1063,19 +1050,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
boolean result = false;
|
||||
try {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Rebalancing host id=" + hostId);
|
||||
}
|
||||
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + hostId, e);
|
||||
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(hostId);
|
||||
}
|
||||
StackMaid.current().exitCleanup();
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user