bug 12790: use processDisconnect() when disconnecting the agent during the agent load-balancing (LB) process

status 12790: resolved fixed

Conflicts:

	api/src/com/cloud/host/Status.java
	server/src/com/cloud/agent/manager/AgentManagerImpl.java
	server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java
This commit is contained in:
Alena Prokharchyk 2011-12-29 16:56:04 -08:00
parent af50ecb7db
commit d56d1f699d
4 changed files with 32 additions and 47 deletions

Binary file not shown.

View File

@ -21,7 +21,6 @@ import java.util.List;
import java.util.Set;
import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.fsm.StateMachine;
import com.cloud.utils.fsm.StateMachine2;
public enum Status {
@ -33,7 +32,7 @@ public enum Status {
Alert(true, true, true),
Removed(true, false, true),
Error(true, false, true),
Rebalancing(false, false, false);
Rebalancing(true, false, true);
private final boolean updateManagementServer;
private final boolean checkManagementServer;
@ -157,7 +156,9 @@ public enum Status {
s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Rebalancing, Event.AgentConnected, Status.Connecting);
s_fsm.addTransition(Status.Rebalancing, Event.AgentDisconnected, Status.Rebalancing);
s_fsm.addTransition(Status.Error, Event.AgentConnected, Status.Connecting);
}

View File

@ -557,6 +557,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
return req.getSequence();
}
public void removeAgent(AgentAttache attache, Status nextState) {
if (attache == null) {
return;
@ -581,6 +582,13 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
if (removed != null) {
removed.disconnect(nextState);
}
for (Pair<Integer, Listener> monitor : _hostMonitors) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
}
monitor.second().processDisconnect(hostId, nextState);
}
}
protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd, boolean forRebalance) throws ConnectionException {
@ -848,12 +856,6 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
removeAgent(attache, nextStatus);
disconnectAgent(host, event, _nodeId);
for (Pair<Integer, Listener> monitor : _hostMonitors) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName());
}
monitor.second().processDisconnect(hostId, nextStatus);
}
return true;
}
@ -1014,7 +1016,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
return false;
}
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) {
if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert && host.getStatus() != Status.Rebalancing) {
s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus());
return false;
}

View File

@ -42,7 +42,6 @@ import com.cloud.agent.api.TransferAgentCommand;
import com.cloud.agent.transport.Request;
import com.cloud.agent.transport.Request.Version;
import com.cloud.agent.transport.Response;
import com.cloud.api.commands.UpdateHostPasswordCmd;
import com.cloud.cluster.ClusterManager;
import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ClusteredAgentRebalanceService;
@ -64,18 +63,16 @@ import com.cloud.host.Status;
import com.cloud.host.Status.Event;
import com.cloud.resource.ServerResource;
import com.cloud.storage.resource.DummySecondaryStorageResource;
import com.cloud.user.User;
import com.cloud.utils.DateUtil;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.component.Adapters;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.component.Inject;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.SearchCriteria.Op;
import com.cloud.utils.db.SearchCriteria2;
import com.cloud.utils.db.SearchCriteriaService;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.SearchCriteria.Op;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.nio.Link;
import com.cloud.utils.nio.Task;
@ -657,22 +654,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
boolean result = false;
if (event == Event.RequestAgentRebalance) {
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
} else if (event == Event.StartAgentRebalance) {
boolean result = false;
try {
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + agentId, e);
} finally {
if (!result) {
failRebalance(agentId);
return false;
}
}
}
return true;
return result;
}
@Override
@ -899,18 +891,17 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
if (currentOwnerId == _nodeId) {
if (!startRebalance(hostId)) {
s_logger.debug("Failed to start agent rebalancing");
failRebalance(hostId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
return false;
}
try {
Answer[] answer = sendRebalanceCommand(futureOwnerId, hostId, currentOwnerId, futureOwnerId, Event.StartAgentRebalance);
if (answer == null || !answer[0].getResult()) {
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process");
result = false;
}
} catch (Exception ex) {
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
s_logger.warn("Host " + hostId + " failed to connect to the management server " + futureOwnerId + " as a part of rebalance process", ex);
result = false;
}
@ -918,7 +909,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
s_logger.debug("Successfully transfered host id=" + hostId + " to management server " + futureOwnerId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceCompleted);
} else {
s_logger.debug("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
s_logger.warn("Failed to transfer host id=" + hostId + " to management server " + futureOwnerId);
finishRebalance(hostId, futureOwnerId, Event.RebalanceFailed);
}
@ -926,13 +917,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
HostVO host = _hostDao.findById(hostId);
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process");
s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
}
result = loadDirectlyConnectedHost(host, true);
} catch (Exception ex) {
s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex);
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process due to:", ex);
result = false;
}
if (result) {
s_logger.debug("Successfully loaded directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
} else {
s_logger.warn("Failed to load directly connected host " + host.getId() + "(" + host.getName() + ") to the management server " + _nodeId + " as a part of rebalance process");
}
}
return result;
@ -943,7 +940,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
boolean success = (event == Event.RebalanceCompleted) ? true : false;
if (s_logger.isDebugEnabled()) {
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with result " + success);
s_logger.debug("Finishing rebalancing for the agent " + hostId + " with event " + event);
}
AgentAttache attache = findAttache(hostId);
@ -986,13 +983,12 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
try {
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
_hostTransferDao.completeAgentTransfer(hostId);
reconnect(hostId);
handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed);
} catch (Exception ex) {
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
}
}
@DB
protected boolean startRebalance(final long hostId) {
HostVO host = _hostDao.findById(hostId);
@ -1004,7 +1000,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
synchronized (_agents) {
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
removeAgent(attache, Status.Rebalancing);
handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance);
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
if (forwardAttache == null) {
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
@ -1021,17 +1017,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
return false;
}
}
Transaction txn = Transaction.currentTxn();
txn.start();
s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalancing);
host.setManagementServerId(null);
_agentMgr.agentStatusTransitTo(host, Event.StartAgentRebalance, _nodeId);
}
_hostTransferDao.startAgentTransfer(hostId);
txn.commit();
return true;
}
@ -1063,19 +1050,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public void run() {
boolean result = false;
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Rebalancing host id=" + hostId);
}
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + hostId, e);
} finally {
if (!result) {
failRebalance(hostId);
}
StackMaid.current().exitCleanup();
}
}