Disconnect the agent when agent rebalance fails

Reviewed-by: Alex Huang
This commit is contained in:
alena 2011-09-15 18:20:36 -07:00
parent 6b82ba3ff9
commit 117de2a61a
3 changed files with 44 additions and 36 deletions

View File

@ -721,10 +721,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
if (event == Event.RequestAgentRebalance) {
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
} else if (event == Event.StartAgentRebalance) {
return rebalanceHost(agentId, currentOwnerId, futureOwnerId);
}
} else if (event == Event.StartAgentRebalance) {
boolean result = false;
try {
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + agentId, e);
} finally {
if (!result) {
failRebalance(agentId);
return false;
}
}
}
return true;
}
@ -883,29 +892,22 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
// remove the host from re-balance list and delete from op_host_transfer DB
// no need to do anything with the real attache as we haven't modified it yet
Date cutTime = DateUtil.currentGMTTime();
if (_hostTransferDao.isNotActive(hostId, new Date(cutTime.getTime() - rebalanceTimeOut))) {
HostTransferMapVO transferMap = _hostTransferDao.findActiveHostTransferMapByHostId(hostId, new Date(cutTime.getTime() - rebalanceTimeOut));
if (transferMap == null) {
s_logger.debug("Timed out waiting for the host id=" + hostId + " to be ready to transfer, skipping rebalance for the host");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
if (attache.forForward()) {
if (transferMap.getInitialOwner() != _nodeId || attache.forForward()) {
s_logger.debug("Management server " + _nodeId + " doesn't own host id=" + hostId + " any more, skipping rebalance for the host");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
HostTransferMapVO transferMap = _hostTransferDao.findByIdAndCurrentOwnerId(hostId, _nodeId);
if (transferMap == null) {
s_logger.debug("Can't transfer host id=" + hostId + "; record for the host no longer exists in op_host_transfer table");
iterator.remove();
_hostTransferDao.completeAgentTransfer(hostId);
continue;
}
ManagementServerHostVO ms = _mshostDao.findByMsid(transferMap.getFutureOwner());
if (ms != null && ms.getState() != ManagementServerHost.State.Up) {
s_logger.debug("Can't transfer host " + hostId + " as it's future owner is not in UP state: " + ms + ", skipping rebalance for the host");
@ -996,7 +998,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
protected void finishRebalance(final long hostId, long futureOwnerId, Event event) throws AgentUnavailableException{
protected void finishRebalance(final long hostId, long futureOwnerId, Event event){
boolean success = (event == Event.RebalanceCompleted) ? true : false;
if (s_logger.isDebugEnabled()) {
@ -1039,10 +1041,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
_hostTransferDao.completeAgentTransfer(hostId);
}
protected void failRebalance(final long hostId) throws AgentUnavailableException{
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
_hostTransferDao.completeAgentTransfer(hostId);
reconnect(hostId);
/**
 * Cleans up after a failed rebalance of the given host: marks the agent transfer as
 * completed in the host-transfer DAO and then reconnects the host.
 *
 * This method is called from {@code finally} blocks of the rebalance paths, so it must
 * not throw; any exception raised during cleanup is caught and logged instead.
 *
 * @param hostId id of the host (agent) whose rebalance failed
 */
protected void failRebalance(final long hostId){
    try {
        s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
        _hostTransferDao.completeAgentTransfer(hostId);
        reconnect(hostId);
    } catch (Exception ex) {
        // Best-effort cleanup: do not propagate, but keep the cause and stack trace in the
        // log (the exception may come from either the DAO call or the reconnect).
        s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup", ex);
    }
}
@DB
@ -1116,14 +1122,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public void run() {
boolean result = false;
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Rebalancing host id=" + hostId);
}
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
} catch (Exception e) {
s_logger.warn("Unable to rebalance host id=" + hostId, e);
} finally {
if (!result) {
failRebalance(hostId);
}
StackMaid.current().exitCleanup();
}
}

View File

@ -37,7 +37,7 @@ public interface HostTransferMapDao extends GenericDao<HostTransferMapVO, Long>
List<HostTransferMapVO> listBy(long futureOwnerId, HostTransferState state);
boolean isNotActive(long hostId, Date cutTime);
HostTransferMapVO findActiveHostTransferMapByHostId(long hostId, Date cutTime);
boolean startAgentTransfer(long hostId);

View File

@ -39,7 +39,7 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
protected final SearchBuilder<HostTransferMapVO> AllFieldsSearch;
protected final SearchBuilder<HostTransferMapVO> IntermediateStateSearch;
protected final SearchBuilder<HostTransferMapVO> InactiveSearch;
protected final SearchBuilder<HostTransferMapVO> ActiveSearch;
public HostTransferMapDaoImpl() {
AllFieldsSearch = createSearchBuilder();
@ -55,11 +55,11 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
IntermediateStateSearch.and("state", IntermediateStateSearch.entity().getState(), SearchCriteria.Op.IN);
IntermediateStateSearch.done();
InactiveSearch = createSearchBuilder();
InactiveSearch.and("created", InactiveSearch.entity().getCreated(), SearchCriteria.Op.LTEQ);
InactiveSearch.and("id", InactiveSearch.entity().getId(), SearchCriteria.Op.EQ);
InactiveSearch.and("state", InactiveSearch.entity().getState(), SearchCriteria.Op.EQ);
InactiveSearch.done();
ActiveSearch = createSearchBuilder();
ActiveSearch.and("created", ActiveSearch.entity().getCreated(), SearchCriteria.Op.GT);
ActiveSearch.and("id", ActiveSearch.entity().getId(), SearchCriteria.Op.EQ);
ActiveSearch.and("state", ActiveSearch.entity().getState(), SearchCriteria.Op.EQ);
ActiveSearch.done();
}
@ -100,17 +100,14 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
}
@Override
public boolean isNotActive(long hostId, Date cutTime) {
SearchCriteria<HostTransferMapVO> sc = InactiveSearch.create();
public HostTransferMapVO findActiveHostTransferMapByHostId(long hostId, Date cutTime) {
SearchCriteria<HostTransferMapVO> sc = ActiveSearch.create();
sc.setParameters("id", hostId);
sc.setParameters("state", HostTransferState.TransferRequested);
sc.setParameters("created", cutTime);
if (listBy(sc).isEmpty()) {
return false;
} else {
return true;
}
return findOneBy(sc);
}
@Override