mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Do agent disconnect when agent rebalance fails
Reviewed-by: Alex Huang
This commit is contained in:
parent
6b82ba3ff9
commit
117de2a61a
@ -721,10 +721,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
public boolean executeRebalanceRequest(long agentId, long currentOwnerId, long futureOwnerId, Event event) throws AgentUnavailableException, OperationTimedoutException {
|
||||
if (event == Event.RequestAgentRebalance) {
|
||||
return setToWaitForRebalance(agentId, currentOwnerId, futureOwnerId);
|
||||
} else if (event == Event.StartAgentRebalance) {
|
||||
return rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
}
|
||||
|
||||
} else if (event == Event.StartAgentRebalance) {
|
||||
boolean result = false;
|
||||
try {
|
||||
result = rebalanceHost(agentId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + agentId, e);
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(agentId);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -883,29 +892,22 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
// remove the host from re-balance list and delete from op_host_transfer DB
|
||||
// no need to do anything with the real attache as we haven't modified it yet
|
||||
Date cutTime = DateUtil.currentGMTTime();
|
||||
if (_hostTransferDao.isNotActive(hostId, new Date(cutTime.getTime() - rebalanceTimeOut))) {
|
||||
HostTransferMapVO transferMap = _hostTransferDao.findActiveHostTransferMapByHostId(hostId, new Date(cutTime.getTime() - rebalanceTimeOut));
|
||||
|
||||
if (transferMap == null) {
|
||||
s_logger.debug("Timed out waiting for the host id=" + hostId + " to be ready to transfer, skipping rebalance for the host");
|
||||
iterator.remove();
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (attache.forForward()) {
|
||||
if (transferMap.getInitialOwner() != _nodeId || attache.forForward()) {
|
||||
s_logger.debug("Management server " + _nodeId + " doesn't own host id=" + hostId + " any more, skipping rebalance for the host");
|
||||
iterator.remove();
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
continue;
|
||||
}
|
||||
|
||||
HostTransferMapVO transferMap = _hostTransferDao.findByIdAndCurrentOwnerId(hostId, _nodeId);
|
||||
|
||||
if (transferMap == null) {
|
||||
s_logger.debug("Can't transfer host id=" + hostId + "; record for the host no longer exists in op_host_transfer table");
|
||||
iterator.remove();
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
ManagementServerHostVO ms = _mshostDao.findByMsid(transferMap.getFutureOwner());
|
||||
if (ms != null && ms.getState() != ManagementServerHost.State.Up) {
|
||||
s_logger.debug("Can't transfer host " + hostId + " as it's future owner is not in UP state: " + ms + ", skipping rebalance for the host");
|
||||
@ -996,7 +998,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
}
|
||||
|
||||
|
||||
protected void finishRebalance(final long hostId, long futureOwnerId, Event event) throws AgentUnavailableException{
|
||||
protected void finishRebalance(final long hostId, long futureOwnerId, Event event){
|
||||
|
||||
boolean success = (event == Event.RebalanceCompleted) ? true : false;
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
@ -1039,10 +1041,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
}
|
||||
|
||||
protected void failRebalance(final long hostId) throws AgentUnavailableException{
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
reconnect(hostId);
|
||||
protected void failRebalance(final long hostId){
|
||||
try {
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
reconnect(hostId);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
|
||||
}
|
||||
}
|
||||
|
||||
@DB
|
||||
@ -1116,14 +1122,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
boolean result = false;
|
||||
try {
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Rebalancing host id=" + hostId);
|
||||
}
|
||||
rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
result = rebalanceHost(hostId, currentOwnerId, futureOwnerId);
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Unable to rebalance host id=" + hostId, e);
|
||||
|
||||
} finally {
|
||||
if (!result) {
|
||||
failRebalance(hostId);
|
||||
}
|
||||
StackMaid.current().exitCleanup();
|
||||
}
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ public interface HostTransferMapDao extends GenericDao<HostTransferMapVO, Long>
|
||||
|
||||
List<HostTransferMapVO> listBy(long futureOwnerId, HostTransferState state);
|
||||
|
||||
boolean isNotActive(long hostId, Date cutTime);
|
||||
HostTransferMapVO findActiveHostTransferMapByHostId(long hostId, Date cutTime);
|
||||
|
||||
boolean startAgentTransfer(long hostId);
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
|
||||
|
||||
protected final SearchBuilder<HostTransferMapVO> AllFieldsSearch;
|
||||
protected final SearchBuilder<HostTransferMapVO> IntermediateStateSearch;
|
||||
protected final SearchBuilder<HostTransferMapVO> InactiveSearch;
|
||||
protected final SearchBuilder<HostTransferMapVO> ActiveSearch;
|
||||
|
||||
public HostTransferMapDaoImpl() {
|
||||
AllFieldsSearch = createSearchBuilder();
|
||||
@ -55,11 +55,11 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
|
||||
IntermediateStateSearch.and("state", IntermediateStateSearch.entity().getState(), SearchCriteria.Op.IN);
|
||||
IntermediateStateSearch.done();
|
||||
|
||||
InactiveSearch = createSearchBuilder();
|
||||
InactiveSearch.and("created", InactiveSearch.entity().getCreated(), SearchCriteria.Op.LTEQ);
|
||||
InactiveSearch.and("id", InactiveSearch.entity().getId(), SearchCriteria.Op.EQ);
|
||||
InactiveSearch.and("state", InactiveSearch.entity().getState(), SearchCriteria.Op.EQ);
|
||||
InactiveSearch.done();
|
||||
ActiveSearch = createSearchBuilder();
|
||||
ActiveSearch.and("created", ActiveSearch.entity().getCreated(), SearchCriteria.Op.GT);
|
||||
ActiveSearch.and("id", ActiveSearch.entity().getId(), SearchCriteria.Op.EQ);
|
||||
ActiveSearch.and("state", ActiveSearch.entity().getState(), SearchCriteria.Op.EQ);
|
||||
ActiveSearch.done();
|
||||
|
||||
}
|
||||
|
||||
@ -100,17 +100,14 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isNotActive(long hostId, Date cutTime) {
|
||||
SearchCriteria<HostTransferMapVO> sc = InactiveSearch.create();
|
||||
public HostTransferMapVO findActiveHostTransferMapByHostId(long hostId, Date cutTime) {
|
||||
SearchCriteria<HostTransferMapVO> sc = ActiveSearch.create();
|
||||
sc.setParameters("id", hostId);
|
||||
sc.setParameters("state", HostTransferState.TransferRequested);
|
||||
sc.setParameters("created", cutTime);
|
||||
|
||||
if (listBy(sc).isEmpty()) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
return findOneBy(sc);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user