bug 10734: removed global lock in "DirectAgentScanTimerTask". This lock used to prevent the task from executing on multiple management servers simultaneously.

status 10734: resolved fixed
This commit is contained in:
alena 2011-07-18 12:54:34 -07:00
parent b3957d525b
commit 62270fe62e
4 changed files with 25 additions and 40 deletions

View File

@ -1272,7 +1272,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
try {
if (id != null) {
host = _hostDao.findById(id);
if (!_hostDao.directConnect(host, _nodeId, false)) {
if (!_hostDao.directConnect(host, _nodeId)) {
s_logger.info("MS " + host.getManagementServerId() + " is loading " + host);
return null;
}
@ -1285,7 +1285,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
}
if (host != null) {
if (!_hostDao.directConnect(host, _nodeId, true)) {
if (!_hostDao.directConnect(host, _nodeId)) {
host = _hostDao.findById(id);
s_logger.info("MS " + host.getManagementServerId() + " is loading " + host + " after it has been initialized.");
return null;

View File

@ -143,18 +143,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
private void runDirectAgentScanTimerTask() {
GlobalLock scanLock = GlobalLock.getInternLock("clustermgr.scan");
try {
if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
try {
scanDirectAgentToLoad();
} finally {
scanLock.unlock();
}
}
} finally {
scanLock.releaseRef();
}
scanDirectAgentToLoad();
}
private void scanDirectAgentToLoad() {
@ -164,22 +153,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
// for agents that are self-managed, threshold to be considered as disconnected is 3 ping intervals
long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval * 3);
List<HostVO> hosts = _hostDao.findDirectAgentToLoad(cutSeconds, _loadSize);
if (hosts != null && hosts.size() == _loadSize) {
//if list contains more than one cluster, exclude the last cluster from the list
if (hosts.size() > 1 && hosts.get(0).getClusterId().longValue() != hosts.get(hosts.size()-1).getClusterId().longValue()) {
Long clusterId = hosts.get((int) (_loadSize - 1)).getClusterId();
if (clusterId != null) {
for (int i = (int) (_loadSize - 1); i > 0; i--) {
if (hosts.get(i).getClusterId().longValue() == clusterId.longValue()) {
hosts.remove(i);
} else {
break;
}
}
}
}
}
List<HostVO> hosts = _hostDao.findAndUpdateDirectAgentToLoad(cutSeconds, _loadSize, _nodeId);
if (hosts != null && hosts.size() > 0) {
s_logger.debug("Found " + hosts.size() + " unmanaged direct hosts, processing connect for them...");
for (HostVO host : hosts) {

View File

@ -68,7 +68,7 @@ public interface HostDao extends GenericDao<HostVO, Long> {
*/
List<HostVO> findDirectlyConnectedHosts();
List<HostVO> findDirectAgentToLoad(long lastPingSecondsAfter, Long limit);
List<HostVO> findAndUpdateDirectAgentToLoad(long lastPingSecondsAfter, Long limit, long managementServerId);
/**
* Mark the host as disconnected if it is in one of these states.
* The management server id is set to null.
@ -165,7 +165,7 @@ public interface HostDao extends GenericDao<HostVO, Long> {
List<HostVO> listSecondaryStorageHosts(long dataCenterId);
boolean directConnect(HostVO host, long msId, boolean secondConnect);
boolean directConnect(HostVO host, long msId);
List<HostVO> listDirectHostsBy(long msId, Status status);

View File

@ -50,9 +50,9 @@ import com.cloud.utils.db.Filter;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.GenericSearchBuilder;
import com.cloud.utils.db.JoinBuilder;
import com.cloud.utils.db.JoinBuilder.JoinType;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.JoinBuilder.JoinType;
import com.cloud.utils.db.SearchCriteria.Func;
import com.cloud.utils.db.SearchCriteria.Op;
import com.cloud.utils.db.Transaction;
@ -364,12 +364,24 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return search(sc, null);
}
@Override
public List<HostVO> findDirectAgentToLoad(long lastPingSecondsAfter, Long limit) {
@Override @DB
public List<HostVO> findAndUpdateDirectAgentToLoad(long lastPingSecondsAfter, Long limit, long managementServerId) {
Transaction txn = Transaction.currentTxn();
txn.start();
SearchCriteria<HostVO> sc = UnmanagedDirectConnectSearch.create();
sc.setParameters("lastPinged", lastPingSecondsAfter);
sc.setParameters("statuses", Status.ErrorInMaintenance, Status.Maintenance, Status.PrepareForMaintenance);
return search(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit));
List<HostVO> hosts = lockRows(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit), true);
for (HostVO host : hosts) {
host.setManagementServerId(managementServerId);
update(host.getId(), host);
}
txn.commit();
return hosts;
}
@Override
@ -518,12 +530,10 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
}
@Override
public boolean directConnect(HostVO host, long msId, boolean secondConnect) {
public boolean directConnect(HostVO host, long msId) {
SearchCriteria<HostVO> sc = DirectConnectSearch.create();
sc.setParameters("id", host.getId());
if (secondConnect) {
sc.setParameters("server", msId);
}
sc.setParameters("server", msId);
host.setManagementServerId(msId);
host.setLastPinged(System.currentTimeMillis() >> 10);