Bug 11522 - New agent manager

1. Load hosts that are in maintenance mode, because maintenance is no longer an agent status.
2. Don't disconnect the agent when entering maintenance mode; again, maintenance is no longer an agent status.
This commit is contained in:
frank 2011-10-19 17:51:20 -07:00
parent 58fefd2891
commit 4e595d04ab
7 changed files with 94 additions and 9 deletions

View File

@ -242,11 +242,11 @@
</target>
<target name="deploydb-simulator">
<condition property="server-setup.file" value="override/server-setup.xml" else="server-setup.xml">
<condition property="server-setup.file" value="${setup.db.dir}/override/server-setup.xml" else="server-setup.xml">
<available file="${setup.db.dir}/override/server-setup.xml" />
</condition>
<condition property="templates.file" value="override/templates.sql" else="templates.sql" >
<condition property="templates.file" value="${setup.db.dir}/override/templates.sql" else="templates.sql" >
<available file="${setup.db.dir}/override/templates.sql" />
</condition>

View File

@ -215,6 +215,8 @@ public interface AgentManager extends Manager {
public boolean disconnectAgent(HostVO host, Status.Event e, long msId);
public void pullAgentToMaintenance(long hostId);
public void pullAgentOutMaintenance(long hostId);
boolean reconnect(long hostId);
}

View File

@ -1710,4 +1710,12 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
attache.cancelAllCommands(Status.Disconnected, false);
}
}
/**
 * Clears the maintenance-mode flag on the agent attache of the given host.
 *
 * <p>The attache is looked up with {@code findAttache(hostId)}; when none is
 * found the call is a no-op.
 *
 * @param hostId id of the host whose agent should leave maintenance mode
 */
@Override
public void pullAgentOutMaintenance(long hostId) {
    final AgentAttache agentAttache = findAttache(hostId);
    if (agentAttache == null) {
        // No attache for this host: nothing to update.
        return;
    }
    agentAttache.setMaintenanceMode(false);
}
}

View File

@ -326,7 +326,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
txn.start();
SearchCriteria<HostVO> sc = UnmanagedDirectConnectSearch.create();
sc.setParameters("lastPinged", lastPingSecondsAfter);
sc.setParameters("resourceStates", ResourceState.ErrorInMaintenance, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.Disabled);
//sc.setParameters("resourceStates", ResourceState.ErrorInMaintenance, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.Disabled);
sc.setJoinParameters("ClusterManagedSearch", "managed", Managed.ManagedState.Managed);
List<HostVO> hosts = lockRows(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit), true);

View File

@ -975,7 +975,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma
return (_agentMgr.reconnect(hostId) ? host : null);
}
@Override
public boolean resourceStateTransitTo(Host host, ResourceState.Event event, long msId) throws NoTransitionException {
ResourceState currentState = host.getResourceState();
@ -1674,7 +1674,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma
try {
resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId);
_agentMgr.disconnectWithoutInvestigation(hostId, Status.Event.ResetRequested);
_agentMgr.pullAgentOutMaintenance(hostId);
return true;
} catch (NoTransitionException e) {
s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e);

View File

@ -46,6 +46,7 @@ import com.cloud.host.HostStats;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.resource.ResourceState;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StoragePoolHostVO;
import com.cloud.storage.StoragePoolVO;
@ -156,7 +157,8 @@ public class StatsCollector {
s_logger.debug("HostStatsCollector is running...");
SearchCriteria<HostVO> sc = _hostDao.createSearchCriteria();
sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString());
sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString());
sc.addAnd("resourceState", SearchCriteria.Op.NIN, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.ErrorInMaintenance);
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.Storage.toString());
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.ConsoleProxy.toString());
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.SecondaryStorage.toString());
@ -196,6 +198,7 @@ public class StatsCollector {
SearchCriteria<HostVO> sc = _hostDao.createSearchCriteria();
sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString());
sc.addAnd("resourceState", SearchCriteria.Op.NIN, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.ErrorInMaintenance);
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.Storage.toString());
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.ConsoleProxy.toString());
sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.SecondaryStorage.toString());

View File

@ -6,17 +6,89 @@ Created on Oct 18, 2011
from cloudstackTestCase import *
from cloudstackAPI import *
import uuid
import threading
import random
import time
# Small threading.Thread subclass that runs one callable with one argument.
# NOTE(review): indentation is lost in this view; the three defs below are
# presumably all methods of Task (callers invoke t.doTask()) - confirm.
class Task(threading.Thread):
# func: callable invoked by run(); param: the single argument handed to it
# (None when not supplied).
def __init__(self, func, param=None):
super(Task, self).__init__()
self.func = func
self.param = param
# Thread body: calls the stored callable with the stored parameter.
def run(self):
self.func(self.param)
# Convenience wrapper that simply starts the thread.
def doTask(self):
self.start()
class TestDeploy100Hosts(cloudstackTestCase):
def test_deploy100Hosts(self):
hosts = []
# Issues an addHost API call for a simulator host in zone/pod/cluster 1 and
# returns the id taken from the response.
# url: value interpolated into the "http://sim/%s" host URL.
def deployHost(self, url):
apiClient = self.testClient.getApiClient()
addHostCmd = addHost.addHostCmd()
addHostCmd.hypervisor = "simulator"
addHostCmd.clusterid = 1
addHostCmd.zoneid = 1
addHostCmd.podid = 1
# NOTE(review): url is assigned twice; as written the second assignment wins.
# The first line looks like the pre-change version left over from the diff - confirm.
addHostCmd.url = "http://sim/10.223.63.1"
addHostCmd.url = "http://sim/%s"%url
addHostCmd.username = "placeholder"
addHostCmd.password = "placeholder"
addHostResponce = apiClient.addHost(addHostCmd)
# NOTE(review): two consecutive returns; as written the second is unreachable.
# Presumably only the indexed form ([0].id) belongs to the new code - confirm.
return addHostResponce.id
return addHostResponce[0].id
# Starts a background Task whose worker repeatedly picks a random entry of
# self.hosts and sends a cancelHostMaintenance API call for it.
# NOTE(review): indentation is lost in this view; the try/except is presumably
# inside the while loop and the Task creation outside run() - confirm.
def randomCancelMaintenance(self):
# Worker executed on the Task thread; `param` is not used.
def run(param):
# No exit condition: the loop runs until the process is stopped.
while(1):
try:
# Wait 1 or 2 seconds (randint is inclusive) between attempts.
interval = random.randint(1, 2)
time.sleep(interval)
# Skip this round while no host ids are registered.
if len(self.hosts) == 0:
continue
index = random.randint(0, len(self.hosts)-1)
hostId = self.hosts[index]
apiClient = self.testClient.getApiClient()
cMaintainCmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
cMaintainCmd.id = hostId
response = apiClient.cancelHostMaintenance(cMaintainCmd)
id = response.id
print "Host %s cancelled maintenance mode" % id
# Errors are only printed; nothing is re-raised.
except Exception, e:
print e
t = Task(run)
t.doTask()
# Starts a background Task whose worker repeatedly picks a random entry of
# self.hosts and sends a prepareHostForMaintenance API call for it.
# NOTE(review): indentation is lost in this view; the try/except is presumably
# inside the while loop and the Task creation outside run() - confirm.
def randomEnterMaintenance(self):
# Worker executed on the Task thread; `param` is not used.
def run(param):
# No exit condition: the loop runs until the process is stopped.
while(1):
try:
# Wait 1 or 2 seconds (randint is inclusive) between attempts.
interval = random.randint(1, 2)
time.sleep(interval)
# Skip this round while no host ids are registered.
if len(self.hosts) == 0:
continue
index = random.randint(0, len(self.hosts)-1)
hostId = self.hosts[index]
apiClient = self.testClient.getApiClient()
maintainCmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd()
maintainCmd.id = hostId
response = apiClient.prepareHostForMaintenance(maintainCmd)
id = response.id
print "Host %s entered maintenance mode" % id
# Errors are only printed; nothing is re-raised.
except Exception, e:
print e
t = Task(run)
t.doTask()
# Fills self.hosts with the integers 0..199, starts the random enter- and
# cancel-maintenance workers, then blocks in an endless sleep loop.
# NOTE(review): the name says 100 hosts but the loop registers 200 ids.
def test_deploy100Hosts(self):
# Real deployment through deployHost() is disabled; the loop below registers
# the loop index itself as the host id instead.
#for i in range(200):
#self.hosts.append(self.deployHost(i))
for i in range(200):
self.hosts.append(i)
self.randomEnterMaintenance()
self.randomCancelMaintenance()
# Never returns: sleeps in 10000-second slices with no exit condition.
while(1): time.sleep(10000)