mirror of
https://github.com/apache/cloudstack.git
synced 2025-11-03 04:12:31 +01:00
2 fixes for Agent Load Balancer:
* when a management server dies and the other management servers are notified about it, each running management server has to clean up the host_transfer records belonging to the dead management server
* issue the agent load balancing task only when the agent load (the fraction of agents in the system that are connected) reaches or exceeds "agent.load.threshold" - 70% by default
Conflicts: server/src/com/cloud/configuration/Config.java setup/db/db/schema-228to229.sql
This commit is contained in:
parent
abd3321077
commit
307741edcd
@ -567,7 +567,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
|
||||
//cancel all transfer tasks
|
||||
s_transferExecutor.shutdownNow();
|
||||
cleanupTransferMap();
|
||||
cleanupTransferMap(_nodeId);
|
||||
|
||||
return super.stop();
|
||||
}
|
||||
@ -695,6 +695,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
for (ManagementServerHostVO vo : nodeList) {
|
||||
s_logger.info("Marking hosts as disconnected on Management server" + vo.getMsid());
|
||||
_hostDao.markHostsAsDisconnected(vo.getMsid());
|
||||
s_logger.info("Deleting entries from op_host_transfer table for Management server " + vo.getMsid());
|
||||
cleanupTransferMap(vo.getMsid());
|
||||
}
|
||||
}
|
||||
|
||||
@ -1071,14 +1073,14 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||
return true;
|
||||
}
|
||||
|
||||
protected void cleanupTransferMap() {
|
||||
List<HostTransferMapVO> hostsJoingingCluster = _hostTransferDao.listHostsJoiningCluster(_nodeId);
|
||||
protected void cleanupTransferMap(long msId) {
|
||||
List<HostTransferMapVO> hostsJoingingCluster = _hostTransferDao.listHostsJoiningCluster(msId);
|
||||
|
||||
for (HostTransferMapVO hostJoingingCluster : hostsJoingingCluster) {
|
||||
_hostTransferDao.remove(hostJoingingCluster.getId());
|
||||
}
|
||||
|
||||
List<HostTransferMapVO> hostsLeavingCluster = _hostTransferDao.listHostsLeavingCluster(_nodeId);
|
||||
List<HostTransferMapVO> hostsLeavingCluster = _hostTransferDao.listHostsLeavingCluster(msId);
|
||||
for (HostTransferMapVO hostLeavingCluster : hostsLeavingCluster) {
|
||||
_hostTransferDao.remove(hostLeavingCluster.getId());
|
||||
}
|
||||
|
||||
@ -127,6 +127,9 @@ public class ClusterManagerImpl implements ClusterManager {
|
||||
private String _name;
|
||||
private String _clusterNodeIP = "127.0.0.1";
|
||||
private boolean _agentLBEnabled = false;
|
||||
private double _connectedAgentsThreshold = 0.7;
|
||||
private static boolean _agentLbHappened = false;
|
||||
|
||||
|
||||
public ClusterManagerImpl() {
|
||||
clusterPeers = new HashMap<String, ClusterService>();
|
||||
@ -607,6 +610,26 @@ public class ClusterManagerImpl implements ClusterManager {
|
||||
}
|
||||
|
||||
peerScan();
|
||||
|
||||
// The agent LB task is scheduled only once (guarded by _agentLbHappened), and only when the ratio of managed routing agents to all routing agents reaches or exceeds _connectedAgentsThreshold
|
||||
if (_agentLBEnabled && !_agentLbHappened) {
|
||||
List<HostVO> allManagedRoutingAgents = _hostDao.listManagedRoutingAgents();
|
||||
List<HostVO> allAgents = _hostDao.listAllRoutingAgents();
|
||||
double allHostsCount = allAgents.size();
|
||||
double managedHostsCount = allManagedRoutingAgents.size();
|
||||
if (allHostsCount > 0.0) {
|
||||
double load = managedHostsCount/allHostsCount;
|
||||
if (load >= _connectedAgentsThreshold) {
|
||||
s_logger.debug("Scheduling agent rebalancing task as the average agent load " + load + " is more than the threshold " + _connectedAgentsThreshold);
|
||||
_rebalanceService.scheduleRebalanceAgents();
|
||||
_agentLbHappened = true;
|
||||
} else {
|
||||
s_logger.trace("Not scheduling agent rebalancing task as the averages load " + load + " is less than the threshold " + _connectedAgentsThreshold);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} catch(CloudRuntimeException e) {
|
||||
s_logger.error("Runtime DB exception ", e.getCause());
|
||||
|
||||
@ -937,10 +960,6 @@ public class ClusterManagerImpl implements ClusterManager {
|
||||
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), heartbeatInterval, heartbeatInterval, TimeUnit.MILLISECONDS);
|
||||
_notificationExecutor.submit(getNotificationTask());
|
||||
|
||||
//Initiate agent rebalancing after the host is in UP state
|
||||
if (_agentLBEnabled) {
|
||||
_rebalanceService.scheduleRebalanceAgents();
|
||||
}
|
||||
|
||||
} catch (Throwable e) {
|
||||
s_logger.error("Unexpected exception : ", e);
|
||||
@ -1063,6 +1082,12 @@ public class ClusterManagerImpl implements ClusterManager {
|
||||
|
||||
|
||||
_agentLBEnabled = Boolean.valueOf(configDao.getValue(Config.AgentLbEnable.key()));
|
||||
|
||||
String connectedAgentsThreshold = configs.get("agent.load.threshold");
|
||||
|
||||
if (connectedAgentsThreshold != null) {
|
||||
_connectedAgentsThreshold = Double.parseDouble(connectedAgentsThreshold);
|
||||
}
|
||||
|
||||
this.registerListener(new LockMasterListener(_msId));
|
||||
|
||||
@ -1191,5 +1216,4 @@ public class ClusterManagerImpl implements ClusterManager {
|
||||
public boolean isAgentRebalanceEnabled() {
|
||||
return _agentLBEnabled;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -51,6 +51,7 @@ public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Lo
|
||||
|
||||
IntermediateStateSearch = createSearchBuilder();
|
||||
IntermediateStateSearch.and("futureOwner", IntermediateStateSearch.entity().getFutureOwner(), SearchCriteria.Op.EQ);
|
||||
IntermediateStateSearch.and("initialOwner", IntermediateStateSearch.entity().getInitialOwner(), SearchCriteria.Op.EQ);
|
||||
IntermediateStateSearch.and("state", IntermediateStateSearch.entity().getState(), SearchCriteria.Op.IN);
|
||||
IntermediateStateSearch.done();
|
||||
|
||||
|
||||
@ -240,8 +240,9 @@ public enum Config {
|
||||
SubDomainNetworkAccess("Advanced", NetworkManager.class, Boolean.class, "allow.subdomain.network.access", "true", "Allow subdomains to use networks dedicated to their parent domain(s)", null),
|
||||
EncodeApiResponse("Advanced", ManagementServer.class, Boolean.class, "encode.api.response", "false", "Do UTF-8 encoding for the api response, false by default", null),
|
||||
|
||||
DnsBasicZoneUpdates("Advanced", NetworkManager.class, String.class, "network.dns.basiczone.updates", "all", "This parameter can take 2 values: all (default) and pod. It defines if DHCP/DNS requests have to be send to all dhcp servers in cloudstack, or only to the one in the same pod", "all,pod"),
|
||||
ClusterMessageTimeOutSeconds("Advanced", ManagementServer.class, Integer.class, "cluster.message.timeout.seconds", "300", "Time (in seconds) to wait before a inter-management server message post times out.", null);
|
||||
DnsBasicZoneUpdates("Advanced", NetworkManager.class, String.class, "network.dns.basiczone.updates", "all", "This parameter can take 2 values: all (default) and pod. It defines if DHCP/DNS requests have to be send to all dhcp servers in cloudstack, or only to the one in the same pod", "all,pod"),
|
||||
ClusterMessageTimeOutSeconds("Advanced", ManagementServer.class, Integer.class, "cluster.message.timeout.seconds", "300", "Time (in seconds) to wait before a inter-management server message post times out.", null),
|
||||
AgentLoadThreshold("Advanced", ManagementServer.class, Float.class, "agent.load.threshold", "0.7", "Percentage (as a value between 0 and 1) of connected agents after which agent load balancing will start happening", null);
|
||||
|
||||
private final String _category;
|
||||
private final Class<?> _componentClass;
|
||||
|
||||
@ -184,4 +184,6 @@ public interface HostDao extends GenericDao<HostVO, Long> {
|
||||
List<HostVO> listByManagementServer(long msId);
|
||||
|
||||
List<HostVO> listSecondaryStorageVM(long dcId);
|
||||
|
||||
List<HostVO> listAllRoutingAgents();
|
||||
}
|
||||
|
||||
@ -104,6 +104,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||
protected final GenericSearchBuilder<HostVO, Long> CountRoutingByDc;
|
||||
protected final SearchBuilder<HostTransferMapVO> HostTransferSearch;
|
||||
protected final SearchBuilder<ClusterVO> ClusterManagedSearch;
|
||||
protected final SearchBuilder<HostVO> RoutingSearch;
|
||||
|
||||
protected final Attribute _statusAttr;
|
||||
protected final Attribute _msIdAttr;
|
||||
@ -294,6 +295,10 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||
ManagedRoutingServersSearch.and("server", ManagedRoutingServersSearch.entity().getManagementServerId(), SearchCriteria.Op.NNULL);
|
||||
ManagedRoutingServersSearch.and("type", ManagedRoutingServersSearch.entity().getType(), SearchCriteria.Op.EQ);
|
||||
ManagedRoutingServersSearch.done();
|
||||
|
||||
RoutingSearch = createSearchBuilder();
|
||||
RoutingSearch.and("type", RoutingSearch.entity().getType(), SearchCriteria.Op.EQ);
|
||||
RoutingSearch.done();
|
||||
|
||||
_statusAttr = _allAttributes.get("status");
|
||||
_msIdAttr = _allAttributes.get("managementServerId");
|
||||
@ -942,4 +947,11 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||
|
||||
return listBy(sc);
|
||||
}
|
||||
|
||||
/**
 * Lists hosts using {@code RoutingSearch} with its "type" parameter set to {@code Type.Routing}.
 *
 * NOTE(review): {@code RoutingSearch} is built with only a "type" condition, whereas
 * {@code ManagedRoutingServersSearch} additionally requires a non-null management server id;
 * presumably this method therefore also returns routing hosts that are not currently owned by
 * any management server - confirm against any implicit filtering done by {@code listBy}.
 *
 * @return the hosts returned by {@code listBy} for the "type = Routing" criteria
 */
@Override
|
||||
public List<HostVO> listAllRoutingAgents() {
|
||||
SearchCriteria<HostVO> sc = RoutingSearch.create();
|
||||
sc.setParameters("type", Type.Routing);
|
||||
return listBy(sc);
|
||||
}
|
||||
}
|
||||
|
||||
@ -13,3 +13,5 @@ ALTER TABLE `cloud`.`op_host_capacity` DROP FOREIGN KEY `fk_op_host_capacity__po
|
||||
ALTER TABLE `cloud`.`op_host_capacity` DROP FOREIGN KEY `fk_op_host_capacity__data_center_id`;
|
||||
ALTER TABLE `cloud`.`op_host_capacity` DROP FOREIGN KEY `fk_op_host_capacity__cluster_id`;
|
||||
|
||||
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'agent.load.threshold', '0.70', 'Percentage (as a value between 0 and 1) of connected agents after which agent load balancing will start happening');
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user