bug 9127: intermediate checkin for agent load balancer

This commit is contained in:
alena 2011-05-23 11:19:20 -07:00
parent 4696e5066b
commit 55692fd7cf
29 changed files with 1733 additions and 540 deletions

View File

@ -56,4 +56,8 @@ public abstract class Command {
/**
 * Looks up a single entry of this command's context map.
 *
 * @param name key to look up in {@code contextMap}
 * @return the value {@code contextMap} holds for {@code name}
 */
public String getContextParam(String name) {
    final String value = contextMap.get(name);
    return value;
}
/**
 * Tells callers whether this command may be cached. This implementation always answers {@code true}.
 * NOTE(review): presumably subclasses override this to opt out, and "cached" presumably means
 * "may be held back while the target agent is being rebalanced" - confirm against the callers.
 *
 * @return {@code true}
 */
public boolean allowCaching() {
return true;
}
}

View File

@ -0,0 +1,52 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.agent.api;
import com.cloud.host.Status.Event;
/**
 * Command carrying an agent-rebalance request: which agent is concerned, which management
 * server is meant to own it afterwards, and which rebalance {@link Event} is being signalled.
 */
public class TransferAgentCommand extends Command {
    /** Id of the agent (host) the rebalance request is about. */
    protected long agentId;
    /** Id of the management server that should own the agent after the transfer. */
    protected long futureOwner;
    /** Rebalance event being signalled; same visibility as the sibling fields. */
    protected Event event;

    /**
     * No-argument constructor leaving all fields at their defaults.
     * NOTE(review): presumably required by the serialization layer - confirm.
     */
    protected TransferAgentCommand() {
    }

    /**
     * @param agentId     id of the agent to transfer
     * @param futureOwner id of the management server that should own the agent afterwards
     * @param event       rebalance event to signal
     */
    public TransferAgentCommand(long agentId, long futureOwner, Event event) {
        this.agentId = agentId;
        this.futureOwner = futureOwner;
        this.event = event;
    }

    /** @return id of the agent to transfer */
    public long getAgentId() {
        return agentId;
    }

    /** @return id of the management server that should own the agent afterwards */
    public long getFutureOwner() {
        return futureOwner;
    }

    /** @return the rebalance event carried by this command */
    public Event getEvent() {
        return event;
    }

    /** @return always {@code false} */
    @Override
    public boolean executeInSequence() {
        return false;
    }
}

View File

@ -0,0 +1,33 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster;
/**
 * Read-only view of a management server: its id, its current state and its version string.
 */
public interface ManagementServerHost {

    /** States a management server can report. Nested enums of an interface are implicitly public and static. */
    enum State {
        Up,
        Starting,
        Down
    }

    /** @return the management server id */
    long getMsid();

    /** @return the current {@link State} of the management server */
    State getState();

    /** @return the version string of the management server */
    String getVersion();
}

View File

@ -32,7 +32,8 @@ public enum Status {
ErrorInMaintenance(false, false, false),
Maintenance(false, false, false),
Alert(true, true, true),
Removed(true, false, true);
Removed(true, false, true),
Rebalance(false, false, false);
private final boolean updateManagementServer;
private final boolean checkManagementServer;
@ -72,7 +73,11 @@ public enum Status {
WaitedTooLong(false, "Waited too long from the agent to reconnect on its own. Time to do HA"),
Remove(true, "Host is removed"),
Ready(false, "Host is ready for commands"),
UpdatePassword(false, "Update host password from db");
UpdatePassword(false, "Update host password from db"),
RequestAgentRebalance(false, "Request rebalance for the certain host"),
StartAgentRebalance(false, "Start rebalance for the certain host"),
RebalanceCompleted(false, "Host is rebalanced successfully"),
RebalanceFailed(false, "Failed to rebalance the host");
private final boolean isUserRequest;
private final String comment;
@ -132,6 +137,7 @@ public enum Status {
s_fsm.addTransition(Status.Up, Event.Ping, Status.Up);
s_fsm.addTransition(Status.Up, Event.AgentConnected, Status.Connecting);
s_fsm.addTransition(Status.Up, Event.ManagementServerDown, Status.Disconnected);
s_fsm.addTransition(Status.Up, Event.StartAgentRebalance, Status.Rebalance);
s_fsm.addTransition(Status.Updating, Event.PingTimeout, Status.Alert);
s_fsm.addTransition(Status.Updating, Event.Ping, Status.Updating);
s_fsm.addTransition(Status.Updating, Event.AgentConnected, Status.Connecting);
@ -177,6 +183,8 @@ public enum Status {
s_fsm.addTransition(Status.Alert, Event.Ping, Status.Up);
s_fsm.addTransition(Status.Alert, Event.Remove, Status.Removed);
s_fsm.addTransition(Status.Alert, Event.ManagementServerDown, Status.Alert);
s_fsm.addTransition(Status.Rebalance, Event.RebalanceFailed, Status.Alert);
s_fsm.addTransition(Status.Rebalance, Event.RebalanceCompleted, Status.Connecting);
}
public static void main(String[] args) {

View File

@ -93,6 +93,9 @@
<adapters key="com.cloud.acl.SecurityChecker">
<adapter name="DomainChecker" class="com.cloud.acl.DomainChecker"/>
</adapters>
<!-- Agent load balancer planners. Keyed by the adapter interface class name, like the other adapter lists in this file. -->
<adapters key="com.cloud.cluster.agentlb.AgentLoadBalancerPlanner">
    <adapter name="ClusterBasedAgentLbPlanner" class="com.cloud.cluster.agentlb.ClusterBasedAgentLoadBalancerPlanner"/>
</adapters>
</management-server>

View File

@ -33,20 +33,18 @@
package com.xensource.xenapi;
import com.xensource.xenapi.Types.BadServerResponse;
import com.xensource.xenapi.Types.VersionException;
import com.xensource.xenapi.Types.XenAPIException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.apache.xmlrpc.XmlRpcException;
import com.xensource.xenapi.Types.BadServerResponse;
import com.xensource.xenapi.Types.XenAPIException;
/**
* Asynchronous event registration and handling
*
@ -66,6 +64,7 @@ public class Event extends XenAPIObject {
this.ref = ref;
}
/**
 * @return the reference string held in {@code ref}
 */
@Override
public String toWireString() {
    return ref;
}
@ -96,6 +95,7 @@ public class Event extends XenAPIObject {
* Represents all the fields in a Event
*/
public static class Record implements Types.Record {
@Override
public String toString() {
StringWriter writer = new StringWriter();
PrintWriter print = new PrintWriter(writer);
@ -112,6 +112,7 @@ public class Event extends XenAPIObject {
/**
* Convert a event.Record to a Map
*/
@Override
public Map<String,Object> toMap() {
Map<String,Object> map = new HashMap<String,Object>();
map.put("id", this.id == null ? 0 : this.id);

View File

@ -238,6 +238,10 @@ public abstract class AgentAttache {
return _requests.size();
}
/**
 * @return the number of entries currently held in {@code _waitForList}
 */
public int getListenersSize() {
    final int waiting = _waitForList.size();
    return waiting;
}
public boolean processAnswers(final long seq, final Response resp) {
resp.logD("Processing: ", true);

View File

@ -11,11 +11,19 @@ import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.nio.channels.SocketChannel;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
@ -23,29 +31,40 @@ import javax.naming.ConfigurationException;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CancelCommand;
import com.cloud.agent.api.ChangeAgentCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.TransferAgentCommand;
import com.cloud.agent.transport.Request;
import com.cloud.agent.transport.Request.Version;
import com.cloud.agent.transport.Response;
import com.cloud.api.commands.UpdateHostPasswordCmd;
import com.cloud.cluster.ClusterManager;
import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ClusteredAgentRebalanceService;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.agentlb.AgentLoadBalancerPlanner;
import com.cloud.cluster.agentlb.HostTransferMapVO;
import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState;
import com.cloud.cluster.agentlb.dao.HostTransferMapDao;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.configuration.Config;
import com.cloud.configuration.dao.ConfigurationDao;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.Status.Event;
import com.cloud.resource.ServerResource;
import com.cloud.storage.resource.DummySecondaryStorageResource;
import com.cloud.user.User;
import com.cloud.utils.DateUtil;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.component.Adapters;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.component.Inject;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.Transaction;
@ -53,22 +72,31 @@ import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.nio.Link;
import com.cloud.utils.nio.Task;
@Local(value = { AgentManager.class })
public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener {
@Local(value = { AgentManager.class, ClusteredAgentRebalanceService.class })
public class ClusteredAgentManagerImpl extends AgentManagerImpl implements ClusterManagerListener, ClusteredAgentRebalanceService {
final static Logger s_logger = Logger.getLogger(ClusteredAgentManagerImpl.class);
private static final ScheduledExecutorService s_transferExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Cluster-AgentTransferExecutor"));
public final static long STARTUP_DELAY = 5000;
public final static long SCAN_INTERVAL = 90000; // 90 seconds, it takes 60 sec for xenserver to fail login
public final static int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION = 5; // 5 seconds
public long _loadSize = 100;
protected Set<Long> _agentToTransferIds = new HashSet<Long>();
private final long rebalanceTimeOut = 300000; // 5 mins - after this time remove the agent from the transfer list
@Inject protected ClusterManager _clusterMgr = null;
@Inject
protected ClusterManager _clusterMgr = null;
protected HashMap<String, SocketChannel> _peers;
private final Timer _timer = new Timer("ClusteredAgentManager Timer");
@Inject
protected ManagementServerHostDao _mshostDao;
@Inject
protected HostTransferMapDao _hostTransferDao;
@Inject(adapter = AgentLoadBalancerPlanner.class)
protected Adapters<AgentLoadBalancerPlanner> _lbPlanners;
protected ClusteredAgentManagerImpl() {
super();
@ -97,6 +125,13 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return false;
}
_timer.schedule(new DirectAgentScanTimerTask(), STARTUP_DELAY, SCAN_INTERVAL);
// schedule transfer scan executor - if agent LB is enabled
if (_clusterMgr.isAgentRebalanceEnabled()) {
s_transferExecutor.scheduleAtFixedRate(getTransferScanTask(), ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL, ClusteredAgentRebalanceService.DEFAULT_TRANSFER_CHECK_INTERVAL,
TimeUnit.MILLISECONDS);
}
return true;
}
@ -121,13 +156,13 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
// for agents that are self-managed, threshold to be considered as disconnected is 3 ping intervals
long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval*3);
List<HostVO> hosts = _hostDao.findDirectAgentToLoad(_clusterMgr.getManagementNodeId(), cutSeconds, _loadSize);
if ( hosts != null && hosts.size() == _loadSize ) {
Long clusterId = hosts.get((int)(_loadSize-1)).getClusterId();
if ( clusterId != null) {
for ( int i = (int)(_loadSize-1); i > 0; i-- ) {
if ( hosts.get(i).getClusterId() == clusterId ) {
long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval * 3);
List<HostVO> hosts = _hostDao.findDirectAgentToLoad(cutSeconds, _loadSize);
if (hosts != null && hosts.size() == _loadSize) {
Long clusterId = hosts.get((int) (_loadSize - 1)).getClusterId();
if (clusterId != null) {
for (int i = (int) (_loadSize - 1); i > 0; i--) {
if (hosts.get(i).getClusterId() == clusterId) {
hosts.remove(i);
} else {
break;
@ -322,7 +357,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public boolean updateHostPassword(UpdateHostPasswordCmd upasscmd) {
if (upasscmd.getClusterId() == null) {
//update agent attache password
// update agent attache password
try {
Boolean result = _clusterMgr.propagateAgentEvent(upasscmd.getHostId(), Event.UpdatePassword);
if (result != null) {
@ -330,8 +365,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
} catch (AgentUnavailableException e) {
}
}
else {
} else {
// get agents for the cluster
List<HostVO> hosts = _hostDao.listByCluster(upasscmd.getClusterId());
for (HostVO h : hosts) {
@ -507,6 +541,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
}
_timer.cancel();
s_transferExecutor.shutdownNow();
return super.stop();
}
@ -586,6 +621,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
cancel(Long.toString(Request.getManagementServerId(data)), hostId, Request.getSequence(data), e.getMessage());
}
} else {
long mgmtId = Request.getManagementServerId(data);
if (mgmtId != -1 && mgmtId != _nodeId) {
routeToPeer(Long.toString(mgmtId), data);
@ -624,7 +660,6 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public void onManagementNodeJoined(List<ManagementServerHostVO> nodeList, long selfNodeId) {
}
@Override
@ -638,4 +673,264 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
@Override
public void onManagementNodeIsolated() {
}
/**
 * Null-tolerant wrapper around the parent implementation: a missing attache is silently ignored.
 *
 * @param attache   attache to remove; nothing happens when it is {@code null}
 * @param nextState status handed through to the parent implementation
 */
@Override
public void removeAgent(AgentAttache attache, Status nextState) {
    if (attache != null) {
        super.removeAgent(attache, nextState);
    }
}
/**
 * Dispatches a rebalance event for one agent.
 *
 * @param agentId id of the agent the event is about
 * @param event   {@code RequestAgentRebalance} queues the agent for transfer,
 *                {@code StartAgentRebalance} runs the transfer step right away
 * @return the outcome of the dispatched step; {@code true} for any other event
 */
@Override
public boolean executeRebalanceRequest(long agentId, Event event) throws AgentUnavailableException, OperationTimedoutException {
    if (Event.RequestAgentRebalance == event) {
        return setToWaitForRebalance(agentId);
    }
    if (Event.StartAgentRebalance == event) {
        return rebalanceHost(agentId);
    }
    // Every other event is accepted without doing anything.
    return true;
}
/**
 * Asks the load balancer planners which hosts should be moved for every other active management
 * server, records a transfer request for each such host and sends the rebalance request.
 * A transfer record that is still in {@code TransferRequested} state after a failed request is
 * removed again.
 */
@Override
public void startRebalanceAgents() {
    Date cutTime = DateUtil.currentGMTTime();
    List<ManagementServerHostVO> activeNodes = _mshostDao.getActiveList(new Date(cutTime.getTime() - ClusterManager.DEFAULT_HEARTBEAT_THRESHOLD));
    List<HostVO> allManagedAgents = _hostDao.listManagedAgents();
    if (allManagedAgents.isEmpty() || activeNodes.isEmpty()) {
        // nothing to balance
        return;
    }

    // average number of managed agents per active management server (integer division)
    long avLoad = allManagedAgents.size() / activeNodes.size();

    for (ManagementServerHostVO node : activeNodes) {
        if (node.getMsid() == _nodeId) {
            continue;
        }

        // the first planner that comes up with a non-empty plan wins
        List<HostVO> hostsToRebalance = new ArrayList<HostVO>();
        for (AgentLoadBalancerPlanner lbPlanner : _lbPlanners) {
            hostsToRebalance = lbPlanner.getHostsToRebalance(node.getMsid(), avLoad);
            if (hostsToRebalance != null && !hostsToRebalance.isEmpty()) {
                break;
            }
        }
        if (hostsToRebalance == null || hostsToRebalance.isEmpty()) {
            continue;
        }

        for (HostVO host : hostsToRebalance) {
            long hostId = host.getId();
            s_logger.debug("Asking management server " + node.getMsid() + " to give away host id=" + hostId);

            HostTransferMapVO transfer = _hostTransferDao.startAgentTransfering(hostId, _nodeId, node.getMsid());
            if (transfer == null) {
                s_logger.warn("Unable to create a transfer record for the host id=" + hostId + ", skipping rebalance for this host");
                continue;
            }

            boolean result = true;
            try {
                // NOTE(review): the request is addressed to _nodeId (this server) although the log line above
                // names node.getMsid() as the server being asked - confirm which peer is intended.
                Answer[] answer = sendRebalanceCommand(hostId, _nodeId, Event.RequestAgentRebalance);
                if (answer == null) {
                    s_logger.warn("Failed to get host id=" + hostId + " from management server " + node.getMsid());
                    result = false;
                }
            } catch (Exception ex) {
                s_logger.warn("Failed to get host id=" + hostId + " from management server " + node.getMsid(), ex);
                result = false;
            } finally {
                if (!result) {
                    // The record may be gone already; a null here must not abort the remaining hosts.
                    HostTransferMapVO updatedTransfer = _hostTransferDao.findById(transfer.getId());
                    if (updatedTransfer != null && updatedTransfer.getState() == HostTransferState.TransferRequested) {
                        if (s_logger.isDebugEnabled()) {
                            s_logger.debug("Removing mapping from op_host_transfer as it failed to be set to transfer mode");
                        }
                        // just remove the mapping as nothing was done on the peer management server yet
                        _hostTransferDao.remove(transfer.getId());
                    }
                }
            }
        }
    }
}
/**
 * Wraps a {@link TransferAgentCommand} into a stop-on-error command list and executes it on the
 * given peer through the cluster manager.
 *
 * @param agentId id of the agent the command is about
 * @param peer    management server id; used both as the command's future owner and as the peer to call
 * @param event   rebalance event to signal
 * @return the answers returned by the peer, or {@code null} when the call threw
 */
private Answer[] sendRebalanceCommand(long agentId, long peer, Event event) {
    Commands batch = new Commands(OnError.Stop);
    batch.addCommand(new TransferAgentCommand(agentId, peer, event));
    final Command[] cmds = batch.toCommands();
    final String peerName = Long.toString(peer);

    try {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Forwarding " + cmds[0].toString() + " to " + peer);
        }
        return _clusterMgr.execute(peerName, agentId, cmds, true);
    } catch (Exception e) {
        s_logger.warn("Caught exception while talking to " + peer, e);
        return null;
    }
}
/**
 * Builds the periodic task that walks the agents queued for transfer.
 * <p>
 * For every queued agent: once its attache has no queued requests and no listeners the agent is
 * taken off the queue and rebalanced; otherwise, when the transfer record is no longer active
 * within {@code rebalanceTimeOut}, the agent is taken off the queue and the transfer is marked failed.
 * <p>
 * The queue is snapshotted under its lock and the snapshot is iterated, so entries can be removed
 * during the scan without breaking the iteration. A failure on one host is logged and does not
 * stop the scan of the remaining hosts.
 *
 * @return the scan task
 */
private Runnable getTransferScanTask() {
    return new Runnable() {
        @Override
        public void run() {
            try {
                // TODO - change to trace level later on
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Clustered agent transfer scan check, management server id:" + _nodeId);
                }

                // setToWaitForRebalance() adds to the set under the same lock
                final List<Long> pendingHostIds;
                synchronized (_agentToTransferIds) {
                    pendingHostIds = new ArrayList<Long>(_agentToTransferIds);
                }

                if (pendingHostIds.isEmpty()) {
                    // TODO - change to trace level later on
                    if (s_logger.isDebugEnabled()) {
                        s_logger.debug("Found no agents to be transfered by the management server " + _nodeId);
                    }
                    return;
                }

                s_logger.debug("Found " + pendingHostIds.size() + " agents to transfer");
                for (Long hostId : pendingHostIds) {
                    try {
                        AgentAttache attache = findAttache(hostId);
                        if (attache == null) {
                            // nothing left to drain for this host; stop tracking it and fail the transfer
                            s_logger.debug("Unable to find attache for the host id=" + hostId + ", removing it from the list of agents to transfer");
                            synchronized (_agentToTransferIds) {
                                _agentToTransferIds.remove(hostId);
                            }
                            finishRebalance(hostId, Event.RebalanceFailed);
                        } else if (attache.getQueueSize() == 0 && attache.getListenersSize() == 0) {
                            synchronized (_agentToTransferIds) {
                                _agentToTransferIds.remove(hostId);
                            }
                            // starts as false so that an exception in rebalanceHost() is recorded as a failure
                            boolean result = false;
                            try {
                                result = rebalanceHost(hostId);
                            } finally {
                                finishRebalance(hostId, result ? Event.RebalanceCompleted : Event.RebalanceFailed);
                            }
                        } else {
                            // if we timed out waiting for the host to reconnect, remove host from rebalance list and mark it as failed to rebalance
                            // no need to do anything with the real attache
                            Date cutTime = DateUtil.currentGMTTime();
                            if (!(_hostTransferDao.isActive(hostId, new Date(cutTime.getTime() - rebalanceTimeOut)))) {
                                s_logger.debug("Timed out waiting for the host id=" + hostId + " to be ready to transfer, failing rebalance for this host");
                                synchronized (_agentToTransferIds) {
                                    _agentToTransferIds.remove(hostId);
                                }
                                HostTransferMapVO transferMap = _hostTransferDao.findById(hostId);
                                if (transferMap != null) {
                                    transferMap.setState(HostTransferState.TransferFailed);
                                    _hostTransferDao.update(hostId, transferMap);
                                }
                            }
                        }
                    } catch (Throwable e) {
                        s_logger.error("Problem with the clustered agent transfer scan check for the host id=" + hostId, e);
                    }
                }
            } catch (Throwable e) {
                s_logger.error("Problem with the clustered agent transfer scan check!", e);
            }
        }
    };
}
/**
 * Queues a host for transfer; the queue is modified while holding its own monitor.
 *
 * @param hostId id of the host to queue
 * @return {@code true} when the host was not queued before
 */
private boolean setToWaitForRebalance(final long hostId) {
    s_logger.debug("Adding agent " + hostId + " to the list of agents to transfer");
    final boolean added;
    synchronized (_agentToTransferIds) {
        added = _agentToTransferIds.add(hostId);
    }
    return added;
}
/**
 * Executes this management server's part of the transfer of one host.
 * <ul>
 * <li>As initial owner: puts the attache into transfer mode, moves the host to the Rebalance
 * status and asks the future owner to start the rebalance.</li>
 * <li>As future owner: loads the host as a directly connected host.</li>
 * </ul>
 *
 * @param hostId id of the host to transfer
 * @return {@code false} when the transfer record or host is missing, the attache is of an
 *         unexpected type, the future owner could not be reached, or loading the host failed;
 *         {@code true} otherwise
 */
private boolean rebalanceHost(final long hostId) {
    HostTransferMapVO map = _hostTransferDao.findById(hostId);
    if (map == null) {
        s_logger.warn("Unable to find transfer record for the host id=" + hostId + ", can't rebalance the host");
        return false;
    }
    HostVO host = _hostDao.findById(hostId);

    boolean result = true;
    if (map.getInitialOwner() == _nodeId) {
        AgentAttache found = findAttache(hostId);
        if (found != null && !(found instanceof ClusteredDirectAgentAttache)) {
            s_logger.warn("Attache for the host id=" + hostId + " is not a directly connected one, can't put it to transfer mode");
            return false;
        }
        ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache) found;
        if (attache != null && !attache.getTransferMode()) {
            attache.setTransferMode(true);
            s_logger.debug("Putting agent id=" + hostId + " to transfer mode");
            _agents.put(hostId, attache);
            if (host != null && host.getRemoved() == null) {
                host.setManagementServerId(null);
                s_logger.debug("Updating host id=" + hostId + " with the status " + Status.Rebalance);
                _hostDao.updateStatus(host, Event.StartAgentRebalance, _nodeId);
            }

            try {
                Answer[] answer = sendRebalanceCommand(hostId, map.getFutureOwner(), Event.StartAgentRebalance);
                if (answer == null) {
                    s_logger.warn("Host " + hostId + " failed to connect to the management server " + map.getFutureOwner() + " as a part of rebalance process");
                    result = false;
                }
            } catch (Exception ex) {
                s_logger.warn("Host " + hostId + " failed to connect to the management server " + map.getFutureOwner() + " as a part of rebalance process", ex);
                result = false;
            }
            if (result) {
                s_logger.debug("Got host id=" + hostId + " from management server " + map.getFutureOwner());
            }
        }
    } else if (map.getFutureOwner() == _nodeId) {
        if (host == null) {
            s_logger.warn("Unable to find host id=" + hostId + " to load as a part of rebalance process");
            return false;
        }
        try {
            if (s_logger.isDebugEnabled()) {
                s_logger.debug("Loading directly connected host " + host.getId() + "(" + host.getName() + ") as a part of rebalance process");
            }
            //TODO - 1) no need to do vmfullSync/storageSetup on the agent side 2) Make sure that if connection fails, host goes from Rebalance state to Alert
            loadDirectlyConnectedHost(host);
        } catch (Exception ex) {
            s_logger.warn("Unable to load directly connected host " + host.getId() + " as a part of rebalance due to exception: ", ex);
            // a host that could not be loaded was not taken over
            result = false;
        }
    }
    return result;
}
/**
 * Completes the transfer of one host on the initial owner.
 * <ul>
 * <li>No attache found: only the transfer record is updated according to {@code event}.</li>
 * <li>{@code RebalanceFailed}: transfer mode is switched off and the record is marked failed.</li>
 * <li>{@code RebalanceCompleted}: the attache is removed, a new attache is created through
 * {@code createAttache}, the requests held by the old attache are routed to the future owner
 * and the record is marked completed.</li>
 * </ul>
 *
 * @param hostId id of the host whose transfer is being finished
 * @param event  {@code RebalanceCompleted} or {@code RebalanceFailed}
 * @return {@code false} when the transfer record is missing, this server is not the initial
 *         owner or the attache is of an unexpected type; {@code true} otherwise
 */
private boolean finishRebalance(final long hostId, Event event) {
    HostTransferMapVO map = _hostTransferDao.findById(hostId);
    if (map == null) {
        s_logger.warn("Unable to find transfer record for the host id=" + hostId + ", can't finish rebalance for the host");
        return false;
    }

    AgentAttache attache = findAttache(hostId);
    if (attache == null) {
        s_logger.debug("Unable to find attache for the host id=" + hostId + ", assuming that the agent disconnected already");
        HostTransferState state = (event == Event.RebalanceCompleted) ? HostTransferState.TransferCompleted : HostTransferState.TransferFailed;
        map.setState(state);
        _hostTransferDao.update(hostId, map);
        return true;
    }

    if (map.getInitialOwner() != _nodeId) {
        s_logger.warn("Why finish rebalance called not by initial host owner???");
        return false;
    }

    if (!(attache instanceof ClusteredDirectAgentAttache)) {
        s_logger.warn("Attache for the host id=" + hostId + " is not a directly connected one, can't finish rebalance for the host");
        return false;
    }
    ClusteredDirectAgentAttache transferAttache = (ClusteredDirectAgentAttache) attache;

    if (s_logger.isDebugEnabled()) {
        s_logger.debug("Finishing rebalancing for the host id=" + hostId);
    }

    if (event == Event.RebalanceFailed) {
        transferAttache.setTransferMode(false);
        s_logger.debug("Rebalance failed for the host id=" + hostId);
        map.setState(HostTransferState.TransferFailed);
        _hostTransferDao.update(hostId, map);
    } else if (event == Event.RebalanceCompleted) {
        //1) Get all the requests remove transfer attache
        // getRequests() returns null outside transfer mode and otherwise the attache's own list;
        // copy it so the requests survive the removal of the attache
        LinkedList<Request> held = transferAttache.getRequests();
        LinkedList<Request> requests = (held == null) ? new LinkedList<Request>() : new LinkedList<Request>(held);
        removeAgent(attache, Status.Rebalance);

        //2) Create forward attache
        createAttache(hostId);

        //3) forward all the requests to the management server which owns the host now
        for (Request request : requests) {
            routeToPeer(Long.toString(map.getFutureOwner()), request.getBytes());
        }

        map.setState(HostTransferState.TransferCompleted);
        _hostTransferDao.update(hostId, map);
    }
    return true;
}
}

View File

@ -17,7 +17,13 @@
*/
package com.cloud.agent.manager;
import java.util.LinkedList;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
import com.cloud.agent.api.Command;
import com.cloud.agent.transport.Request;
import com.cloud.agent.transport.Response;
import com.cloud.exception.AgentUnavailableException;
@ -26,8 +32,10 @@ import com.cloud.resource.ServerResource;
import com.cloud.utils.exception.CloudRuntimeException;
public class ClusteredDirectAgentAttache extends DirectAgentAttache implements Routable {
private final static Logger s_logger = Logger.getLogger(ClusteredDirectAgentAttache.class);
private final ClusteredAgentManagerImpl _mgr;
private final long _nodeId;
// Written under the object lock by setTransferMode() but read without it by other methods of this
// class; volatile makes every write visible to those readers.
private volatile boolean _transferMode = false;
public ClusteredDirectAgentAttache(AgentManager agentMgr, long id, long mgmtId, ServerResource resource, boolean maintenance, ClusteredAgentManagerImpl mgr) {
super(agentMgr, id, resource, maintenance, mgr);
@ -35,6 +43,25 @@ public class ClusteredDirectAgentAttache extends DirectAgentAttache implements R
_nodeId = mgmtId;
}
/**
 * Switches transfer mode on or off while holding this attache's monitor.
 *
 * @param transfer new value of the transfer-mode flag
 */
public synchronized void setTransferMode(final boolean transfer) {
    this._transferMode = transfer;
}
/**
 * While the agent is in transfer mode, rejects the first command that does not allow caching;
 * afterwards (and always outside transfer mode) defers to the parent check.
 *
 * @param cmds commands about to be sent
 * @throws AgentUnavailableException when in transfer mode and a command answers {@code false}
 *         to {@code allowCaching()}, or when the parent check fails
 */
@Override
protected void checkAvailability(final Command[] cmds) throws AgentUnavailableException {
    // need to throw some other exception while agent is in rebalancing mode
    if (_transferMode) {
        for (int i = 0; i < cmds.length; i++) {
            final Command candidate = cmds[i];
            if (candidate.allowCaching()) {
                continue;
            }
            throw new AgentUnavailableException("Unable to send " + candidate.getClass().toString() + " because agent is in Rebalancing mode", _id);
        }
    }

    super.checkAvailability(cmds);
}
@Override
public void routeToAgent(byte[] data) throws AgentUnavailableException {
Request req;
@ -47,7 +74,7 @@ public class ClusteredDirectAgentAttache extends DirectAgentAttache implements R
}
if (req instanceof Response) {
super.process(((Response)req).getAnswers());
super.process(((Response) req).getAnswers());
} else {
super.send(req);
}
@ -66,4 +93,37 @@ public class ClusteredDirectAgentAttache extends DirectAgentAttache implements R
return super.processAnswers(seq, response);
}
}
/**
 * Sends a request, or - while the agent is in transfer mode - only stores it through
 * {@code addRequest} so that it can be picked up later. In transfer mode the listener is not used.
 *
 * @param req      request to send
 * @param listener listener handed to the parent implementation outside transfer mode
 * @throws AgentUnavailableException when the availability check rejects the request's commands
 */
@Override
public void send(Request req, final Listener listener) throws AgentUnavailableException {
    checkAvailability(req.getCommands());

    if (!_transferMode) {
        super.send(req, listener);
        return;
    }

    final long seq = req.getSequence();
    if (s_logger.isDebugEnabled()) {
        s_logger.debug(log(seq, "Holding request as the corresponding agent is in transfer mode: "));
    }
    synchronized (this) {
        addRequest(req);
    }
}
/**
 * @return the current value of the transfer-mode flag
 */
public boolean getTransferMode() {
    return this._transferMode;
}
/**
 * @return this attache's own {@code _requests} list (not a copy) while in transfer mode,
 *         {@code null} otherwise
 */
public LinkedList<Request> getRequests() {
    return _transferMode ? _requests : null;
}
}

View File

@ -62,4 +62,8 @@ public interface ClusterManager extends Manager {
* @param cmds commands to broadcast
*/
public void broadcast(long agentId, Command[] cmds);
boolean rebalanceAgent(long agentId, Event event) throws AgentUnavailableException, OperationTimedoutException;
boolean isAgentRebalanceEnabled();
}

View File

@ -50,7 +50,12 @@ import com.cloud.agent.api.Answer;
import com.cloud.agent.api.ChangeAgentCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.manager.Commands;
import com.cloud.cluster.ManagementServerHost.State;
import com.cloud.cluster.agentlb.HostTransferMapVO;
import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState;
import com.cloud.cluster.agentlb.dao.HostTransferMapDao;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.configuration.Config;
import com.cloud.configuration.dao.ConfigurationDao;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.OperationTimedoutException;
@ -64,6 +69,7 @@ import com.cloud.utils.Profiler;
import com.cloud.utils.PropertiesUtil;
import com.cloud.utils.component.Adapters;
import com.cloud.utils.component.ComponentLocator;
import com.cloud.utils.component.Inject;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.Transaction;
@ -74,12 +80,14 @@ import com.cloud.utils.mgmt.JmxUtil;
import com.cloud.utils.net.NetUtils;
import com.google.gson.Gson;
@Local(value={ClusterManager.class})
@Local(value = { ClusterManager.class })
public class ClusterManagerImpl implements ClusterManager {
private static final Logger s_logger = Logger.getLogger(ClusterManagerImpl.class);
private static final int EXECUTOR_SHUTDOWN_TIMEOUT = 1000; // 1 second
private final List<ClusterManagerListener> listeners = new ArrayList<ClusterManagerListener>();
private final Map<Long, ManagementServerHostVO> activePeers = new HashMap<Long, ManagementServerHostVO>();
private int heartbeatInterval = ClusterManager.DEFAULT_HEARTBEAT_INTERVAL;
@ -90,11 +98,12 @@ public class ClusterManagerImpl implements ClusterManager {
private final Gson gson;
private AgentManager _agentMgr;
@Inject
private ClusteredAgentRebalanceService _rebalanceService;
private final ScheduledExecutorService _heartbeatScheduler = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Cluster-Heartbeat"));
private final ExecutorService _notificationExecutor = Executors.newFixedThreadPool(1, new NamedThreadFactory("Cluster-Notification"));
private List<ClusterManagerMessage> _notificationMsgs = new ArrayList<ClusterManagerMessage>();
private final List<ClusterManagerMessage> _notificationMsgs = new ArrayList<ClusterManagerMessage>();
private Connection _heartbeatConnection = null;
private final ExecutorService _executor;
@ -103,6 +112,7 @@ public class ClusterManagerImpl implements ClusterManager {
private ManagementServerHostDao _mshostDao;
private HostDao _hostDao;
private HostTransferMapDao _hostTransferDao;
//
// pay attention to _mshostId and _msid
@ -110,13 +120,16 @@ public class ClusterManagerImpl implements ClusterManager {
// _msid is the unique persistent identifier that peer name is based upon
//
private Long _mshostId = null;
protected long _msid = ManagementServerNode.getManagementServerId();
protected long _msId = ManagementServerNode.getManagementServerId();
protected long _runId = System.currentTimeMillis();
private boolean _peerScanInited = false;
private String _name;
private String _clusterNodeIP = "127.0.0.1";
private boolean _agentLBEnabled = false;
private State _state = State.Starting;
private final Object stateLock = new Object();
public ClusterManagerImpl() {
clusterPeers = new HashMap<String, ClusterService>();
@ -131,8 +144,7 @@ public class ClusterManagerImpl implements ClusterManager {
}
@Override
public Answer[] sendToAgent(Long hostId, Command [] cmds, boolean stopOnError)
throws AgentUnavailableException, OperationTimedoutException {
public Answer[] sendToAgent(Long hostId, Command[] cmds, boolean stopOnError) throws AgentUnavailableException, OperationTimedoutException {
Commands commands = new Commands(stopOnError ? OnError.Stop : OnError.Continue);
for (Command cmd : cmds) {
commands.addCommand(cmd);
@ -141,8 +153,7 @@ public class ClusterManagerImpl implements ClusterManager {
}
@Override
public long sendToAgent(Long hostId, Command[] cmds, boolean stopOnError, Listener listener)
throws AgentUnavailableException {
public long sendToAgent(Long hostId, Command[] cmds, boolean stopOnError, Listener listener) throws AgentUnavailableException {
Commands commands = new Commands(stopOnError ? OnError.Stop : OnError.Continue);
for (Command cmd : cmds) {
commands.addCommand(cmd);
@ -163,7 +174,7 @@ public class ClusterManagerImpl implements ClusterManager {
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Propagating agent change request event:" + event.toString() + " to agent:"+ agentId);
s_logger.debug("Propagating agent change request event:" + event.toString() + " to agent:" + agentId);
}
Command[] cmds = new Command[1];
cmds[0] = new ChangeAgentCommand(agentId, event);
@ -182,13 +193,14 @@ public class ClusterManagerImpl implements ClusterManager {
/**
* called by DatabaseUpgradeChecker to see if there are other peers running.
* @param notVersion If version is passed in, the peers CANNOT be running at this
* version. If version is null, return true if any peer is
* running regardless of version.
*
* @param notVersion
* If version is passed in, the peers CANNOT be running at this version. If version is null, return true if any
* peer is running regardless of version.
* @return true if there are peers running and false if not.
*/
public static final boolean arePeersRunning(String notVersion) {
return false; //TODO: Leaving this for Kelven to take care of.
return false; // TODO: Leaving this for Kelven to take care of.
}
@Override
@ -248,11 +260,9 @@ public class ClusterManagerImpl implements ClusterManager {
s_logger.error("Exception on parsing gson package from remote call to " + strPeer);
}
}
return null;
} catch (RemoteException e) {
invalidatePeerService(strPeer);
if(s_logger.isInfoEnabled()) {
s_logger.info("Exception on remote execution, peer: " + strPeer + ", iteration: "
+ i + ", exception message :" + e.getMessage());
@ -266,7 +276,6 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public long executeAsync(String strPeer, long agentId, Command[] cmds, boolean stopOnError, Listener listener) {
ClusterService peerService = null;
if(s_logger.isDebugEnabled()) {
@ -280,7 +289,6 @@ public class ClusterManagerImpl implements ClusterManager {
} catch (RemoteException e) {
s_logger.error("Unable to get cluster service on peer : " + strPeer);
}
if(peerService != null) {
try {
long seq = 0;
@ -291,7 +299,6 @@ public class ClusterManagerImpl implements ClusterManager {
long startTick = System.currentTimeMillis();
seq = peerService.executeAsync(getSelfPeerName(), agentId, gson.toJson(cmds, Command[].class), stopOnError);
if(seq > 0) {
if(s_logger.isDebugEnabled()) {
s_logger.debug("Completed Async " + getSelfPeerName() + " -> " + strPeer + "." + agentId
@ -321,7 +328,6 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public boolean onAsyncResult(String executingPeer, long agentId, long seq, Answer[] answers) {
if(s_logger.isDebugEnabled()) {
s_logger.debug("Process Async-call result from remote peer " + executingPeer + ", {" +
agentId + "-" + seq + "} answers: " + (answers != null ? gson.toJson(answers, Answer[].class): "null"));
@ -373,7 +379,6 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public boolean forwardAnswer(String targetPeer, long agentId, long seq, Answer[] answers) {
if(s_logger.isDebugEnabled()) {
s_logger.debug("Forward -> " + targetPeer + " Async-call answer {" + agentId + "-" + seq +
"} " + (answers != null? gson.toJson(answers, Answer[].class):""));
@ -444,7 +449,7 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public String getSelfPeerName() {
return Long.toString(_msid);
return Long.toString(_msId);
}
@Override
@ -455,15 +460,13 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public void registerListener(ClusterManagerListener listener) {
// Note : we don't check duplicates
synchronized(listeners) {
listeners.add(listener);
synchronized (listeners) {
}
}
/**
 * Removes a previously registered cluster manager listener.
 * The removal is performed while holding the monitor of the {@code listeners} collection.
 *
 * @param listener the listener to remove
 */
@Override
public void unregisterListener(ClusterManagerListener listener) {
synchronized(listeners) {
listeners.remove(listener);
}
}
@ -571,11 +574,22 @@ public class ClusterManagerImpl implements ClusterManager {
Connection conn = getHeartbeatConnection();
_mshostDao.update(conn, _mshostId, getCurrentRunId(), DateUtil.currentGMTTime());
if(s_logger.isTraceEnabled()) {
// for cluster in Starting state check if there are any agents being transfered
if (_state == State.Starting) {
synchronized (stateLock) {
if (isClusterReadyToStart()) {
_mshostDao.update(conn, _mshostId, getCurrentRunId(), State.Up, DateUtil.currentGMTTime());
_state = State.Up;
stateLock.notifyAll();
}
}
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
}
if(!_peerScanInited) {
if (!_peerScanInited) {
_peerScanInited = true;
initPeerScan(conn);
}
@ -604,14 +618,41 @@ public class ClusterManagerImpl implements ClusterManager {
s_logger.error("Problem with the cluster heartbeat!", e);
}
}
/**
 * Checks whether this management server has any host transfers still joining it.
 * <p>
 * When {@code listHostsJoiningCluster(_msId)} returns nothing, the transfer records
 * in state {@code TransferCompleted} and {@code TransferFailed} whose future owner is
 * this server are logged and removed, and the server is reported as ready.
 *
 * @return true if no hosts are currently joining this management server, false otherwise
 */
private boolean isClusterReadyToStart() {
    int transferCount = _hostTransferDao.listHostsJoiningCluster(_msId).size();
    if (transferCount != 0) {
        if (s_logger.isDebugEnabled()) {
            //TODO : change to trace mode later
            s_logger.debug("There are " + transferCount + " agents currently joining the cluster " + _msId);
        }
        return false;
    }

    // Report and clean up the hosts that were transferred successfully
    List<HostTransferMapVO> rebalancedHosts = _hostTransferDao.listBy(_msId, HostTransferState.TransferCompleted);
    s_logger.debug(rebalancedHosts.size() + " hosts joined the cluster " + _msId + " as a result of rebalance process");
    for (HostTransferMapVO host : rebalancedHosts) {
        _hostTransferDao.remove(host.getId());
    }

    // Report and clean up the hosts that failed to transfer
    List<HostTransferMapVO> failedToRebalanceHosts = _hostTransferDao.listBy(_msId, HostTransferState.TransferFailed);
    s_logger.debug(failedToRebalanceHosts.size() + " hosts failed to join the cluster " + _msId + " as a result of rebalance process");
    for (HostTransferMapVO host : failedToRebalanceHosts) {
        _hostTransferDao.remove(host.getId());
    }

    s_logger.debug("There are no hosts currently joining cluster msid=" + _msId + ", so management server is ready to start");
    return true;
}
};
}
private boolean isRootCauseConnectionRelated(Throwable e) {
while(e != null) {
if(e instanceof com.mysql.jdbc.CommunicationsException || e instanceof com.mysql.jdbc.exceptions.jdbc4.CommunicationsException) {
while (e != null) {
if (e instanceof com.mysql.jdbc.CommunicationsException || e instanceof com.mysql.jdbc.exceptions.jdbc4.CommunicationsException)
return true;
}
e = e.getCause();
}
@ -857,7 +898,7 @@ public class ClusterManagerImpl implements ClusterManager {
@Override @DB
public boolean start() {
if(s_logger.isInfoEnabled()) {
s_logger.info("Starting cluster manager, msid : " + _msid);
s_logger.info("Starting cluster manager, msid : " + _msId);
}
Transaction txn = Transaction.currentTxn();
@ -867,10 +908,10 @@ public class ClusterManagerImpl implements ClusterManager {
final Class<?> c = this.getClass();
String version = c.getPackage().getImplementationVersion();
ManagementServerHostVO mshost = _mshostDao.findByMsid(_msid);
if(mshost == null) {
ManagementServerHostVO mshost = _mshostDao.findByMsid(_msId);
if (mshost == null) {
mshost = new ManagementServerHostVO();
mshost.setMsid(_msid);
mshost.setMsid(_msId);
mshost.setRunid(this.getCurrentRunId());
mshost.setName(NetUtils.getHostName());
mshost.setVersion(version);
@ -879,32 +920,49 @@ public class ClusterManagerImpl implements ClusterManager {
mshost.setLastUpdateTime(DateUtil.currentGMTTime());
mshost.setRemoved(null);
mshost.setAlertCount(0);
mshost.setState(ManagementServerNode.State.Up);
mshost.setState(ManagementServerHost.State.Starting);
_mshostDao.persist(mshost);
if(s_logger.isInfoEnabled()) {
s_logger.info("New instance of management server msid " + _msid + " is being started");
if (s_logger.isInfoEnabled()) {
s_logger.info("New instance of management server msid " + _msId + " is being started");
}
} else {
if(s_logger.isInfoEnabled()) {
s_logger.info("Management server " + _msid + " is being started");
if (s_logger.isInfoEnabled()) {
s_logger.info("Management server " + _msId + " is being started");
}
_mshostDao.update(mshost.getId(), getCurrentRunId(), NetUtils.getHostName(), version,
_clusterNodeIP, _currentServiceAdapter.getServicePort(), DateUtil.currentGMTTime());
_mshostDao.update(mshost.getId(), getCurrentRunId(), NetUtils.getHostName(), version, _clusterNodeIP, _currentServiceAdapter.getServicePort(), DateUtil.currentGMTTime());
}
txn.commit();
_mshostId = mshost.getId();
if(s_logger.isInfoEnabled()) {
s_logger.info("Management server (host id : " + _mshostId + ") is available at " + _clusterNodeIP + ":" + _currentServiceAdapter.getServicePort());
if (s_logger.isInfoEnabled()) {
s_logger.info("Management server (host id : " + _mshostId + ") is being started at " + _clusterNodeIP + ":" + _currentServiceAdapter.getServicePort());
}
// use seperated thread for heartbeat updates
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), heartbeatInterval,
heartbeatInterval, TimeUnit.MILLISECONDS);
// use seperate thread for heartbeat updates
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), heartbeatInterval, heartbeatInterval, TimeUnit.MILLISECONDS);
_notificationExecutor.submit(getNotificationTask());
// Do agent rebalancing
if (_agentLBEnabled) {
s_logger.debug("Management server " + _msId + " is asking other peers to rebalance their agents");
_rebalanceService.startRebalanceAgents();
}
//wait here for heartbeat task to update the host state
try {
synchronized (stateLock) {
while (_state != State.Up) {
stateLock.wait();
}
}
} catch (final InterruptedException e) {
} finally {
s_logger.debug("Agent rebalancing is completed, management server " + _mshostId + " is ready");
}
} catch (Throwable e) {
s_logger.error("Unexpected exception : ", e);
txn.rollback();
@ -912,8 +970,8 @@ public class ClusterManagerImpl implements ClusterManager {
throw new CloudRuntimeException("Unable to initialize cluster info into database");
}
if(s_logger.isInfoEnabled()) {
s_logger.info("Cluster manager is started");
if (s_logger.isInfoEnabled()) {
s_logger.info("Cluster manager was started successfully");
}
return true;
@ -955,15 +1013,20 @@ public class ClusterManagerImpl implements ClusterManager {
}
_mshostDao = locator.getDao(ManagementServerHostDao.class);
if(_mshostDao == null) {
if (_mshostDao == null) {
throw new ConfigurationException("Unable to get " + ManagementServerHostDao.class.getName());
}
_hostDao = locator.getDao(HostDao.class);
if(_hostDao == null) {
if (_hostDao == null) {
throw new ConfigurationException("Unable to get " + HostDao.class.getName());
}
_hostTransferDao = locator.getDao(HostTransferMapDao.class);
if (_hostTransferDao == null) {
throw new ConfigurationException("Unable to get agent transfer map dao");
}
ConfigurationDao configDao = locator.getDao(ConfigurationDao.class);
if (configDao == null) {
throw new ConfigurationException("Unable to get the configuration dao.");
@ -972,12 +1035,12 @@ public class ClusterManagerImpl implements ClusterManager {
Map<String, String> configs = configDao.getConfiguration("management-server", params);
String value = configs.get("cluster.heartbeat.interval");
if(value != null) {
if (value != null) {
heartbeatInterval = NumbersUtil.parseInt(value, ClusterManager.DEFAULT_HEARTBEAT_INTERVAL);
}
value = configs.get("cluster.heartbeat.threshold");
if(value != null) {
if (value != null) {
heartbeatThreshold = NumbersUtil.parseInt(value, ClusterManager.DEFAULT_HEARTBEAT_THRESHOLD);
}
@ -991,7 +1054,7 @@ public class ClusterManagerImpl implements ClusterManager {
throw new ConfigurationException("Unable to load db.properties content");
}
_clusterNodeIP = dbProps.getProperty("cluster.node.IP");
if(_clusterNodeIP == null) {
if (_clusterNodeIP == null) {
_clusterNodeIP = "127.0.0.1";
}
_clusterNodeIP = _clusterNodeIP.trim();
@ -1017,6 +1080,9 @@ public class ClusterManagerImpl implements ClusterManager {
throw new ConfigurationException("Unable to set current cluster service adapter");
}
_agentLBEnabled = Boolean.valueOf(configDao.getValue(Config.AgentLbEnable.key()));
checkConflicts();
if(s_logger.isInfoEnabled()) {
@ -1027,7 +1093,7 @@ public class ClusterManagerImpl implements ClusterManager {
@Override
public long getManagementNodeId() {
return _msid;
return _msId;
}
@Override
@ -1124,11 +1190,23 @@ public class ClusterManagerImpl implements ClusterManager {
s_logger.error(msg);
throw new ConfigurationException(msg);
} else {
String msg = "Detected that another management node with the same IP " + peer.getServiceIP() + " is considered as running in DB, however it is not pingable, we will continue cluster initialization with this management server node";
String msg = "Detected that another management node with the same IP " + peer.getServiceIP()
+ " is considered as running in DB, however it is not pingable, we will continue cluster initialization with this management server node";
s_logger.info(msg);
}
}
}
}
}
/**
 * Handles a rebalance request for one agent by delegating to
 * {@code _rebalanceService.executeRebalanceRequest(agentId, event)}.
 *
 * @param agentId id of the agent the request is about
 * @param event   host status event that accompanies the request
 * @return the result reported by the rebalance service
 * @throws AgentUnavailableException  propagated from the rebalance service
 * @throws OperationTimedoutException propagated from the rebalance service
 */
@Override
public boolean rebalanceAgent(long agentId, Event event) throws AgentUnavailableException, OperationTimedoutException {
return _rebalanceService.executeRebalanceRequest(agentId, event);
}
/**
 * Tells whether agent load balancing is enabled on this management server.
 *
 * @return the value of the {@code _agentLBEnabled} flag
 */
@Override
public boolean isAgentRebalanceEnabled() {
return _agentLBEnabled;
}
}

View File

@ -42,4 +42,5 @@ public class ClusterManagerMessage {
/**
 * Returns the management server host records held by this message.
 *
 * @return the {@code _nodes} list as stored (no copy is made)
 */
public List<ManagementServerHostVO> getNodes() {
return _nodes;
}
}

View File

@ -38,6 +38,7 @@ import com.cloud.agent.api.Answer;
import com.cloud.agent.api.ChangeAgentAnswer;
import com.cloud.agent.api.ChangeAgentCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.TransferAgentCommand;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.serializer.GsonHelper;
@ -204,6 +205,29 @@ public class ClusterServiceServletHttpHandler implements HttpRequestHandler {
Answer[] answers = new Answer[1];
answers[0] = new ChangeAgentAnswer(cmd, result);
return gson.toJson(answers);
} else if (cmds.length == 1 && cmds[0] instanceof TransferAgentCommand) {
TransferAgentCommand cmd = (TransferAgentCommand) cmds[0];
if (s_logger.isDebugEnabled()) {
s_logger.debug("Intercepting command for agent rebalancing: agent " + cmd.getAgentId() + " event: " + cmd.getEvent());
}
boolean result = false;
try {
result = manager.rebalanceAgent(cmd.getAgentId(), cmd.getEvent());
if (s_logger.isDebugEnabled()) {
s_logger.debug("Result is " + result);
}
} catch (AgentUnavailableException e) {
s_logger.warn("Agent is unavailable", e);
return null;
} catch (OperationTimedoutException e) {
s_logger.warn("Operation timed out", e);
return null;
}
Answer[] answers = new Answer[1];
answers[0] = new Answer(cmd, result, null);
return gson.toJson(answers);
}
try {

View File

@ -37,7 +37,7 @@ public class ClusterServiceServletImpl implements ClusterService {
private String serviceUrl;
private Gson gson;
private final Gson gson;
public ClusterServiceServletImpl() {
gson = GsonHelper.getGson();
@ -103,7 +103,6 @@ public class ClusterServiceServletImpl implements ClusterService {
+ ", excutingPeer: " + executingPeer
+ ", seq: " + seq + ", gsonPackage: " + gsonPackage);
}
HttpClient client = new HttpClient();
PostMethod method = new PostMethod(serviceUrl);
@ -169,6 +168,7 @@ public class ClusterServiceServletImpl implements ClusterService {
} catch(Throwable e) {
s_logger.error("Exception from : " + serviceUrl + ", method : " + method.getParameter("method") + ", exception :", e);
}
return result;
}

View File

@ -0,0 +1,14 @@
package com.cloud.cluster;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.host.Status.Event;
/**
 * Service used by the cluster manager to rebalance agents between management servers.
 */
public interface ClusteredAgentRebalanceService {
/** Default interval between transfer checks; presumably milliseconds — TODO confirm against the code that reads it. */
public static final int DEFAULT_TRANSFER_CHECK_INTERVAL = 10000;
/** Starts the agent rebalancing process for this management server. */
void startRebalanceAgents();
/**
 * Executes a rebalance request for a single agent.
 *
 * @param agentId id of the agent the request is about
 * @param event   host status event that accompanies the request
 * @return result of the request; NOTE(review): exact meaning of true/false is defined by the implementation — confirm there
 * @throws AgentUnavailableException  if the agent cannot be reached
 * @throws OperationTimedoutException if the operation times out
 */
boolean executeRebalanceRequest(long agentId, Event event) throws AgentUnavailableException, OperationTimedoutException;
}

View File

@ -44,47 +44,58 @@ public class DummyClusterManagerImpl implements ClusterManager {
private String _name;
private final String _clusterNodeIP = "127.0.0.1";
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public Answer[] execute(String strPeer, long agentId, Command [] cmds, boolean stopOnError) {
throw new CloudRuntimeException("Unsupported feature");
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public long executeAsync(String strPeer, long agentId, Command[] cmds, boolean stopOnError, Listener listener) {
throw new CloudRuntimeException("Unsupported feature");
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public boolean onAsyncResult(String executingPeer, long agentId, long seq, Answer[] answers) {
throw new CloudRuntimeException("Unsupported feature");
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public boolean forwardAnswer(String targetPeer, long agentId, long seq, Answer[] answers) {
throw new CloudRuntimeException("Unsupported feature");
}
/**
 * Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}.
 * The declared checked exceptions are never thrown by this implementation.
 */
@Override
public Answer[] sendToAgent(Long hostId, Command [] cmds, boolean stopOnError)
throws AgentUnavailableException, OperationTimedoutException {
throw new CloudRuntimeException("Unsupported feature");
}
/**
 * Listener-based variant; not supported by the dummy cluster manager and always throws
 * {@link CloudRuntimeException}. The declared checked exception is never thrown here.
 */
@Override
public long sendToAgent(Long hostId, Command[] cmds, boolean stopOnError, Listener listener) throws AgentUnavailableException {
throw new CloudRuntimeException("Unsupported feature");
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public boolean executeAgentUserRequest(long agentId, Event event) throws AgentUnavailableException {
throw new CloudRuntimeException("Unsupported feature");
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public Boolean propagateAgentEvent(long agentId, Event event) throws AgentUnavailableException {
throw new CloudRuntimeException("Unsupported feature");
}
/**
 * Returns the heartbeat threshold used by the dummy cluster manager.
 * <p>
 * The dummy implementation has no configuration of its own, so it reports the
 * default threshold constant (previously the heartbeat <em>interval</em> default
 * was returned by mistake).
 *
 * @return {@link ClusterManager#DEFAULT_HEARTBEAT_THRESHOLD}
 */
@Override
public int getHeartbeatThreshold() {
    return ClusterManager.DEFAULT_HEARTBEAT_THRESHOLD;
}
/** Returns the id of this dummy management node, as stored in {@code _id}. */
@Override
public long getManagementNodeId() {
return _id;
}
/** Returns the run id of this dummy management node, as stored in {@code _runId}. */
@Override
public long getCurrentRunId() {
return _runId;
}
@ -94,29 +105,36 @@ public class DummyClusterManagerImpl implements ClusterManager {
return null;
}
/** Returns this node's peer name, which is the decimal string form of {@code _id}. */
@Override
public String getSelfPeerName() {
return Long.toString(_id);
}
/** Returns the node IP of the dummy cluster manager, i.e. the fixed {@code _clusterNodeIP} value. */
@Override
public String getSelfNodeIP() {
return _clusterNodeIP;
}
/** Dummy implementation: performs no check and reports every management node as alive. */
@Override
public boolean isManagementNodeAlive(long msid) {
return true;
}
/** Dummy implementation: performs no ping and always returns false. */
@Override
public boolean pingManagementNode(long msid) {
return false;
}
/** Not supported by the dummy cluster manager; always throws {@link CloudRuntimeException}. */
@Override
public String getPeerName(long agentHostId) {
throw new CloudRuntimeException("Unsupported feature");
}
/** Dummy implementation: the listener is ignored (intentional no-op). */
@Override
public void registerListener(ClusterManagerListener listener) {
}
/** Dummy implementation: nothing is ever registered, so this is an intentional no-op. */
@Override
public void unregisterListener(ClusterManagerListener listener) {
}
@ -147,4 +165,14 @@ public class DummyClusterManagerImpl implements ClusterManager {
/** Dummy implementation: there is nothing to stop, so this always returns true. */
public boolean stop() {
return true;
}
/**
 * Dummy implementation: performs no rebalancing and always returns false.
 * The declared checked exceptions are never thrown by this implementation.
 */
@Override
public boolean rebalanceAgent(long agentId, Event event) throws AgentUnavailableException, OperationTimedoutException {
return false;
}
/** Dummy implementation: agent rebalancing is always reported as disabled. */
@Override
public boolean isAgentRebalanceEnabled() {
return false;
}
}

View File

@ -35,7 +35,7 @@ import com.cloud.utils.db.GenericDao;
@Entity
@Table(name="mshost")
public class ManagementServerHostVO {
public class ManagementServerHostVO implements ManagementServerHost{
@Id
@GeneratedValue(strategy=GenerationType.IDENTITY)
@ -53,7 +53,7 @@ public class ManagementServerHostVO {
@Column(name="state", updatable = true, nullable=false)
@Enumerated(value=EnumType.STRING)
private ManagementServerNode.State state;
private ManagementServerHost.State state;
@Column(name="version", updatable=true, nullable=true)
private String version;
@ -101,6 +101,7 @@ public class ManagementServerHostVO {
this.runid = runid;
}
@Override
public long getMsid() {
return msid;
}
@ -117,14 +118,16 @@ public class ManagementServerHostVO {
this.name = name;
}
public ManagementServerNode.State getState() {
@Override
public ManagementServerHost.State getState() {
return this.state;
}
public void setState(ManagementServerNode.State state) {
public void setState(ManagementServerHost.State state) {
this.state = state;
}
@Override
public String getVersion() {
return version;
}

View File

@ -0,0 +1,31 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster.agentlb;
import java.util.List;
import com.cloud.host.HostVO;
import com.cloud.utils.component.Adapter;
/**
 * Adapter that decides which hosts a management server should hand over during agent rebalancing.
 */
public interface AgentLoadBalancerPlanner extends Adapter{
/**
 * Selects the hosts that the given management server should give away.
 *
 * @param msId   id of the management server whose hosts are considered
 * @param avLoad load the server is compared against; presumably the average number of
 *               hosts per management server — TODO confirm with the caller
 * @return the hosts to rebalance; implementations may return {@code null} when the server
 *         does not take part in rebalancing, so callers should check for it
 */
List<HostVO> getHostsToRebalance(long msId, long avLoad);
}

View File

@ -0,0 +1,167 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster.agentlb;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
import org.apache.log4j.Logger;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.utils.component.Inject;
/**
 * Agent load balancer planner that selects the hosts to give away cluster by cluster.
 * <p>
 * Directly connected hosts in status {@code Up} are grouped by cluster id; the clusters are
 * visited from the largest to the smallest. Whole clusters are preferred: a cluster is taken
 * completely when it fits into the number of hosts that still have to be given away, and
 * only the last cluster visited may be taken partially.
 */
@Local(value=AgentLoadBalancerPlanner.class)
public class ClusterBasedAgentLoadBalancerPlanner implements AgentLoadBalancerPlanner{
    private static final Logger s_logger = Logger.getLogger(AgentLoadBalancerPlanner.class);
    private String _name;

    @Inject HostDao _hostDao = null;

    /** Stores the adapter name; no other configuration is read. */
    @Override
    public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
        _name = name;
        return true;
    }

    @Override
    public String getName() {
        return _name;
    }

    @Override
    public boolean start() {
        return true;
    }

    @Override
    public boolean stop() {
        return true;
    }

    /**
     * Selects the hosts that management server {@code msId} should hand over.
     *
     * @param msId   id of the management server whose hosts are considered
     * @param avLoad load the server's total host count is compared against
     * @return {@code null} when the server owns no more than {@code avLoad} hosts or has no
     *         direct hosts in status Up; otherwise the (possibly empty) list of hosts to give away
     */
    @Override
    public List<HostVO> getHostsToRebalance(long msId, long avLoad) {
        List<HostVO> allHosts = _hostDao.listByManagementServer(msId);

        if (allHosts.size() <= avLoad) {
            s_logger.debug("Agent load for management server " + msId + " doesn't exceed av load " + avLoad + "; so it doesn't participate in agent rebalancing process");
            return null;
        }

        List<HostVO> directHosts = _hostDao.listDirectHostsBy(msId, Status.Up);
        if (directHosts.isEmpty()) {
            s_logger.debug("No direct agents in status " + Status.Up + " exist for the management server " + msId + "; so it doesn't participate in agent rebalancing process");
            return null;
        }

        // Group the direct hosts by the cluster they belong to
        Map<Long, List<HostVO>> hostToClusterMap = new HashMap<Long, List<HostVO>>();
        for (HostVO directHost : directHosts) {
            Long clusterId = directHost.getClusterId();
            List<HostVO> directHostsPerCluster = hostToClusterMap.get(clusterId);
            if (directHostsPerCluster == null) {
                directHostsPerCluster = new ArrayList<HostVO>();
                hostToClusterMap.put(clusterId, directHostsPerCluster);
            }
            directHostsPerCluster.add(directHost);
        }

        hostToClusterMap = sortByClusterSize(hostToClusterMap);

        long hostsToGive = allHosts.size() - avLoad;
        long hostsLeftToGive = hostsToGive;
        long hostsLeft = directHosts.size(); // hosts in the clusters not visited yet
        List<HostVO> hostsToReturn = new ArrayList<HostVO>();
        int count = 0;

        for (Map.Entry<Long, List<HostVO>> entry : hostToClusterMap.entrySet()) {
            Long cluster = entry.getKey();
            List<HostVO> hostsInCluster = entry.getValue();
            hostsLeft = hostsLeft - hostsInCluster.size();
            count++;

            if (hostsToReturn.size() >= hostsToGive) {
                break;
            }

            s_logger.debug("Trying cluster id=" + cluster);

            if (hostsInCluster.size() <= hostsLeftToGive) {
                s_logger.debug("Taking all " + hostsInCluster.size() + " hosts: " + hostsInCluster + " from cluster id=" + cluster);
                hostsToReturn.addAll(hostsInCluster);
                hostsLeftToGive = hostsLeftToGive - hostsInCluster.size();
                continue;
            }

            if (hostsLeft >= hostsLeftToGive) {
                // the remaining (smaller) clusters can still cover the demand
                s_logger.debug("Skipping cluster id=" + cluster + " as it has more hosts that we need: " + hostsInCluster.size() + " vs " + hostsLeftToGive);
                continue;
            }

            if (count == hostToClusterMap.size()) {
                // Last cluster: take exactly the number of hosts still needed.
                // hostsLeftToGive is positive and smaller than hostsInCluster.size() here,
                // so the cast is safe and the sub-list range is valid.
                int hostsToTake = (int) hostsLeftToGive;
                for (HostVO host : hostsInCluster.subList(0, hostsToTake)) {
                    hostsToReturn.add(host);
                    s_logger.debug("Taking host " + host + " from cluster " + cluster);
                }
                hostsLeftToGive = 0;
            }
            break;
        }

        return hostsToReturn;
    }

    /**
     * Returns a copy of the map whose iteration order goes from the cluster with the most
     * hosts to the cluster with the fewest; clusters mapped to {@code null} come last.
     *
     * @param hostToClusterMap cluster id to hosts of that cluster
     * @return a new insertion-ordered map with the same entries, largest cluster first
     */
    public static LinkedHashMap<Long, List<HostVO>> sortByClusterSize(final Map<Long, List<HostVO>> hostToClusterMap) {
        List<Long> keys = new ArrayList<Long>(hostToClusterMap.keySet());

        Collections.sort(keys, new Comparator<Long>() {
            @Override
            public int compare(Long o1, Long o2) {
                List<HostVO> v1 = hostToClusterMap.get(o1);
                List<HostVO> v2 = hostToClusterMap.get(o2);
                if (v1 == null) {
                    return (v2 == null) ? 0 : 1;
                }
                if (v2 == null) {
                    return -1;
                }
                // Descending by size; all three outcomes are needed to honour the Comparator contract
                int size1 = v1.size();
                int size2 = v2.size();
                if (size1 < size2) {
                    return 1;
                }
                return (size1 > size2) ? -1 : 0;
            }
        });

        LinkedHashMap<Long, List<HostVO>> sortedMap = new LinkedHashMap<Long, List<HostVO>>();
        for (Long key : keys) {
            sortedMap.put(key, hostToClusterMap.get(key));
        }
        return sortedMap;
    }
}

View File

@ -0,0 +1,99 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster.agentlb;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Id;
import javax.persistence.Inheritance;
import javax.persistence.InheritanceType;
import javax.persistence.Table;
import com.cloud.utils.db.GenericDao;
/**
 * Persistent record of one host (agent) transfer between management servers,
 * stored in the {@code op_host_transfer} table. The row id is the id of the host
 * being transferred.
 */
@Entity
@Table(name = "op_host_transfer")
@Inheritance(strategy = InheritanceType.TABLE_PER_CLASS)
public class HostTransferMapVO {

    /** States a host transfer record can be in. */
    public enum HostTransferState {
        TransferRequested,
        TransferStarted,
        TransferCompleted,
        TransferFailed;
    }

    @Id
    @Column(name = "id")
    private long id;

    @Column(name = "initial_mgmt_server_id")
    private long initialOwner;

    @Column(name = "future_mgmt_server_id")
    private long futureOwner;

    @Column(name = "state")
    private HostTransferState state;

    @Column(name = GenericDao.CREATED_COLUMN)
    private Date created;

    /** No-argument constructor for the persistence layer. */
    protected HostTransferMapVO() {
    }

    /**
     * Creates a transfer record in state {@link HostTransferState#TransferRequested}.
     *
     * @param hostId       id of the host being transferred; used as the record id
     * @param initialOwner id of the management server that owns the host now
     * @param futureOwner  id of the management server that should own the host afterwards
     */
    public HostTransferMapVO(long hostId, long initialOwner, long futureOwner) {
        this.state = HostTransferState.TransferRequested;
        this.futureOwner = futureOwner;
        this.initialOwner = initialOwner;
        this.id = hostId;
    }

    /** @return the record id, which is the id of the transferred host */
    public long getId() {
        return id;
    }

    /** @return id of the management server the host is transferred from */
    public long getInitialOwner() {
        return initialOwner;
    }

    public void setInitialOwner(long initialOwner) {
        this.initialOwner = initialOwner;
    }

    /** @return id of the management server the host is transferred to */
    public long getFutureOwner() {
        return futureOwner;
    }

    public void setFutureOwner(long futureOwner) {
        this.futureOwner = futureOwner;
    }

    /** @return current state of the transfer */
    public HostTransferState getState() {
        return state;
    }

    public void setState(HostTransferState state) {
        this.state = state;
    }

    /** @return value of the created column; never assigned by this class itself */
    public Date getCreated() {
        return created;
    }
}

View File

@ -0,0 +1,41 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster.agentlb.dao;
import java.util.Date;
import java.util.List;
import com.cloud.cluster.agentlb.HostTransferMapVO;
import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState;
import com.cloud.utils.db.GenericDao;
/**
 * DAO for {@link HostTransferMapVO} records, which track hosts being transferred
 * between management servers. Record ids are host ids.
 */
public interface HostTransferMapDao extends GenericDao<HostTransferMapVO, Long> {
/**
 * Lists transfer records of hosts leaving an owner.
 * @param clusterId NOTE(review): despite the name, the visible implementation treats this as the
 *                  id of the initial owner (management server) — confirm and consider renaming
 */
List<HostTransferMapVO> listHostsLeavingCluster(long clusterId);
/**
 * Lists transfer records of hosts joining an owner.
 * @param clusterId NOTE(review): despite the name, the visible implementation treats this as the
 *                  id of the future owner (management server) — confirm and consider renaming
 */
List<HostTransferMapVO> listHostsJoiningCluster(long clusterId);
/**
 * Persists a new transfer record for the host.
 * @param hostId       id of the host being transferred
 * @param currentOwner id of the management server that owns the host now
 * @param futureOwner  id of the management server that should own the host afterwards
 * @return the persisted record
 */
HostTransferMapVO startAgentTransfering(long hostId, long currentOwner, long futureOwner);
/**
 * Marks the host's transfer record as completed or failed.
 * @param hostId  id of the host whose transfer finished
 * @param success true to record {@code TransferCompleted}, false to record {@code TransferFailed}
 * @return result of the update
 */
boolean completeAgentTransfering(long hostId, boolean success);
/**
 * Lists transfer records by future owner and state.
 * @param futureOwnerId id of the management server the hosts are transferred to
 * @param state         state the records must be in
 */
List<HostTransferMapVO> listBy(long futureOwnerId, HostTransferState state);
/**
 * Tells whether the host's transfer is considered active relative to a cut-off time.
 * In the visible implementation this is true when no record for the host exists in state
 * {@code TransferRequested} that was created at or before {@code cutTime}.
 * @param hostId  id of the host
 * @param cutTime cut-off creation time
 */
boolean isActive(long hostId, Date cutTime);
}

View File

@ -0,0 +1,125 @@
/**
* Copyright (C) 2010 Cloud.com, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.cluster.agentlb.dao;
import java.util.Date;
import java.util.List;
import javax.ejb.Local;
import org.apache.log4j.Logger;
import com.cloud.cluster.agentlb.HostTransferMapVO;
import com.cloud.cluster.agentlb.HostTransferMapVO.HostTransferState;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
/**
 * Default {@link HostTransferMapDao} implementation. Record ids are host ids.
 */
@Local(value = { HostTransferMapDao.class })
@DB(txn = false)
public class HostTransferMapDaoImpl extends GenericDaoBase<HostTransferMapVO, Long> implements HostTransferMapDao {
    private static final Logger s_logger = Logger.getLogger(HostTransferMapDaoImpl.class);

    protected final SearchBuilder<HostTransferMapVO> AllFieldsSearch;
    protected final SearchBuilder<HostTransferMapVO> IntermediateStateSearch;
    protected final SearchBuilder<HostTransferMapVO> InactiveSearch;

    public HostTransferMapDaoImpl() {
        AllFieldsSearch = createSearchBuilder();
        AllFieldsSearch.and("id", AllFieldsSearch.entity().getId(), SearchCriteria.Op.EQ);
        AllFieldsSearch.and("initialOwner", AllFieldsSearch.entity().getInitialOwner(), SearchCriteria.Op.EQ);
        AllFieldsSearch.and("futureOwner", AllFieldsSearch.entity().getFutureOwner(), SearchCriteria.Op.EQ);
        AllFieldsSearch.and("state", AllFieldsSearch.entity().getState(), SearchCriteria.Op.EQ);
        AllFieldsSearch.done();

        // Transfers still in progress, selectable by either owner. "initialOwner" must be
        // declared here because listHostsLeavingCluster() sets it, and the state condition
        // has to be IN: the callers pass the intermediate states they want to match.
        IntermediateStateSearch = createSearchBuilder();
        IntermediateStateSearch.and("futureOwner", IntermediateStateSearch.entity().getFutureOwner(), SearchCriteria.Op.EQ);
        IntermediateStateSearch.and("initialOwner", IntermediateStateSearch.entity().getInitialOwner(), SearchCriteria.Op.EQ);
        IntermediateStateSearch.and("state", IntermediateStateSearch.entity().getState(), SearchCriteria.Op.IN);
        IntermediateStateSearch.done();

        InactiveSearch = createSearchBuilder();
        InactiveSearch.and("created", InactiveSearch.entity().getCreated(), SearchCriteria.Op.LTEQ);
        InactiveSearch.and("id", InactiveSearch.entity().getId(), SearchCriteria.Op.EQ);
        InactiveSearch.and("state", InactiveSearch.entity().getState(), SearchCriteria.Op.EQ);
        InactiveSearch.done();
    }

    /**
     * Lists in-progress transfers (requested or started) whose initial owner is the given id.
     *
     * @param clusterId id matched against the initial owner of the transfer
     */
    @Override
    public List<HostTransferMapVO> listHostsLeavingCluster(long clusterId) {
        SearchCriteria<HostTransferMapVO> sc = IntermediateStateSearch.create();
        sc.setParameters("initialOwner", clusterId);
        sc.setParameters("state", HostTransferState.TransferRequested, HostTransferState.TransferStarted);

        return listBy(sc);
    }

    /**
     * Lists in-progress transfers (requested or started) whose future owner is the given id.
     *
     * @param clusterId id matched against the future owner of the transfer
     */
    @Override
    public List<HostTransferMapVO> listHostsJoiningCluster(long clusterId) {
        SearchCriteria<HostTransferMapVO> sc = IntermediateStateSearch.create();
        sc.setParameters("futureOwner", clusterId);
        sc.setParameters("state", HostTransferState.TransferRequested, HostTransferState.TransferStarted);

        return listBy(sc);
    }

    /**
     * Persists a new transfer record for the host in state {@code TransferRequested}.
     *
     * @return the persisted record
     */
    @Override
    public HostTransferMapVO startAgentTransfering(long hostId, long initialOwner, long futureOwner) {
        HostTransferMapVO transfer = new HostTransferMapVO(hostId, initialOwner, futureOwner);
        return persist(transfer);
    }

    /**
     * Marks the host's transfer record as completed or failed.
     *
     * @param hostId  id of the host whose transfer finished
     * @param success true to record {@code TransferCompleted}, false for {@code TransferFailed}
     * @return false when no transfer record exists for the host, otherwise the result of the update
     */
    @Override
    public boolean completeAgentTransfering(long hostId, boolean success) {
        HostTransferMapVO transfer = findById(hostId);
        if (transfer == null) {
            // Nothing to complete; report failure instead of failing with a NullPointerException
            s_logger.warn("Unable to find transfer record for host id=" + hostId);
            return false;
        }

        transfer.setState(success ? HostTransferState.TransferCompleted : HostTransferState.TransferFailed);
        return update(hostId, transfer);
    }

    /** Lists transfer records with the given future owner and state. */
    @Override
    public List<HostTransferMapVO> listBy(long futureOwnerId, HostTransferState state) {
        SearchCriteria<HostTransferMapVO> sc = AllFieldsSearch.create();
        sc.setParameters("futureOwner", futureOwnerId);
        sc.setParameters("state", state);

        return listBy(sc);
    }

    /**
     * Tells whether the host's transfer is still active relative to a cut-off time.
     *
     * @return true when no record for the host exists in state {@code TransferRequested}
     *         that was created at or before {@code cutTime}
     */
    @Override
    public boolean isActive(long hostId, Date cutTime) {
        SearchCriteria<HostTransferMapVO> sc = InactiveSearch.create();
        sc.setParameters("id", hostId);
        sc.setParameters("state", HostTransferState.TransferRequested);
        sc.setParameters("created", cutTime);

        return listBy(sc).isEmpty();
    }
}

View File

@ -22,10 +22,12 @@ import java.sql.Connection;
import java.util.Date;
import java.util.List;
import com.cloud.cluster.ManagementServerHost.State;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.utils.db.GenericDao;
public interface ManagementServerHostDao extends GenericDao<ManagementServerHostVO, Long> {
@Override
boolean remove(Long id);
ManagementServerHostVO findByMsid(long msid);
@ -41,4 +43,6 @@ public interface ManagementServerHostDao extends GenericDao<ManagementServerHost
void invalidateRunSession(Connection conn, long id, long runid);
List<ManagementServerHostVO> getActiveList(Connection conn, Date cutTime);
List<ManagementServerHostVO> getInactiveList(Connection conn, Date cutTime);
void update(Connection conn, long id, long runId, State state, Date lastUpdate);
}

View File

@ -30,8 +30,9 @@ import javax.ejb.Local;
import org.apache.log4j.Logger;
import com.cloud.cluster.ClusterInvalidSessionException;
import com.cloud.cluster.ManagementServerHost;
import com.cloud.cluster.ManagementServerHost.State;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.ManagementServerNode;
import com.cloud.utils.DateUtil;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GenericDaoBase;
@ -48,6 +49,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
private final SearchBuilder<ManagementServerHostVO> ActiveSearch;
private final SearchBuilder<ManagementServerHostVO> InactiveSearch;
@Override
public void update(Connection conn, long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate) {
PreparedStatement pstmt = null;
try {
@ -75,6 +77,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public void update(Connection conn, long id, long runid, Date lastUpdate) {
PreparedStatement pstmt = null;
try {
@ -101,6 +104,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public void invalidateRunSession(Connection conn, long id, long runid) {
PreparedStatement pstmt = null;
try {
@ -123,6 +127,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public List<ManagementServerHostVO> getActiveList(Connection conn, Date cutTime) {
Transaction txn = Transaction.openNew("getActiveList", conn);
try {
@ -135,6 +140,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public List<ManagementServerHostVO> getInactiveList(Connection conn, Date cutTime) {
Transaction txn = Transaction.openNew("getInactiveList", conn);
try {
@ -147,6 +153,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public ManagementServerHostVO findByMsid(long msid) {
SearchCriteria<ManagementServerHostVO> sc = MsIdSearch.create();
sc.setParameters("msid", msid);
@ -158,6 +165,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
return null;
}
@Override
@DB
public void update(long id, long runid, String name, String version, String serviceIP, int servicePort, Date lastUpdate) {
Transaction txn = Transaction.currentTxn();
@ -165,14 +173,15 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
try {
txn.start();
pstmt = txn.prepareAutoCloseStatement("update mshost set name=?, version=?, service_ip=?, service_port=?, last_update=?, removed=null, alert_count=0, runid=? where id=?");
pstmt = txn.prepareAutoCloseStatement("update mshost set name=?, version=?, service_ip=?, service_port=?, last_update=?, removed=null, alert_count=0, runid=?, state=? where id=?");
pstmt.setString(1, name);
pstmt.setString(2, version);
pstmt.setString(3, serviceIP);
pstmt.setInt(4, servicePort);
pstmt.setString(5, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate));
pstmt.setLong(6, runid);
pstmt.setLong(7, id);
pstmt.setString(7, State.Starting.toString());
pstmt.setLong(8, id);
pstmt.executeUpdate();
txn.commit();
@ -182,6 +191,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
@DB
public boolean remove(Long id) {
Transaction txn = Transaction.currentTxn();
@ -190,7 +200,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
txn.start();
ManagementServerHostVO msHost = findById(id);
msHost.setState(ManagementServerNode.State.Down);
msHost.setState(ManagementServerHost.State.Down);
super.remove(id);
txn.commit();
@ -203,6 +213,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
return false;
}
@Override
@DB
public void update(long id, long runid, Date lastUpdate) {
Transaction txn = Transaction.currentTxn();
@ -226,6 +237,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
}
}
@Override
public List<ManagementServerHostVO> getActiveList(Date cutTime) {
SearchCriteria<ManagementServerHostVO> sc = ActiveSearch.create();
sc.setParameters("lastUpdateTime", cutTime);
@ -233,6 +245,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
return listIncludingRemovedBy(sc);
}
@Override
public List<ManagementServerHostVO> getInactiveList(Date cutTime) {
SearchCriteria<ManagementServerHostVO> sc = InactiveSearch.create();
sc.setParameters("lastUpdateTime", cutTime);
@ -240,6 +253,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
return listIncludingRemovedBy(sc);
}
@Override
@DB
public void increaseAlertCount(long id) {
Transaction txn = Transaction.currentTxn();
@ -273,4 +287,33 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
InactiveSearch.and("removed", InactiveSearch.entity().getRemoved(), SearchCriteria.Op.NULL);
InactiveSearch.done();
}
/**
 * Updates the state and last-update time of an mshost row on the supplied connection.
 * The row is matched on both {@code id} and {@code runId}; the connection is committed
 * after the statement executes.
 *
 * @param conn       connection used to prepare the statement and commit
 * @param id         value matched against the {@code id} column
 * @param runId      value matched against the {@code runid} column
 * @param state      new state, stored via {@code toString()}
 * @param lastUpdate new last-update time, formatted in GMT
 * @throws CloudRuntimeException if no row was updated (wrapping a ClusterInvalidSessionException)
 *                               or if a SQLException occurs
 */
@Override
public void update(Connection conn, long id, long runId, State state, Date lastUpdate) {
    PreparedStatement pstmt = null;
    try {
        pstmt = conn.prepareStatement("update mshost set state=?, last_update=? where id=? and runid=?");
        pstmt.setString(1, state.toString());
        pstmt.setString(2, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate));
        pstmt.setLong(3, id);
        pstmt.setLong(4, runId);

        int count = pstmt.executeUpdate();
        conn.commit();

        if (count < 1) {
            // Nothing matched id + runid: the run session this caller holds is stale.
            throw new CloudRuntimeException("Invalid cluster session detected", new ClusterInvalidSessionException("runid " + runId + " is no longer valid"));
        }
    } catch (SQLException e) {
        // pstmt is still null if prepareStatement itself failed; plain concatenation is
        // null-safe, whereas pstmt.toString() would mask the SQLException with an NPE.
        throw new CloudRuntimeException("DB exception on " + pstmt, e);
    } finally {
        if (pstmt != null) {
            try {
                pstmt.close();
            } catch (Exception e) {
                s_logger.warn("Unable to close prepared statement due to exception ", e);
            }
        }
    }
}
}

View File

@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.List;
import com.cloud.agent.AgentManager;
import com.cloud.cluster.ClusterManager;
import com.cloud.consoleproxy.ConsoleProxyManager;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
@ -238,7 +239,9 @@ public enum Config {
DefaultMaxAccountTemplates("Account Defaults", ManagementServer.class, Long.class, "max.account.templates", "20", "The default maximum number of templates that can be deployed for an account", null),
DefaultMaxAccountSnapshots("Account Defaults", ManagementServer.class, Long.class, "max.account.snapshots", "20", "The default maximum number of snapshots that can be created for an account", null),
DefaultMaxAccountVolumes("Account Defaults", ManagementServer.class, Long.class, "max.account.volumes", "20", "The default maximum number of volumes that can be created for an account", null),
DirectAgentLoadSize("Advanced", ManagementServer.class, Integer.class, "direct.agent.load.size", "16", "The number of direct agents to load each time", null);
DirectAgentLoadSize("Advanced", ManagementServer.class, Integer.class, "direct.agent.load.size", "16", "The number of direct agents to load each time", null),
AgentLbEnable("Advanced", ClusterManager.class, Boolean.class, "agent.lb.enabled", "false", "If agent load balancing enabled in cluster setup", null);
private final String _category;
private final Class<?> _componentClass;

View File

@ -41,6 +41,7 @@ import com.cloud.cluster.ClusterFenceManagerImpl;
import com.cloud.cluster.ClusterManagerImpl;
import com.cloud.cluster.DummyClusterManagerImpl;
import com.cloud.cluster.ManagementServerNode;
import com.cloud.cluster.agentlb.dao.HostTransferMapDaoImpl;
import com.cloud.cluster.dao.ManagementServerHostDaoImpl;
import com.cloud.cluster.dao.StackMaidDaoImpl;
import com.cloud.configuration.dao.ConfigurationDaoImpl;
@ -276,6 +277,7 @@ public class DefaultComponentLibrary extends ComponentLibraryBase implements Com
addDao("KeystoreDao", KeystoreDaoImpl.class);
addDao("DcDetailsDao", DcDetailsDaoImpl.class);
addDao("SwiftDao", SwiftDaoImpl.class);
addDao("AgentTransferMapDao", HostTransferMapDaoImpl.class);
}
@Override

View File

@ -68,8 +68,7 @@ public interface HostDao extends GenericDao<HostVO, Long> {
*/
List<HostVO> findDirectlyConnectedHosts();
List<HostVO> findDirectAgentToLoad(long msid, long lastPingSecondsAfter, Long limit);
List<HostVO> findDirectAgentToLoad(long lastPingSecondsAfter, Long limit);
/**
* Mark the host as disconnected if it is in one of these states.
* The management server id is set to null.
@ -168,6 +167,12 @@ public interface HostDao extends GenericDao<HostVO, Long> {
boolean directConnect(HostVO host, long msId, boolean secondConnect);
List<HostVO> listDirectHostsBy(long msId, Status status);
List<HostVO> listManagedDirectAgents();
List<HostVO> listManagedAgents();
HostVO findTrafficMonitorHost();
List<HostVO> listLocalSecondaryStorageHosts();
@ -175,4 +180,6 @@ public interface HostDao extends GenericDao<HostVO, Long> {
List<HostVO> listLocalSecondaryStorageHosts(long dataCenterId);
List<HostVO> listAllSecondaryStorageHosts(long dataCenterId);
List<HostVO> listByManagementServer(long msId);
}

View File

@ -25,7 +25,6 @@ import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import javax.ejb.Local;
@ -57,8 +56,9 @@ import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.UpdateBuilder;
import com.cloud.utils.exception.CloudRuntimeException;
@Local(value = { HostDao.class }) @DB(txn=false)
@TableGenerator(name="host_req_sq", table="op_host", pkColumnName="id", valueColumnName="sequence", allocationSize=1)
@Local(value = { HostDao.class })
@DB(txn = false)
@TableGenerator(name = "host_req_sq", table = "op_host", pkColumnName = "id", valueColumnName = "sequence", allocationSize = 1)
public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao {
private static final Logger s_logger = Logger.getLogger(HostDaoImpl.class);
@ -90,6 +90,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
protected final SearchBuilder<HostVO> AvailHypevisorInZone;
protected final SearchBuilder<HostVO> DirectConnectSearch;
protected final SearchBuilder<HostVO> ManagedDirectConnectSearch;
protected final SearchBuilder<HostVO> ManagedConnectSearch;
protected final GenericSearchBuilder<HostVO, Long> HostsInStatusSearch;
protected final GenericSearchBuilder<HostVO, Long> CountRoutingByDc;
@ -183,7 +185,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
TypeSearch.and("type", TypeSearch.entity().getType(), SearchCriteria.Op.EQ);
TypeSearch.done();
StatusSearch =createSearchBuilder();
StatusSearch = createSearchBuilder();
StatusSearch.and("status", StatusSearch.entity().getStatus(), SearchCriteria.Op.IN);
StatusSearch.done();
@ -197,11 +199,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
SequenceSearch = createSearchBuilder();
SequenceSearch.and("id", SequenceSearch.entity().getId(), SearchCriteria.Op.EQ);
// SequenceSearch.addRetrieve("sequence", SequenceSearch.entity().getSequence());
// SequenceSearch.addRetrieve("sequence", SequenceSearch.entity().getSequence());
SequenceSearch.done();
DirectlyConnectedSearch = createSearchBuilder();
DirectlyConnectedSearch.and("resource", DirectlyConnectedSearch.entity().getResource(), SearchCriteria.Op.NNULL);
DirectlyConnectedSearch.and("ms", DirectlyConnectedSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
DirectlyConnectedSearch.and("statuses", DirectlyConnectedSearch.entity().getStatus(), SearchCriteria.Op.EQ);
DirectlyConnectedSearch.done();
UnmanagedDirectConnectSearch = createSearchBuilder();
@ -209,10 +213,10 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
UnmanagedDirectConnectSearch.and("server", UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL);
UnmanagedDirectConnectSearch.and("lastPinged", UnmanagedDirectConnectSearch.entity().getLastPinged(), SearchCriteria.Op.LTEQ);
/*
UnmanagedDirectConnectSearch.op(SearchCriteria.Op.OR, "managementServerId", UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
UnmanagedDirectConnectSearch.and("lastPinged", UnmanagedDirectConnectSearch.entity().getLastPinged(), SearchCriteria.Op.LTEQ);
UnmanagedDirectConnectSearch.cp();
UnmanagedDirectConnectSearch.cp();
* UnmanagedDirectConnectSearch.op(SearchCriteria.Op.OR, "managementServerId",
* UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
* UnmanagedDirectConnectSearch.and("lastPinged", UnmanagedDirectConnectSearch.entity().getLastPinged(),
* SearchCriteria.Op.LTEQ); UnmanagedDirectConnectSearch.cp(); UnmanagedDirectConnectSearch.cp();
*/
UnmanagedDirectConnectSearch.done();
@ -252,8 +256,18 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
CountRoutingByDc.and("dc", CountRoutingByDc.entity().getDataCenterId(), SearchCriteria.Op.EQ);
CountRoutingByDc.and("type", CountRoutingByDc.entity().getType(), SearchCriteria.Op.EQ);
CountRoutingByDc.and("status", CountRoutingByDc.entity().getStatus(), SearchCriteria.Op.EQ);
CountRoutingByDc.done();
ManagedDirectConnectSearch = createSearchBuilder();
ManagedDirectConnectSearch.and("resource", ManagedDirectConnectSearch.entity().getResource(), SearchCriteria.Op.NNULL);
ManagedDirectConnectSearch.and("server", ManagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL);
ManagedDirectConnectSearch.done();
ManagedConnectSearch = createSearchBuilder();
ManagedConnectSearch.and("server", ManagedConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NNULL);
ManagedConnectSearch.done();
_statusAttr = _allAttributes.get("status");
_msIdAttr = _allAttributes.get("managementServerId");
_pingTimeAttr = _allAttributes.get("lastPinged");
@ -265,7 +279,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
public long countBy(long clusterId, Status... statuses) {
SearchCriteria<HostVO> sc = MaintenanceCountSearch.create();
sc.setParameters("status", (Object[])statuses);
sc.setParameters("status", (Object[]) statuses);
sc.setParameters("cluster", clusterId);
List<HostVO> hosts = listBy(sc);
@ -333,7 +347,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
}
@Override
public List<HostVO> findDirectAgentToLoad(long msid, long lastPingSecondsAfter, Long limit) {
public List<HostVO> findDirectAgentToLoad(long lastPingSecondsAfter, Long limit) {
SearchCriteria<HostVO> sc = UnmanagedDirectConnectSearch.create();
sc.setParameters("lastPinged", lastPingSecondsAfter);
return search(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit));
@ -346,7 +360,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
HostVO host = createForUpdate();
host.setManagementServerId(null);
host.setLastPinged((System.currentTimeMillis() >> 10) - ( 10 * 60 ));
host.setLastPinged((System.currentTimeMillis() >> 10) - (10 * 60));
host.setDisconnectedOn(new Date());
UpdateBuilder ub = getUpdateBuilder(host);
@ -358,13 +372,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override
public List<HostVO> listBy(Host.Type type, Long clusterId, Long podId, long dcId) {
SearchCriteria<HostVO> sc = TypePodDcStatusSearch.create();
if ( type != null ) {
if (type != null) {
sc.setParameters("type", type.toString());
}
if (clusterId != null) {
sc.setParameters("cluster", clusterId);
}
if (podId != null ) {
if (podId != null) {
sc.setParameters("pod", podId);
}
sc.setParameters("dc", dcId);
@ -391,7 +405,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
SearchBuilder<HostTagVO> hostTagSearch = _hostTagsDao.createSearchBuilder();
HostTagVO tagEntity = hostTagSearch.entity();
hostTagSearch.and("tag",tagEntity.getTag(), SearchCriteria.Op.EQ);
hostTagSearch.and("tag", tagEntity.getTag(), SearchCriteria.Op.EQ);
SearchBuilder<HostVO> hostSearch = createSearchBuilder();
HostVO entity = hostSearch.entity();
@ -402,7 +416,6 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
hostSearch.and("status", entity.getStatus(), SearchCriteria.Op.EQ);
hostSearch.join("hostTagSearch", hostTagSearch, entity.getId(), tagEntity.getHostId(), JoinBuilder.JoinType.INNER);
SearchCriteria<HostVO> sc = hostSearch.create();
sc.setJoinParameters("hostTagSearch", "tag", hostTag);
sc.setParameters("type", type.toString());
@ -466,12 +479,12 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override
public void loadDetails(HostVO host) {
Map<String, String> details =_detailsDao.findDetails(host.getId());
Map<String, String> details = _detailsDao.findDetails(host.getId());
host.setDetails(details);
}
@Override
public void loadHostTags(HostVO host){
public void loadHostTags(HostVO host) {
List<String> hostTags = _hostTagsDao.gethostTags(host.getId());
host.setHostTags(hostTags);
}
@ -494,12 +507,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override
public boolean updateStatus(HostVO host, Event event, long msId) {
if (host == null) {
return false;
}
Status oldStatus = host.getStatus();
long oldPingTime = host.getLastPinged();
Status newStatus = oldStatus.getNextStatus(event);
if ( host == null ) {
return false;
}
if (newStatus == null) {
return false;
@ -533,12 +547,12 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
} else {
ub.set(host, _msIdAttr, msId);
}
if( event.equals(Event.Ping) || event.equals(Event.AgentConnected)) {
if (event.equals(Event.Ping) || event.equals(Event.AgentConnected)) {
ub.set(host, _pingTimeAttr, System.currentTimeMillis() >> 10);
}
}
if ( event.equals(Event.ManagementServerDown)) {
ub.set(host, _pingTimeAttr, (( System.currentTimeMillis() >> 10) - ( 10 * 60 )));
if (event.equals(Event.ManagementServerDown)) {
ub.set(host, _pingTimeAttr, ((System.currentTimeMillis() >> 10) - (10 * 60)));
}
int result = update(ub, sc, null);
assert result <= 1 : "How can this update " + result + " rows? ";
@ -548,7 +562,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
assert vo != null : "How how how? : " + host.getId();
StringBuilder str = new StringBuilder("Unable to update host for event:").append(event.toString());
str.append(". New=[status=").append(newStatus.toString()).append(":msid=").append(newStatus.lostConnection() ? "null" : msId).append(":lastpinged=").append(host.getLastPinged()).append("]");
str.append(". New=[status=").append(newStatus.toString()).append(":msid=").append(newStatus.lostConnection() ? "null" : msId).append(":lastpinged=").append(host.getLastPinged())
.append("]");
str.append("; Old=[status=").append(oldStatus.toString()).append(":msid=").append(msId).append(":lastpinged=").append(oldPingTime).append("]");
str.append("; DB=[status=").append(vo.getStatus().toString()).append(":msid=").append(vo.getManagementServerId()).append(":lastpinged=").append(vo.getLastPinged()).append("]");
s_logger.debug(str.toString());
@ -559,14 +574,15 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override
public boolean disconnect(HostVO host, Event event, long msId) {
host.setDisconnectedOn(new Date());
if(event!=null && event.equals(Event.Remove)) {
if (event != null && event.equals(Event.Remove)) {
host.setGuid(null);
host.setClusterId(null);
}
return updateStatus(host, event, msId);
}
@Override @DB
@Override
@DB
public boolean connect(HostVO host, long msId) {
Transaction txn = Transaction.currentTxn();
long id = host.getId();
@ -626,9 +642,9 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
SearchCriteria<HostVO> sc = ConsoleProxyHostSearch.create();
sc.setParameters("name", name);
sc.setParameters("type", type);
List<HostVO>hostList = listBy(sc);
List<HostVO> hostList = listBy(sc);
if(hostList==null || hostList.size() == 0) {
if (hostList == null || hostList.size() == 0) {
return null;
} else {
return hostList.get(0);
@ -644,7 +660,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override
public List<HostVO> listByStatus(Status... status) {
SearchCriteria<HostVO> sc = StatusSearch.create();
sc.setParameters("status", (Object[])status);
sc.setParameters("status", (Object[]) status);
return listBy(sc);
}
@ -681,7 +697,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
_hostTagsDao.persist(host.getId(), hostTags);
}
@Override @DB
@Override
@DB
public HostVO persist(HostVO host) {
final String InsertSequenceSql = "INSERT INTO op_host(id) VALUES(?)";
@ -708,7 +725,8 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return dbHost;
}
@Override @DB
@Override
@DB
public boolean update(Long hostId, HostVO host) {
Transaction txn = Transaction.currentTxn();
txn.start();
@ -726,21 +744,18 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return persisted;
}
@Override @DB
@Override
@DB
public List<RunningHostCountInfo> getRunningHostCounts(Date cutTime) {
String sql = "select * from (" +
"select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " +
"where h.status='Up' and h.type='SecondaryStorage' and m.last_update > ? " +
"group by h.data_center_id, h.type " +
"UNION ALL " +
"select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " +
"where h.status='Up' and h.type='Routing' and m.last_update > ? " +
"group by h.data_center_id, h.type) as t " +
"ORDER by t.data_center_id, t.type";
String sql = "select * from (" + "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid "
+ "where h.status='Up' and h.type='SecondaryStorage' and m.last_update > ? " + "group by h.data_center_id, h.type " + "UNION ALL "
+ "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid "
+ "where h.status='Up' and h.type='Routing' and m.last_update > ? " + "group by h.data_center_id, h.type) as t " + "ORDER by t.data_center_id, t.type";
ArrayList<RunningHostCountInfo> l = new ArrayList<RunningHostCountInfo>();
Transaction txn = Transaction.currentTxn();;
Transaction txn = Transaction.currentTxn();
;
PreparedStatement pstmt = null;
try {
pstmt = txn.prepareAutoCloseStatement(sql);
@ -749,7 +764,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
pstmt.setString(2, gmtCutTime);
ResultSet rs = pstmt.executeQuery();
while(rs.next()) {
while (rs.next()) {
RunningHostCountInfo info = new RunningHostCountInfo();
info.setDcId(rs.getLong(1));
info.setHostType(rs.getString(2));
@ -808,13 +823,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
sc.setParameters("type", hostType);
}
sc.setParameters("statuses", (Object[])statuses);
sc.setParameters("statuses", (Object[]) statuses);
return customSearch(sc, null);
}
@Override
public long countRoutingHostsByDataCenter(long dcId){
public long countRoutingHostsByDataCenter(long dcId) {
SearchCriteria<Long> sc = CountRoutingByDc.create();
sc.setParameters("dc", dcId);
sc.setParameters("type", Host.Type.Routing);
@ -834,4 +849,35 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return trafficHosts.get(0);
}
}
/**
 * Lists hosts found by DirectlyConnectedSearch for the given management server,
 * optionally restricted to a single status.
 *
 * @param msId   value bound to the "ms" search parameter
 * @param status status bound to the "statuses" search parameter; when {@code null} that
 *               parameter is left unset
 * @return the hosts matching the criteria
 */
@Override
public List<HostVO> listDirectHostsBy(long msId, Status status) {
    SearchCriteria<HostVO> sc = DirectlyConnectedSearch.create();
    sc.setParameters("ms", msId);
    if (status != null) {
        // Bind the requested status; Status.Up used to be hard-coded here, so any
        // other status passed by the caller was silently ignored.
        sc.setParameters("statuses", status);
    }

    return listBy(sc);
}
/**
 * Lists the hosts matched by ManagedDirectConnectSearch with no parameters bound.
 * NOTE(review): as built in the constructor, that search requires a non-null resource and a
 * NULL management server id, which seems at odds with "managed" - confirm the intended condition.
 *
 * @return the hosts matching ManagedDirectConnectSearch
 */
@Override
public List<HostVO> listManagedDirectAgents() {
    return listBy(ManagedDirectConnectSearch.create());
}
/**
 * Lists the hosts matched by ManagedConnectSearch, i.e. hosts whose management server id
 * is not null.
 *
 * @return the hosts matching ManagedConnectSearch
 */
@Override
public List<HostVO> listManagedAgents() {
    return listBy(ManagedConnectSearch.create());
}
/**
 * Lists hosts using MsStatusSearch with only the "ms" parameter bound.
 *
 * @param msId value bound to the "ms" search parameter
 * @return the hosts matching the criteria
 */
@Override
public List<HostVO> listByManagementServer(long msId) {
    final SearchCriteria<HostVO> byServer = MsStatusSearch.create();
    byServer.setParameters("ms", msId);
    final List<HostVO> hosts = listBy(byServer);
    return hosts;
}
}

View File

@ -113,6 +113,7 @@ DROP TABLE IF EXISTS `cloud`.`user_vm_details`;
DROP TABLE IF EXISTS `cloud`.`vpn_users`;
DROP TABLE IF EXISTS `cloud`.`data_center_details`;
DROP TABLE IF EXISTS `cloud`.`network_tags`;
DROP TABLE IF EXISTS `cloud`.`op_host_transfer`;
CREATE TABLE `cloud`.`version` (
`id` bigint unsigned NOT NULL UNIQUE AUTO_INCREMENT COMMENT 'id',
@ -1520,4 +1521,16 @@ CREATE TABLE `cloud`.`swift` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- op_host_transfer: one row per host (the primary key `id` references host.id) recording
-- the management server the host is moved from and the one it is moved to (both reference
-- mshost.msid), the transfer state, and the row creation time.
CREATE TABLE `cloud`.`op_host_transfer` (
`id` bigint unsigned UNIQUE NOT NULL COMMENT 'Id of the host',
`initial_mgmt_server_id` bigint unsigned COMMENT 'management server the host is transfered from',
`future_mgmt_server_id` bigint unsigned COMMENT 'management server the host is transfered to',
`state` varchar(32) NOT NULL COMMENT 'the transfer state of the host',
`created` datetime NOT NULL COMMENT 'date created',
PRIMARY KEY (`id`),
CONSTRAINT `fk_op_host_transfer__id` FOREIGN KEY `fk_op_host_transfer__id` (`id`) REFERENCES `host` (`id`),
CONSTRAINT `fk_op_host_transfer__initial_mgmt_server_id` FOREIGN KEY `fk_op_host_transfer__initial_mgmt_server_id`(`initial_mgmt_server_id`) REFERENCES `mshost`(`msid`),
CONSTRAINT `fk_op_host_transfer__future_mgmt_server_id` FOREIGN KEY `fk_op_host_transfer__future_mgmt_server_id`(`future_mgmt_server_id`) REFERENCES `mshost`(`msid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
SET foreign_key_checks = 1;