Fixed a problem where the standalone connection had an open transaction

This commit is contained in:
Alex Huang 2011-07-22 11:23:41 -07:00
parent 9791cddccc
commit 66713a490d
4 changed files with 126 additions and 123 deletions

View File

@ -18,6 +18,7 @@
package com.cloud.agent.manager;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;
import org.apache.log4j.Logger;
@ -59,13 +60,13 @@ public class AgentMonitor extends Thread implements Listener {
private AlertManager _alertMgr;
private long _msId;
private ConnectionConcierge _concierge;
protected AgentMonitor() {
}
public AgentMonitor(long msId, HostDao hostDao, VMInstanceDao vmDao, DataCenterDao dcDao, HostPodDao podDao, AgentManagerImpl agentMgr, AlertManager alertMgr, long pingTimeout) {
super("AgentMonitor");
_msId = msId;
super("AgentMonitor");
_msId = msId;
_pingTimeout = pingTimeout;
_hostDao = hostDao;
_agentMgr = agentMgr;
@ -74,19 +75,21 @@ public class AgentMonitor extends Thread implements Listener {
_dcDao = dcDao;
_podDao = podDao;
_alertMgr = alertMgr;
Connection conn = Transaction.getStandaloneConnection();
if (conn == null) {
throw new CloudRuntimeException("Unable to get a db connection.");
try {
Connection conn = Transaction.getStandaloneConnectionWithException();
conn.setAutoCommit(true);
_concierge = new ConnectionConcierge("AgentMonitor", conn, false);
} catch (SQLException e) {
throw new CloudRuntimeException("Unable to get a db connection", e);
}
_concierge = new ConnectionConcierge("AgentMonitor", conn, true, true);
}
// TODO: using the host machine's time is not safe in a clustered environment
@Override
public void run() {
public void run() {
s_logger.info("Agent Monitor is started.");
while (!_stop) {
try {
// check every 60 seconds
@ -94,41 +97,41 @@ public class AgentMonitor extends Thread implements Listener {
} catch (InterruptedException e) {
s_logger.info("Who woke me from my slumber?");
}
GlobalLock lock = GlobalLock.getInternLock("AgentMonitorLock");
if (lock == null) {
s_logger.error("Unable to acquire lock. Better luck next time?");
continue;
}
if (!lock.lock(10)) {
s_logger.info("Someone else is already working on the agents. Skipping my turn");
continue;
}
GlobalLock lock = GlobalLock.getInternLock("AgentMonitorLock");
if (lock == null) {
s_logger.error("Unable to acquire lock. Better luck next time?");
continue;
}
if (!lock.lock(10)) {
s_logger.info("Someone else is already working on the agents. Skipping my turn");
continue;
}
try {
long time = (System.currentTimeMillis() >> 10) - _pingTimeout;
List<HostVO> hosts = _hostDao.findLostHosts(time);
if (s_logger.isInfoEnabled()) {
s_logger.info("Found " + hosts.size() + " hosts behind on ping. pingTimeout : " + _pingTimeout + ", mark time : " + time);
}
for (HostVO host : hosts) {
if (host.getType().equals(Host.Type.ExternalFirewall) ||
host.getType().equals(Host.Type.ExternalLoadBalancer) ||
host.getType().equals(Host.Type.TrafficMonitor) ||
host.getType().equals(Host.Type.SecondaryStorage)) {
continue;
}
if (host.getManagementServerId() == null || host.getManagementServerId() == _msId) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Asking agent mgr to investgate why host " + host.getId() + " is behind on ping. last ping time: " + host.getLastPinged());
}
_agentMgr.disconnect(host.getId(), Event.PingTimeout, true);
}
if (host.getType().equals(Host.Type.ExternalFirewall) ||
host.getType().equals(Host.Type.ExternalLoadBalancer) ||
host.getType().equals(Host.Type.TrafficMonitor) ||
host.getType().equals(Host.Type.SecondaryStorage)) {
continue;
}
if (host.getManagementServerId() == null || host.getManagementServerId() == _msId) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Asking agent mgr to investgate why host " + host.getId() + " is behind on ping. last ping time: " + host.getLastPinged());
}
_agentMgr.disconnect(host.getId(), Event.PingTimeout, true);
}
}
hosts = _hostDao.listByStatus(Status.PrepareForMaintenance, Status.ErrorInMaintenance);
for (HostVO host : hosts) {
long hostId = host.getId();
@ -147,13 +150,13 @@ public class AgentMonitor extends Thread implements Listener {
} catch (Throwable th) {
s_logger.error("Caught the following exception: ", th);
} finally {
lock.unlock();
lock.unlock();
}
}
s_logger.info("Agent Monitor is leaving the building!");
}
public void signalStop() {
_stop = true;
interrupt();
@ -193,10 +196,10 @@ public class AgentMonitor extends Thread implements Listener {
}
return processed;
}
/**
 * Control-command callback. This implementation does no work: neither
 * argument is inspected and no answer is produced.
 *
 * @param agentId not used by this implementation
 * @param cmd not used by this implementation
 * @return always {@code null}
 */
@Override
public AgentControlAnswer processControlCommand(long agentId, AgentControlCommand cmd) {
return null;
return null;
}
@Override
@ -207,15 +210,15 @@ public class AgentMonitor extends Thread implements Listener {
/**
 * Disconnect callback. This implementation does no work and reports success
 * unconditionally.
 *
 * @param agentId not used by this implementation
 * @param state not used by this implementation
 * @return always {@code true}
 */
public boolean processDisconnect(long agentId, Status state) {
return true;
}
/**
 * Timeout callback. This implementation does no work and reports success
 * unconditionally.
 *
 * @param agentId not used by this implementation
 * @param seq not used by this implementation
 * @return always {@code true}
 */
@Override
public boolean processTimeout(long agentId, long seq) {
return true;
return true;
}
/**
 * Reports this listener's timeout value.
 *
 * @return always {@code -1}
 */
// NOTE(review): -1 presumably means "no timeout" to the caller -- confirm against the Listener contract.
@Override
public int getTimeout() {
return -1;
return -1;
}
}

View File

@ -461,26 +461,26 @@ public class ClusterManagerImpl implements ClusterManager {
/**
 * Adds a listener to {@code listeners} while holding the monitor of that
 * collection. The same monitor is taken by the notify methods when they
 * iterate over the listeners.
 *
 * @param listener the listener to add; it is not checked against the
 *                 listeners that are already registered
 */
public void registerListener(ClusterManagerListener listener) {
// Note : we don't check duplicates
// (whether a repeated registration is stored twice depends on the collection type, which is declared elsewhere)
synchronized (listeners) {
listeners.add(listener);
listeners.add(listener);
}
}
/**
 * Removes a listener from {@code listeners} while holding the monitor of
 * that collection.
 *
 * @param listener the listener to remove
 */
@Override
public void unregisterListener(ClusterManagerListener listener) {
synchronized(listeners) {
listeners.remove(listener);
listeners.remove(listener);
}
}
public void notifyNodeJoined(List<ManagementServerHostVO> nodeList) {
if(s_logger.isDebugEnabled()) {
s_logger.debug("Notify management server node join to listeners.");
for(ManagementServerHostVO mshost : nodeList) {
s_logger.debug("Joining node, IP: " + mshost.getServiceIP() + ", msid: " + mshost.getMsid());
}
}
if(s_logger.isDebugEnabled()) {
s_logger.debug("Notify management server node join to listeners.");
for(ManagementServerHostVO mshost : nodeList) {
s_logger.debug("Joining node, IP: " + mshost.getServiceIP() + ", msid: " + mshost.getMsid());
}
}
synchronized(listeners) {
for(ClusterManagerListener listener : listeners) {
listener.onManagementNodeJoined(nodeList, _mshostId);
@ -492,14 +492,14 @@ public class ClusterManagerImpl implements ClusterManager {
}
public void notifyNodeLeft(List<ManagementServerHostVO> nodeList) {
if(s_logger.isDebugEnabled()) {
s_logger.debug("Notify management server node left to listeners.");
for(ManagementServerHostVO mshost : nodeList) {
s_logger.debug("Leaving node, IP: " + mshost.getServiceIP() + ", msid: " + mshost.getMsid());
}
}
if(s_logger.isDebugEnabled()) {
s_logger.debug("Notify management server node left to listeners.");
for(ManagementServerHostVO mshost : nodeList) {
s_logger.debug("Leaving node, IP: " + mshost.getServiceIP() + ", msid: " + mshost.getMsid());
}
}
synchronized(listeners) {
for(ClusterManagerListener listener : listeners) {
listener.onManagementNodeLeft(nodeList, _mshostId);
@ -511,9 +511,9 @@ public class ClusterManagerImpl implements ClusterManager {
}
public void notifyNodeIsolated() {
if(s_logger.isDebugEnabled())
s_logger.debug("Notify management server node isolation to listeners");
if(s_logger.isDebugEnabled())
s_logger.debug("Notify management server node isolation to listeners");
synchronized(listeners) {
for(ClusterManagerListener listener : listeners) {
listener.onManagementNodeIsolated();
@ -669,7 +669,7 @@ public class ClusterManagerImpl implements ClusterManager {
private Connection getHeartbeatConnection() throws SQLException {
if(_heartbeatConnection == null) {
Connection conn = Transaction.getStandaloneConnectionWithException();
_heartbeatConnection = new ConnectionConcierge("ClusterManagerHeartBeat", conn, false, false);
_heartbeatConnection = new ConnectionConcierge("ClusterManagerHeartBeat", conn, false);
}
return _heartbeatConnection.conn();
@ -957,11 +957,10 @@ public class ClusterManagerImpl implements ClusterManager {
if (s_logger.isInfoEnabled()) {
s_logger.info("Management server (host id : " + _mshostId + ") is being started at " + _clusterNodeIP + ":" + _currentServiceAdapter.getServicePort());
}
// use separate thread for heartbeat updates
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), heartbeatInterval, heartbeatInterval, TimeUnit.MILLISECONDS);
_notificationExecutor.submit(getNotificationTask());
} catch (Throwable e) {
s_logger.error("Unexpected exception : ", e);
@ -1081,8 +1080,8 @@ public class ClusterManagerImpl implements ClusterManager {
if(_currentServiceAdapter == null) {
throw new ConfigurationException("Unable to set current cluster service adapter");
}
_agentLBEnabled = Boolean.valueOf(configDao.getValue(Config.AgentLbEnable.key()));
String connectedAgentsThreshold = configs.get("agent.load.threshold");
@ -1213,7 +1212,7 @@ public class ClusterManagerImpl implements ClusterManager {
/**
 * Hands an agent rebalance request to {@code _rebalanceService} and returns
 * its result unchanged.
 *
 * Note the argument order changes in the delegation: {@code event} is the
 * second parameter here but is passed last to
 * {@code executeRebalanceRequest}.
 *
 * @param agentId forwarded as the first argument
 * @param event forwarded as the last argument
 * @param currentOwnerId forwarded as the second argument
 * @param futureOwnerId forwarded as the third argument
 * @return whatever {@code executeRebalanceRequest} returns
 * @throws AgentUnavailableException declared here; presumably propagated from the rebalance service
 * @throws OperationTimedoutException declared here; presumably propagated from the rebalance service
 */
public boolean rebalanceAgent(long agentId, Event event, long currentOwnerId, long futureOwnerId) throws AgentUnavailableException, OperationTimedoutException {
return _rebalanceService.executeRebalanceRequest(agentId, currentOwnerId, futureOwnerId, event);
}
@Override
public boolean isAgentRebalanceEnabled() {
return _agentLBEnabled;

View File

@ -48,19 +48,19 @@ import com.cloud.utils.mgmt.JmxUtil;
* your own.
*/
public class ConnectionConcierge {
static final Logger s_logger = Logger.getLogger(ConnectionConcierge.class);
static final ConnectionConciergeManager s_mgr = new ConnectionConciergeManager();
Connection _conn;
String _name;
boolean _keepAlive;
boolean _autoCommit;
int _isolationLevel;
int _holdability;
public ConnectionConcierge(String name, Connection conn, boolean autoCommit, boolean keepAlive) {
public ConnectionConcierge(String name, Connection conn, boolean keepAlive) {
_name = name + s_mgr.getNextId();
_keepAlive = keepAlive;
try {
@ -72,7 +72,7 @@ public class ConnectionConcierge {
}
reset(conn);
}
public void reset(Connection conn) {
try {
release();
@ -90,11 +90,11 @@ public class ConnectionConcierge {
s_mgr.register(_name, this);
s_logger.debug("Registering a database connection for " + _name);
}
/**
 * Returns the connection held in {@code _conn}. No validity check is
 * performed here.
 *
 * @return the current value of {@code _conn}
 */
public final Connection conn() {
return _conn;
}
public void release() {
s_mgr.unregister(_name);
try {
@ -106,23 +106,23 @@ public class ConnectionConcierge {
throw new CloudRuntimeException("Problem in closing a connection", e);
}
}
/**
 * Finalizer safety net: calls {@link #release()} if {@code _conn} is still
 * non-null when this object is finalized.
 *
 * @throws Exception declared by this override; {@code release()} itself
 *                   can throw a CloudRuntimeException when closing fails
 */
@Override
protected void finalize() throws Exception {
if (_conn != null) {
release();
}
}
/**
 * Returns the keep-alive flag that was passed to the constructor.
 *
 * @return the value of {@code _keepAlive}
 */
public boolean keepAlive() {
return _keepAlive;
}
protected static class ConnectionConciergeManager extends StandardMBean implements ConnectionConciergeMBean {
ScheduledExecutorService _executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("ConnectionKeeper"));
final ConcurrentHashMap<String, ConnectionConcierge> _conns = new ConcurrentHashMap<String, ConnectionConcierge>();
final AtomicInteger _idGenerator = new AtomicInteger();
ConnectionConciergeManager() {
super(ConnectionConciergeMBean.class, false);
resetKeepAliveTask(20);
@ -132,19 +132,19 @@ public class ConnectionConcierge {
s_logger.error("Unable to register mbean", e);
}
}
/**
 * Produces the next id from the shared atomic counter. The counter starts
 * at zero and is incremented before being read, so the first id is 1.
 * The ConnectionConcierge constructor appends this id to the concierge name.
 *
 * @return the incremented counter value, boxed
 */
public Integer getNextId() {
return _idGenerator.incrementAndGet();
}
/**
 * Records a concierge under the given name in {@code _conns}. An existing
 * entry with the same name is replaced.
 *
 * @param name key to store the concierge under
 * @param concierge the concierge to track
 */
public void register(String name, ConnectionConcierge concierge) {
_conns.put(name, concierge);
}
/**
 * Removes the entry stored under the given name from {@code _conns}.
 * Does nothing if no such entry exists.
 *
 * @param name key of the concierge to stop tracking
 */
public void unregister(String name) {
_conns.remove(name);
}
protected String testValidity(String name, Connection conn) {
PreparedStatement pstmt = null;
try {
@ -182,12 +182,12 @@ public class ConnectionConcierge {
if (concierge == null) {
return "Not Found";
}
Connection conn = Transaction.getStandaloneConnection();
if (conn == null) {
return "Unable to get anotehr db connection";
}
concierge.reset(conn);
return "Done";
}
@ -201,7 +201,7 @@ public class ConnectionConcierge {
s_logger.error("Unable to shutdown executor", e);
}
}
_executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("ConnectionConcierge"));
_executor.schedule(new Runnable() {
@Override
@ -215,7 +215,7 @@ public class ConnectionConcierge {
}
}
}, seconds, TimeUnit.SECONDS);
return "As you wish.";
}

View File

@ -40,7 +40,7 @@ import com.cloud.utils.time.InaccurateClock;
public class Merovingian2 extends StandardMBean implements MerovingianMBean {
private static final Logger s_logger = Logger.getLogger(Merovingian2.class);
private static final String ACQUIRE_SQL = "INSERT INTO op_lock (op_lock.key, op_lock.mac, op_lock.ip, op_lock.thread, op_lock.acquired_on, waiters) VALUES (?, ?, ?, ?, ?, 1)";
private static final String INCREMENT_SQL = "UPDATE op_lock SET waiters=waiters+1 where op_lock.key=? AND op_lock.mac=? AND op_lock.ip=? AND op_lock.thread=?";
private static final String SELECT_SQL = "SELECT op_lock.key, mac, ip, thread, acquired_on, waiters FROM op_lock";
@ -52,14 +52,14 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
private static final String SELECT_MGMT_LOCKS_SQL = SELECT_SQL + " WHERE mac=?";
private static final String SELECT_THREAD_LOCKS_SQL = SELECT_SQL + " WHERE mac=? AND ip=?";
private static final String CLEANUP_THREAD_LOCKS_SQL = "DELETE FROM op_lock WHERE mac=? AND ip=? AND thread=?";
TimeZone s_gmtTimeZone = TimeZone.getTimeZone("GMT");
private long _msId;
private final long _msId;
private static Merovingian2 s_instance = null;
ConnectionConcierge _concierge = null;
private Merovingian2(long msId) {
super(MerovingianMBean.class, false);
_msId = msId;
@ -67,13 +67,14 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
try {
conn = Transaction.getStandaloneConnectionWithException();
conn.setTransactionIsolation(Connection.TRANSACTION_READ_COMMITTED);
_concierge = new ConnectionConcierge("LockMaster", conn, true, true);
conn.setAutoCommit(true);
_concierge = new ConnectionConcierge("LockMaster", conn, false);
} catch (SQLException e) {
s_logger.error("Unable to get a new db connection", e);
throw new CloudRuntimeException("Unable to initialize a connection to the database for locking purposes: ", e);
}
}
public static synchronized Merovingian2 createLockMaster(long msId) {
assert s_instance == null : "No lock can serve two masters. Either he will hate the one and love the other, or he will be devoted to the one and despise the other.";
s_instance = new Merovingian2(msId);
@ -85,16 +86,16 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
return s_instance;
}
/**
 * Returns the singleton held in {@code s_instance}.
 *
 * @return the current instance, or {@code null} if none has been created
 *         yet ({@code s_instance} is initialised to {@code null})
 */
// NOTE(review): this read is unsynchronized while createLockMaster is synchronized, and
// s_instance is not volatile -- confirm that cross-thread visibility is acceptable here.
public static Merovingian2 getLockMaster() {
return s_instance;
}
public boolean acquire(String key, int timeInSeconds) {
Thread th = Thread.currentThread();
String threadName = th.getName();
int threadId = System.identityHashCode(th);
if (s_logger.isTraceEnabled()) {
s_logger.trace("Acquiring lck-" + key + " with wait time of " + timeInSeconds);
}
@ -120,7 +121,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
return false;
}
protected boolean increment(String key, String threadName, int threadId) {
PreparedStatement pstmt = null;
try {
@ -146,7 +147,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
protected boolean doAcquire(String key, String threadName, int threadId) {
PreparedStatement pstmt = null;
@ -181,11 +182,11 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
} catch (SQLException e) {
}
}
s_logger.trace("Unable to acquire lck-" + key);
return false;
}
protected Map<String, String> isLocked(String key) {
PreparedStatement pstmt = null;
ResultSet rs = null;
@ -196,7 +197,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
if (!rs.next()) {
return null;
}
return toLock(rs);
} catch (SQLException e) {
throw new CloudRuntimeException("SQL Exception on inquiry", e);
@ -213,7 +214,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
/**
 * Runs {@code cleanupForServer} for this instance's own management server
 * id ({@code _msId}).
 */
public void cleanupThisServer() {
cleanupForServer(_msId);
}
@ -238,7 +239,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
public boolean release(String key) {
PreparedStatement pstmt = null;
Thread th = Thread.currentThread();
@ -277,7 +278,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
protected Map<String, String> toLock(ResultSet rs) throws SQLException {
Map<String, String> map = new HashMap<String, String>();
map.put("key", rs.getString(1));
@ -287,9 +288,9 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
map.put("date", rs.getString(5));
map.put("count", Integer.toString(rs.getInt(6)));
return map;
}
protected List<Map<String, String>> toLocks(ResultSet rs) throws SQLException {
LinkedList<Map<String, String>> results = new LinkedList<Map<String, String>>();
while (rs.next()) {
@ -297,7 +298,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
return results;
}
protected List<Map<String, String>> getLocks(String sql, Long msId) {
PreparedStatement pstmt = null;
ResultSet rs = null;
@ -332,7 +333,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
/**
 * Lists lock rows for this management server by running
 * {@code SELECT_MGMT_LOCKS_SQL} (the base select with {@code WHERE mac=?})
 * through {@code getLocks} with this instance's {@code _msId}.
 *
 * @return the list returned by {@code getLocks}; presumably one map per
 *         matching op_lock row -- confirm against {@code getLocks}
 */
public List<Map<String, String>> getLocksAcquiredByThisServer() {
return getLocks(SELECT_MGMT_LOCKS_SQL, _msId);
}
public int owns(String key) {
Thread th = Thread.currentThread();
int threadId = System.identityHashCode(th);
@ -345,7 +346,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
return -1;
}
public List<Map<String, String>> getLocksAcquiredBy(long msId, String threadName) {
PreparedStatement pstmt = null;
ResultSet rs = null;
@ -369,12 +370,12 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
public void cleanupThread() {
Thread th = Thread.currentThread();
String threadName = th.getName();
int threadId = System.identityHashCode(th);
PreparedStatement pstmt = null;
try {
pstmt = _concierge.conn().prepareStatement(CLEANUP_THREAD_LOCKS_SQL);
@ -394,7 +395,7 @@ public class Merovingian2 extends StandardMBean implements MerovingianMBean {
}
}
}
@Override
public boolean releaseLockAsLastResortAndIReallyKnowWhatIAmDoing(String key) {
PreparedStatement pstmt = null;