CLOUDSTACK-7916: Generate Alerts if System VMs cannot be started.

(cherry picked from commit 3907bc6f9d2b4a61a4d0b32e5c1a15d95210531e)
Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Nitin Mehta 2014-11-14 13:18:27 -08:00 committed by Rohit Yadav
parent 4e01d61f87
commit a12ab8a2b3
4 changed files with 114 additions and 97 deletions

View File

@ -53,7 +53,8 @@ public class ConsoleProxyAlertAdapter extends AdapterBase implements AlertAdapte
DataCenterVO dc = _dcDao.findById(args.getZoneId());
ConsoleProxyVO proxy = args.getProxy();
if (proxy == null)
// FIXME: proxy can be null when creation fails (the event then carries a proxy id of 0, so there is nothing to look up). Find a better fix than checking for != 0.
if (proxy == null && args.getProxyId() != 0)
proxy = _consoleProxyDao.findById(args.getProxyId());
switch (args.getType()) {
@ -98,12 +99,9 @@ public class ConsoleProxyAlertAdapter extends AdapterBase implements AlertAdapte
case ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE:
if (s_logger.isDebugEnabled())
s_logger.debug("Console proxy creation failure, zone: " + dc.getName() + ", proxy: " + proxy.getHostName() + ", public IP: " +
proxy.getPublicIpAddress() + ", private IP: " + (proxy.getPrivateIpAddress() == null ? "N/A" : proxy.getPrivateIpAddress()));
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY, args.getZoneId(), proxy.getPodIdToDeployIn(),
"Console proxy creation failure. zone: " + dc.getName() + ", proxy: " + proxy.getHostName() + ", public IP: " + proxy.getPublicIpAddress() +
", private IP: " + (proxy.getPrivateIpAddress() == null ? "N/A" : proxy.getPrivateIpAddress()) + ", error details: " + args.getMessage(),
s_logger.debug("Console proxy creation failure, zone: " + dc.getName());
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY, args.getZoneId(), null,
"Console proxy creation failure. zone: " + dc.getName() + ", error details: " + args.getMessage(),
"Console proxy creation failure (zone " + dc.getName() + ")");
break;

View File

@ -53,7 +53,7 @@ public class SecondaryStorageVmAlertAdapter extends AdapterBase implements Alert
DataCenterVO dc = _dcDao.findById(args.getZoneId());
SecondaryStorageVmVO secStorageVm = args.getSecStorageVm();
if (secStorageVm == null)
if (secStorageVm == null && args.getSecStorageVmId() != 0)
secStorageVm = _ssvmDao.findById(args.getSecStorageVmId());
switch (args.getType()) {
@ -103,13 +103,10 @@ public class SecondaryStorageVmAlertAdapter extends AdapterBase implements Alert
case SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE:
if (s_logger.isDebugEnabled())
s_logger.debug("Secondary Storage Vm creation failure, zone: " + dc.getName() + ", secStorageVm: " + secStorageVm.getHostName() + ", public IP: " +
secStorageVm.getPublicIpAddress() + ", private IP: " + (secStorageVm.getPrivateIpAddress() == null ? "N/A" : secStorageVm.getPrivateIpAddress()));
s_logger.debug("Secondary Storage Vm creation failure, zone: " + dc.getName());
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SSVM, args.getZoneId(), secStorageVm.getPodIdToDeployIn(),
"Secondary Storage Vm creation failure. zone: " +
dc.getName() + ", secStorageVm: " + secStorageVm.getHostName() + ", public IP: " + secStorageVm.getPublicIpAddress() + ", private IP: " +
(secStorageVm.getPrivateIpAddress() == null ? "N/A" : secStorageVm.getPrivateIpAddress()) + ", error details: " + args.getMessage(),
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SSVM, args.getZoneId(), null,
"Secondary Storage Vm creation failure. zone: " + dc.getName() + ", error details: " + args.getMessage(),
"Secondary Storage Vm creation failure (zone " + dc.getName() + ")");
break;

View File

@ -535,6 +535,7 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
if (proxy.getState() == VirtualMachine.State.Stopped) {
_itMgr.advanceStart(proxy.getUuid(), null, null);
proxy = _consoleProxyDao.findById(proxy.getId());
return proxy;
}
// For VMs that are in the Stopping, Starting, or Migrating state, make the client wait by returning null
@ -664,9 +665,6 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
if (s_logger.isDebugEnabled()) {
s_logger.debug("Unable to allocate console proxy storage, remove the console proxy record from DB, proxy id: " + proxyVmId);
}
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE, dataCenterId, proxyVmId, null, "Unable to allocate storage"));
}
return null;
}
@ -883,43 +881,57 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
}
ConsoleProxyVO proxy = null;
if (_allocProxyLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
try {
proxy = assignProxyFromStoppedPool(dataCenterId);
if (proxy == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("No stopped console proxy is available, need to allocate a new console proxy");
}
String errorString = null;
try{
if (_allocProxyLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
try {
proxy = assignProxyFromStoppedPool(dataCenterId);
if (proxy == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("No stopped console proxy is available, need to allocate a new console proxy");
}
try {
proxy = startNew(dataCenterId);
} catch (ConcurrentOperationException e) {
s_logger.info("Concurrent Operation caught " + e);
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Found a stopped console proxy, bring it up to running pool. proxy vm id : " + proxy.getId());
try {
proxy = startNew(dataCenterId);
} catch (ConcurrentOperationException e) {
s_logger.info("Concurrent Operation caught " + e);
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Found a stopped console proxy, bring it up to running pool. proxy vm id : " + proxy.getId());
}
}
} finally {
_allocProxyLock.unlock();
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to acquire proxy allocation lock, skip for next time");
}
} finally {
_allocProxyLock.unlock();
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to acquire proxy allocation lock, skip for next time");
}
}
if (proxy != null) {
long proxyVmId = proxy.getId();
proxy = startProxy(proxyVmId);
if (proxy != null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Console proxy " + proxy.getHostName() + " is started");
long proxyVmId = proxy.getId();
proxy = startProxy(proxyVmId);
if (proxy != null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Console proxy " + proxy.getHostName() + " is started");
}
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_UP, dataCenterId, proxy.getId(), proxy, null));
}
}
}catch (Exception e){
errorString = e.getMessage();
}finally {
// TODO: for now, report every failure as a creation failure. Distinguish creation failures from start failures in the future.
// Also include the failure reason, since starting the VM masks some of them.
if(proxy == null || proxy.getState() != State.Running)
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE, dataCenterId, 0l, null, errorString));
}
}
public boolean isZoneReady(Map<Long, ZoneHostInfo> zoneHostInfoMap, long dataCenterId) {

View File

@ -506,9 +506,6 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar
s_logger.debug("Unable to allocate secondary storage vm storage, remove the secondary storage vm record from DB, secondary storage vm id: " +
secStorageVmId);
}
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE, dataCenterId, secStorageVmId, null, "Unable to allocate storage"));
}
return null;
}
@ -654,70 +651,83 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar
}
return;
}
boolean secStorageVmFromStoppedPool = false;
SecondaryStorageVmVO secStorageVm = assignSecStorageVmFromStoppedPool(dataCenterId, role);
if (secStorageVm == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("No stopped secondary storage vm is available, need to allocate a new secondary storage vm");
}
if (_allocLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
try {
secStorageVm = startNew(dataCenterId, role);
for (UploadVO upload : _uploadDao.listAll()) {
_uploadDao.expunge(upload.getId());
}
} finally {
_allocLock.unlock();
}
} else {
SecondaryStorageVmVO secStorageVm = null;
String errorString = null;
try{
boolean secStorageVmFromStoppedPool = false;
secStorageVm = assignSecStorageVmFromStoppedPool(dataCenterId, role);
if (secStorageVm == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to acquire synchronization lock to allocate secStorageVm resource for standby capacity, wait for next scan");
s_logger.info("No stopped secondary storage vm is available, need to allocate a new secondary storage vm");
}
return;
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Found a stopped secondary storage vm, bring it up to running pool. secStorageVm vm id : " + secStorageVm.getId());
}
secStorageVmFromStoppedPool = true;
}
if (secStorageVm != null) {
long secStorageVmId = secStorageVm.getId();
GlobalLock secStorageVmLock = GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVmId));
try {
if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
if (_allocLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
try {
secStorageVm = startSecStorageVm(secStorageVmId);
secStorageVm = startNew(dataCenterId, role);
for (UploadVO upload : _uploadDao.listAll()) {
_uploadDao.expunge(upload.getId());
}
} finally {
secStorageVmLock.unlock();
_allocLock.unlock();
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to acquire synchronization lock to start secStorageVm for standby capacity, secStorageVm vm id : " + secStorageVm.getId());
s_logger.info("Unable to acquire synchronization lock to allocate secStorageVm resource for standby capacity, wait for next scan");
}
return;
}
} finally {
secStorageVmLock.releaseRef();
}
if (secStorageVm == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to start secondary storage vm for standby capacity, secStorageVm vm Id : " + secStorageVmId +
", will recycle it and start a new one");
}
if (secStorageVmFromStoppedPool) {
destroySecStorageVm(secStorageVmId);
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Secondary storage vm " + secStorageVm.getHostName() + " is started");
s_logger.info("Found a stopped secondary storage vm, bring it up to running pool. secStorageVm vm id : " + secStorageVm.getId());
}
secStorageVmFromStoppedPool = true;
}
if (secStorageVm != null) {
long secStorageVmId = secStorageVm.getId();
GlobalLock secStorageVmLock = GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVmId));
try {
if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
try {
secStorageVm = startSecStorageVm(secStorageVmId);
} finally {
secStorageVmLock.unlock();
}
} else {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to acquire synchronization lock to start secStorageVm for standby capacity, secStorageVm vm id : " + secStorageVm.getId());
}
return;
}
} finally {
secStorageVmLock.releaseRef();
}
if (secStorageVm == null) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Unable to start secondary storage vm for standby capacity, secStorageVm vm Id : " + secStorageVmId +
", will recycle it and start a new one");
}
if (secStorageVmFromStoppedPool) {
destroySecStorageVm(secStorageVmId);
}
} else {
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_UP, dataCenterId, secStorageVmId, secStorageVm, null));
if (s_logger.isInfoEnabled()) {
s_logger.info("Secondary storage vm " + secStorageVm.getHostName() + " is started");
}
}
}
}catch (Exception e){
errorString = e.getMessage();
}finally{
// TODO: for now, report every failure as a creation failure. Distinguish creation failures from start failures in the future.
// Also include the failure reason, since starting the VM masks some of them.
if(secStorageVm == null || secStorageVm.getState() != State.Running)
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE, dataCenterId, 0l, null, errorString));
}
}