mirror of
https://github.com/apache/cloudstack.git
synced 2025-11-02 11:52:28 +01:00
CLOUDSTACK-7916: Generate Alerts if System VMs cannot be started.
(cherry picked from commit 3907bc6f9d2b4a61a4d0b32e5c1a15d95210531e) Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
parent
4e01d61f87
commit
a12ab8a2b3
@ -53,7 +53,8 @@ public class ConsoleProxyAlertAdapter extends AdapterBase implements AlertAdapte
|
||||
|
||||
DataCenterVO dc = _dcDao.findById(args.getZoneId());
|
||||
ConsoleProxyVO proxy = args.getProxy();
|
||||
if (proxy == null)
|
||||
// FIXME - The proxy can be null in case of a creation failure. Find a better fix than checking that the proxy id is != 0.
|
||||
if (proxy == null && args.getProxyId() != 0)
|
||||
proxy = _consoleProxyDao.findById(args.getProxyId());
|
||||
|
||||
switch (args.getType()) {
|
||||
@ -98,12 +99,9 @@ public class ConsoleProxyAlertAdapter extends AdapterBase implements AlertAdapte
|
||||
|
||||
case ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE:
|
||||
if (s_logger.isDebugEnabled())
|
||||
s_logger.debug("Console proxy creation failure, zone: " + dc.getName() + ", proxy: " + proxy.getHostName() + ", public IP: " +
|
||||
proxy.getPublicIpAddress() + ", private IP: " + (proxy.getPrivateIpAddress() == null ? "N/A" : proxy.getPrivateIpAddress()));
|
||||
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY, args.getZoneId(), proxy.getPodIdToDeployIn(),
|
||||
"Console proxy creation failure. zone: " + dc.getName() + ", proxy: " + proxy.getHostName() + ", public IP: " + proxy.getPublicIpAddress() +
|
||||
", private IP: " + (proxy.getPrivateIpAddress() == null ? "N/A" : proxy.getPrivateIpAddress()) + ", error details: " + args.getMessage(),
|
||||
s_logger.debug("Console proxy creation failure, zone: " + dc.getName());
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY, args.getZoneId(), null,
|
||||
"Console proxy creation failure. zone: " + dc.getName() + ", error details: " + args.getMessage(),
|
||||
"Console proxy creation failure (zone " + dc.getName() + ")");
|
||||
break;
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ public class SecondaryStorageVmAlertAdapter extends AdapterBase implements Alert
|
||||
|
||||
DataCenterVO dc = _dcDao.findById(args.getZoneId());
|
||||
SecondaryStorageVmVO secStorageVm = args.getSecStorageVm();
|
||||
if (secStorageVm == null)
|
||||
if (secStorageVm == null && args.getSecStorageVmId() != 0)
|
||||
secStorageVm = _ssvmDao.findById(args.getSecStorageVmId());
|
||||
|
||||
switch (args.getType()) {
|
||||
@ -103,13 +103,10 @@ public class SecondaryStorageVmAlertAdapter extends AdapterBase implements Alert
|
||||
|
||||
case SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE:
|
||||
if (s_logger.isDebugEnabled())
|
||||
s_logger.debug("Secondary Storage Vm creation failure, zone: " + dc.getName() + ", secStorageVm: " + secStorageVm.getHostName() + ", public IP: " +
|
||||
secStorageVm.getPublicIpAddress() + ", private IP: " + (secStorageVm.getPrivateIpAddress() == null ? "N/A" : secStorageVm.getPrivateIpAddress()));
|
||||
s_logger.debug("Secondary Storage Vm creation failure, zone: " + dc.getName());
|
||||
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SSVM, args.getZoneId(), secStorageVm.getPodIdToDeployIn(),
|
||||
"Secondary Storage Vm creation failure. zone: " +
|
||||
dc.getName() + ", secStorageVm: " + secStorageVm.getHostName() + ", public IP: " + secStorageVm.getPublicIpAddress() + ", private IP: " +
|
||||
(secStorageVm.getPrivateIpAddress() == null ? "N/A" : secStorageVm.getPrivateIpAddress()) + ", error details: " + args.getMessage(),
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SSVM, args.getZoneId(), null,
|
||||
"Secondary Storage Vm creation failure. zone: " + dc.getName() + ", error details: " + args.getMessage(),
|
||||
"Secondary Storage Vm creation failure (zone " + dc.getName() + ")");
|
||||
break;
|
||||
|
||||
|
||||
@ -535,6 +535,7 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
|
||||
if (proxy.getState() == VirtualMachine.State.Stopped) {
|
||||
_itMgr.advanceStart(proxy.getUuid(), null, null);
|
||||
proxy = _consoleProxyDao.findById(proxy.getId());
|
||||
return proxy;
|
||||
}
|
||||
|
||||
// For VMs that are in the Stopping, Starting, or Migrating state, let the client wait by returning null
|
||||
@ -664,9 +665,6 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Unable to allocate console proxy storage, remove the console proxy record from DB, proxy id: " + proxyVmId);
|
||||
}
|
||||
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
|
||||
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE, dataCenterId, proxyVmId, null, "Unable to allocate storage"));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@ -883,43 +881,57 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy
|
||||
}
|
||||
|
||||
ConsoleProxyVO proxy = null;
|
||||
if (_allocProxyLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
try {
|
||||
proxy = assignProxyFromStoppedPool(dataCenterId);
|
||||
if (proxy == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("No stopped console proxy is available, need to allocate a new console proxy");
|
||||
}
|
||||
String errorString = null;
|
||||
try{
|
||||
if (_allocProxyLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
try {
|
||||
proxy = assignProxyFromStoppedPool(dataCenterId);
|
||||
if (proxy == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("No stopped console proxy is available, need to allocate a new console proxy");
|
||||
}
|
||||
|
||||
try {
|
||||
proxy = startNew(dataCenterId);
|
||||
} catch (ConcurrentOperationException e) {
|
||||
s_logger.info("Concurrent Operation caught " + e);
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Found a stopped console proxy, bring it up to running pool. proxy vm id : " + proxy.getId());
|
||||
try {
|
||||
proxy = startNew(dataCenterId);
|
||||
} catch (ConcurrentOperationException e) {
|
||||
s_logger.info("Concurrent Operation caught " + e);
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Found a stopped console proxy, bring it up to running pool. proxy vm id : " + proxy.getId());
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
_allocProxyLock.unlock();
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to acquire proxy allocation lock, skip for next time");
|
||||
}
|
||||
} finally {
|
||||
_allocProxyLock.unlock();
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to acquire proxy allocation lock, skip for next time");
|
||||
}
|
||||
}
|
||||
|
||||
if (proxy != null) {
|
||||
long proxyVmId = proxy.getId();
|
||||
proxy = startProxy(proxyVmId);
|
||||
|
||||
if (proxy != null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Console proxy " + proxy.getHostName() + " is started");
|
||||
long proxyVmId = proxy.getId();
|
||||
proxy = startProxy(proxyVmId);
|
||||
|
||||
if (proxy != null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Console proxy " + proxy.getHostName() + " is started");
|
||||
}
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
|
||||
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_UP, dataCenterId, proxy.getId(), proxy, null));
|
||||
}
|
||||
}
|
||||
}catch (Exception e){
|
||||
errorString = e.getMessage();
|
||||
}finally {
|
||||
// TODO - For now, report all alerts as creation failures. Distinguish between creation and start failures in the future.
|
||||
// Also add the failure reason, since startvm masks some of them.
|
||||
if(proxy == null || proxy.getState() != State.Running)
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ConsoleProxyManager.ALERT_SUBJECT, this,
|
||||
new ConsoleProxyAlertEventArgs(ConsoleProxyAlertEventArgs.PROXY_CREATE_FAILURE, dataCenterId, 0l, null, errorString));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public boolean isZoneReady(Map<Long, ZoneHostInfo> zoneHostInfoMap, long dataCenterId) {
|
||||
|
||||
@ -506,9 +506,6 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar
|
||||
s_logger.debug("Unable to allocate secondary storage vm storage, remove the secondary storage vm record from DB, secondary storage vm id: " +
|
||||
secStorageVmId);
|
||||
}
|
||||
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
|
||||
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE, dataCenterId, secStorageVmId, null, "Unable to allocate storage"));
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@ -654,70 +651,83 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
boolean secStorageVmFromStoppedPool = false;
|
||||
SecondaryStorageVmVO secStorageVm = assignSecStorageVmFromStoppedPool(dataCenterId, role);
|
||||
if (secStorageVm == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("No stopped secondary storage vm is available, need to allocate a new secondary storage vm");
|
||||
}
|
||||
|
||||
if (_allocLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
try {
|
||||
secStorageVm = startNew(dataCenterId, role);
|
||||
for (UploadVO upload : _uploadDao.listAll()) {
|
||||
_uploadDao.expunge(upload.getId());
|
||||
}
|
||||
} finally {
|
||||
_allocLock.unlock();
|
||||
}
|
||||
} else {
|
||||
SecondaryStorageVmVO secStorageVm = null;
|
||||
String errorString = null;
|
||||
try{
|
||||
boolean secStorageVmFromStoppedPool = false;
|
||||
secStorageVm = assignSecStorageVmFromStoppedPool(dataCenterId, role);
|
||||
if (secStorageVm == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to acquire synchronization lock to allocate secStorageVm resource for standby capacity, wait for next scan");
|
||||
s_logger.info("No stopped secondary storage vm is available, need to allocate a new secondary storage vm");
|
||||
}
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Found a stopped secondary storage vm, bring it up to running pool. secStorageVm vm id : " + secStorageVm.getId());
|
||||
}
|
||||
secStorageVmFromStoppedPool = true;
|
||||
}
|
||||
|
||||
if (secStorageVm != null) {
|
||||
long secStorageVmId = secStorageVm.getId();
|
||||
GlobalLock secStorageVmLock = GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVmId));
|
||||
try {
|
||||
if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
if (_allocLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
try {
|
||||
secStorageVm = startSecStorageVm(secStorageVmId);
|
||||
secStorageVm = startNew(dataCenterId, role);
|
||||
for (UploadVO upload : _uploadDao.listAll()) {
|
||||
_uploadDao.expunge(upload.getId());
|
||||
}
|
||||
} finally {
|
||||
secStorageVmLock.unlock();
|
||||
_allocLock.unlock();
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to acquire synchronization lock to start secStorageVm for standby capacity, secStorageVm vm id : " + secStorageVm.getId());
|
||||
s_logger.info("Unable to acquire synchronization lock to allocate secStorageVm resource for standby capacity, wait for next scan");
|
||||
}
|
||||
return;
|
||||
}
|
||||
} finally {
|
||||
secStorageVmLock.releaseRef();
|
||||
}
|
||||
|
||||
if (secStorageVm == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to start secondary storage vm for standby capacity, secStorageVm vm Id : " + secStorageVmId +
|
||||
", will recycle it and start a new one");
|
||||
}
|
||||
|
||||
if (secStorageVmFromStoppedPool) {
|
||||
destroySecStorageVm(secStorageVmId);
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Secondary storage vm " + secStorageVm.getHostName() + " is started");
|
||||
s_logger.info("Found a stopped secondary storage vm, bring it up to running pool. secStorageVm vm id : " + secStorageVm.getId());
|
||||
}
|
||||
secStorageVmFromStoppedPool = true;
|
||||
}
|
||||
|
||||
if (secStorageVm != null) {
|
||||
long secStorageVmId = secStorageVm.getId();
|
||||
GlobalLock secStorageVmLock = GlobalLock.getInternLock(getSecStorageVmLockName(secStorageVmId));
|
||||
try {
|
||||
if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) {
|
||||
try {
|
||||
secStorageVm = startSecStorageVm(secStorageVmId);
|
||||
} finally {
|
||||
secStorageVmLock.unlock();
|
||||
}
|
||||
} else {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to acquire synchronization lock to start secStorageVm for standby capacity, secStorageVm vm id : " + secStorageVm.getId());
|
||||
}
|
||||
return;
|
||||
}
|
||||
} finally {
|
||||
secStorageVmLock.releaseRef();
|
||||
}
|
||||
|
||||
if (secStorageVm == null) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Unable to start secondary storage vm for standby capacity, secStorageVm vm Id : " + secStorageVmId +
|
||||
", will recycle it and start a new one");
|
||||
}
|
||||
|
||||
if (secStorageVmFromStoppedPool) {
|
||||
destroySecStorageVm(secStorageVmId);
|
||||
}
|
||||
} else {
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
|
||||
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_UP, dataCenterId, secStorageVmId, secStorageVm, null));
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Secondary storage vm " + secStorageVm.getHostName() + " is started");
|
||||
}
|
||||
}
|
||||
}
|
||||
}catch (Exception e){
|
||||
errorString = e.getMessage();
|
||||
}finally{
|
||||
// TODO - For now, report all alerts as creation failures. Distinguish between creation and start failures in the future.
|
||||
// Also add the failure reason, since startvm masks some of them.
|
||||
if(secStorageVm == null || secStorageVm.getState() != State.Running)
|
||||
SubscriptionMgr.getInstance().notifySubscribers(ALERT_SUBJECT, this,
|
||||
new SecStorageVmAlertEventArgs(SecStorageVmAlertEventArgs.SSVM_CREATE_FAILURE, dataCenterId, 0l, null, errorString));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user