mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-7589: VM not Starting and always stuck in Stopped state after
management server restarts. (cherry picked from commit 7cdb67dcf1ec4158ec0ab4c2fa868cc63121bbb5)
This commit is contained in:
parent
9bbcef4540
commit
b979c6f0c7
@ -835,24 +835,6 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
return ManagementServerNode.getManagementServerId();
|
||||
}
|
||||
|
||||
private void cleanupPendingJobs(List<SyncQueueItemVO> l) {
|
||||
for (SyncQueueItemVO item : l) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Discard left-over queue item: " + item.toString());
|
||||
}
|
||||
|
||||
String contentType = item.getContentType();
|
||||
if (contentType != null && contentType.equalsIgnoreCase(SyncQueueItem.AsyncJobContentType)) {
|
||||
Long jobId = item.getContentId();
|
||||
if (jobId != null) {
|
||||
s_logger.warn("Mark job as failed as its correspoding queue-item has been discarded. job id: " + jobId);
|
||||
completeAsyncJob(jobId, JobInfo.Status.FAILED, 0, "Execution was cancelled because of server shutdown");
|
||||
}
|
||||
}
|
||||
_queueMgr.purgeItem(item.getId());
|
||||
}
|
||||
}
|
||||
|
||||
@DB
|
||||
protected List<Long> wakeupByJoinedJobCompletion(long joinedJobId) {
|
||||
SearchCriteria<Long> joinJobSC = JoinJobSearch.create("joinJobId", joinedJobId);
|
||||
@ -967,6 +949,22 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
return true;
|
||||
}
|
||||
|
||||
private void cleanupLeftOverJobs(final long msid) {
|
||||
try {
|
||||
Transaction.execute(new TransactionCallbackNoReturn() {
|
||||
@Override
|
||||
public void doInTransactionWithoutResult(TransactionStatus status) {
|
||||
// purge sync queue item running on this ms node
|
||||
_queueMgr.cleanupActiveQueueItems(msid, true);
|
||||
// reset job status for all jobs running on this ms node
|
||||
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
|
||||
}
|
||||
});
|
||||
} catch (Throwable e) {
|
||||
s_logger.warn("Unexpected exception in cleaning up left over jobs for mamagement server node " + msid, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
||||
}
|
||||
@ -974,18 +972,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
@Override
|
||||
public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
||||
for (final ManagementServerHost msHost : nodeList) {
|
||||
try {
|
||||
Transaction.execute(new TransactionCallbackNoReturn() {
|
||||
@Override
|
||||
public void doInTransactionWithoutResult(TransactionStatus status) {
|
||||
List<SyncQueueItemVO> items = _queueMgr.getActiveQueueItems(msHost.getId(), true);
|
||||
cleanupPendingJobs(items);
|
||||
_jobDao.resetJobProcess(msHost.getId(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
|
||||
}
|
||||
});
|
||||
} catch (Throwable e) {
|
||||
s_logger.warn("Unexpected exception ", e);
|
||||
}
|
||||
cleanupLeftOverJobs(msHost.getId());
|
||||
}
|
||||
}
|
||||
|
||||
@ -995,15 +982,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
|
||||
@Override
|
||||
public boolean start() {
|
||||
try {
|
||||
_jobDao.cleanupPseduoJobs(getMsid());
|
||||
|
||||
List<SyncQueueItemVO> l = _queueMgr.getActiveQueueItems(getMsid(), false);
|
||||
cleanupPendingJobs(l);
|
||||
_jobDao.resetJobProcess(getMsid(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
|
||||
} catch (Throwable e) {
|
||||
s_logger.error("Unexpected exception " + e.getMessage(), e);
|
||||
}
|
||||
cleanupLeftOverJobs(getMsid());
|
||||
|
||||
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), HEARTBEAT_INTERVAL, HEARTBEAT_INTERVAL, TimeUnit.MILLISECONDS);
|
||||
_heartbeatScheduler.scheduleAtFixedRate(getGCTask(), GC_INTERVAL, GC_INTERVAL, TimeUnit.MILLISECONDS);
|
||||
|
||||
@ -36,4 +36,6 @@ public interface SyncQueueManager extends Manager {
|
||||
/**
 * Returns queue items that are considered blocked.
 * <p>
 * NOTE(review): presumably {@code thresholdMs} is how long, in milliseconds, an item must
 * have been stuck to count as blocked, and {@code exclusive} selects an exclusive fetch -
 * neither is visible from this declaration; confirm against the implementation.
 */
public List<SyncQueueItemVO> getBlockedQueueItems(long thresholdMs, boolean exclusive);
|
||||
|
||||
/**
 * NOTE(review): presumably purges the queue item that belongs to the async job
 * {@code asyncJobId} - confirm against the implementation.
 */
void purgeAsyncJobQueueItemId(long asyncJobId);
|
||||
|
||||
/**
 * Discards the active queue items of the management server identified by {@code msid}.
 *
 * @param msid      id of the management server whose active queue items are discarded
 * @param exclusive NOTE(review): presumably requests an exclusive fetch of the items -
 *                  confirm against the implementation
 */
public void cleanupActiveQueueItems(Long msid, boolean exclusive);
|
||||
}
|
||||
|
||||
@ -26,7 +26,6 @@ import org.apache.log4j.Logger;
|
||||
|
||||
import org.apache.cloudstack.framework.jobs.dao.SyncQueueDao;
|
||||
import org.apache.cloudstack.framework.jobs.dao.SyncQueueItemDao;
|
||||
|
||||
import com.cloud.utils.DateUtil;
|
||||
import com.cloud.utils.component.ManagerBase;
|
||||
import com.cloud.utils.db.DB;
|
||||
@ -260,4 +259,16 @@ public class SyncQueueManagerImpl extends ManagerBase implements SyncQueueManage
|
||||
purgeItem(itemId);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cleanupActiveQueueItems(Long msid, boolean exclusive) {
|
||||
List<SyncQueueItemVO> l = getActiveQueueItems(msid, false);
|
||||
for (SyncQueueItemVO item : l) {
|
||||
if (s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Discard left-over queue item: " + item.toString());
|
||||
}
|
||||
purgeItem(item.getId());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user