CLOUDSTACK-7864: CPVM continues to be in Stopped state after a failure to start because of a management server restart.

This commit is contained in:
Min Chen 2014-11-07 11:47:16 -08:00
parent 392eaf3e56
commit a275bd7ca4
3 changed files with 34 additions and 2 deletions

View File

@ -39,4 +39,6 @@ public interface AsyncJobDao extends GenericDao<AsyncJobVO, Long> {
/**
 * Resets the status of jobs associated with the given management server.
 *
 * NOTE(review): the implementation is not fully visible here; the visible caller
 * passes an HTTP error code and a "job cancelled because of management server
 * restart or shutdown" message, so this presumably marks those jobs as
 * cancelled/failed -- confirm against the implementation.
 *
 * @param msid id of the management server whose jobs are reset
 * @param jobResultCode result code to record on the affected jobs
 * @param jobResultMessage result message to record on the affected jobs
 */
void resetJobProcess(long msid, int jobResultCode, String jobResultMessage);
/**
 * Looks up completed jobs that are considered expired.
 *
 * NOTE(review): the implementation is not visible here; presumably returns at
 * most {@code limit} completed jobs older than {@code cutTime} -- confirm.
 *
 * @param cutTime cut-off time used to decide which jobs are expired
 * @param limit presumably the maximum number of jobs to return -- TODO confirm
 * @return the matching jobs
 */
List<AsyncJobVO> getExpiredCompletedJobs(Date cutTime, int limit);
/**
 * Lists the in-progress jobs that belong to the given management server: jobs
 * whose executing management server is {@code msid}, plus jobs that have no
 * executing management server yet but were initiated by {@code msid}.
 *
 * @param msid id of the management server
 * @return the matching jobs, ordered by their creation time
 */
List<AsyncJobVO> getResetJobs(long msid);
}

View File

@ -186,4 +186,24 @@ public class AsyncJobDaoImpl extends GenericDaoBase<AsyncJobVO, Long> implements
s_logger.warn("Unable to reset job status for management server " + msid, e);
}
}
/**
 * Lists the in-progress jobs that belong to the given management server.
 *
 * A job matches when its status is {@code IN_PROGRESS} and either
 * <ul>
 *   <li>its executing management server is {@code msid}, or</li>
 *   <li>it has no executing management server yet and was initiated by {@code msid}.</li>
 * </ul>
 * The lookup goes through {@code listIncludingRemovedBy}, so rows flagged as
 * removed are presumably returned as well (confirm against GenericDaoBase).
 *
 * @param msid id of the management server
 * @return the matching jobs, ordered by the {@code created} attribute
 *         (the {@code true} flag is presumably "ascending" -- confirm against Filter)
 */
@Override
public List<AsyncJobVO> getResetJobs(long msid) {
    SearchCriteria<AsyncJobVO> sc = pendingAsyncJobSearch.create();
    sc.setParameters("status", JobInfo.Status.IN_PROGRESS);

    // construct query: (job_executing_msid=msid OR (job_executing_msid IS NULL AND job_init_msid=msid))
    SearchCriteria<AsyncJobVO> msQuery = createSearchCriteria();
    msQuery.addOr("executingMsid", SearchCriteria.Op.EQ, msid);

    SearchCriteria<AsyncJobVO> initMsQuery = createSearchCriteria();
    initMsQuery.addAnd("executingMsid", SearchCriteria.Op.NULL);
    initMsQuery.addAnd("initMsid", SearchCriteria.Op.EQ, msid);

    // Use the same attribute spelling ("initMsid") as the EQ condition above so the
    // name resolves to a real entity attribute instead of the mismatched "initMsId".
    msQuery.addOr("initMsid", SearchCriteria.Op.SC, initMsQuery);

    // AND the whole management-server sub-query onto the status condition
    sc.addAnd("executingMsid", SearchCriteria.Op.SC, msQuery);

    Filter filter = new Filter(AsyncJobVO.class, "created", true, null, null);
    return listIncludingRemovedBy(sc, filter);
}
}

View File

@ -232,7 +232,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
s_logger.debug("job-" + jobId + " no longer exists, we just log completion info here. " + jobStatus + ", resultCode: " + resultCode + ", result: " +
resultObject);
}
// still purge item from queue to avoid any blocking
_queueMgr.purgeAsyncJobQueueItemId(jobId);
return;
}
@ -240,7 +241,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
if (s_logger.isDebugEnabled()) {
s_logger.debug("job-" + jobId + " is already completed.");
}
// still purge item from queue to avoid any blocking
_queueMgr.purgeAsyncJobQueueItemId(jobId);
return;
}
@ -547,6 +549,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
// guard final clause as well
try {
if (job.getSyncSource() != null) {
// purge the queue item once more here to make sure it is removed even if an uncaught exception occurred
_queueMgr.purgeItem(job.getSyncSource().getId());
checkQueue(job.getSyncSource().getQueueId());
}
@ -976,6 +980,12 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
_queueMgr.cleanupActiveQueueItems(msid, true);
// reset job status for all jobs running on this ms node
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
// purge those queue items for those cancelled jobs above, which may not be picked up by any MS node yet
List<AsyncJobVO> cancelJobs = _jobDao.getResetJobs(msid);
for (AsyncJobVO job : cancelJobs){
_queueMgr.purgeAsyncJobQueueItemId(job.getId());
}
}
});
} catch (Throwable e) {