mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-7864: CPVM continues to be in Stopped state after a failure to start because of a management server restart.
This commit is contained in:
parent
392eaf3e56
commit
a275bd7ca4
@ -39,4 +39,6 @@ public interface AsyncJobDao extends GenericDao<AsyncJobVO, Long> {
|
||||
/**
 * Resets jobs associated with the given management server, recording the supplied
 * result code and message.
 *
 * NOTE(review): the call site describes this as resetting job status for all jobs
 * running on that management server node; the exact status transition is not visible
 * from this declaration - confirm against the implementation.
 *
 * @param msid management server id whose jobs are reset
 * @param jobResultCode result code to record for the reset jobs
 * @param jobResultMessage result message to record for the reset jobs
 */
void resetJobProcess(long msid, int jobResultCode, String jobResultMessage);
|
||||
|
||||
/**
 * Looks up expired completed jobs.
 *
 * NOTE(review): presumably returns completed jobs older than {@code cutTime}, capped at
 * {@code limit} entries - the implementation is not visible here, confirm before relying on it.
 *
 * @param cutTime cut-off time used to select jobs
 * @param limit limit applied to the lookup
 * @return the selected jobs
 */
List<AsyncJobVO> getExpiredCompletedJobs(Date cutTime, int limit);
|
||||
|
||||
/**
 * Lists jobs tied to the given management server.
 *
 * NOTE(review): the DAO implementation selects jobs with status IN_PROGRESS whose
 * executing msid equals {@code msid}, or whose executing msid is null and whose init
 * msid equals {@code msid}.
 *
 * @param msid management server id to match
 * @return the matching jobs
 */
List<AsyncJobVO> getResetJobs(long msid);
|
||||
}
|
||||
|
||||
@ -186,4 +186,24 @@ public class AsyncJobDaoImpl extends GenericDaoBase<AsyncJobVO, Long> implements
|
||||
s_logger.warn("Unable to reset job status for management server " + msid, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<AsyncJobVO> getResetJobs(long msid) {
|
||||
SearchCriteria<AsyncJobVO> sc = pendingAsyncJobSearch.create();
|
||||
sc.setParameters("status", JobInfo.Status.IN_PROGRESS);
|
||||
|
||||
// construct query: (job_executing_msid=msid OR (job_executing_msid IS NULL AND job_init_msid=msid))
|
||||
SearchCriteria<AsyncJobVO> msQuery = createSearchCriteria();
|
||||
msQuery.addOr("executingMsid", SearchCriteria.Op.EQ, msid);
|
||||
SearchCriteria<AsyncJobVO> initMsQuery = createSearchCriteria();
|
||||
initMsQuery.addAnd("executingMsid", SearchCriteria.Op.NULL);
|
||||
initMsQuery.addAnd("initMsid", SearchCriteria.Op.EQ, msid);
|
||||
msQuery.addOr("initMsId", SearchCriteria.Op.SC, initMsQuery);
|
||||
|
||||
sc.addAnd("executingMsid", SearchCriteria.Op.SC, msQuery);
|
||||
|
||||
Filter filter = new Filter(AsyncJobVO.class, "created", true, null, null);
|
||||
return listIncludingRemovedBy(sc, filter);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -232,7 +232,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
s_logger.debug("job-" + jobId + " no longer exists, we just log completion info here. " + jobStatus + ", resultCode: " + resultCode + ", result: " +
|
||||
resultObject);
|
||||
}
|
||||
|
||||
// still purge item from queue to avoid any blocking
|
||||
_queueMgr.purgeAsyncJobQueueItemId(jobId);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -240,7 +241,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("job-" + jobId + " is already completed.");
|
||||
}
|
||||
|
||||
// still purge item from queue to avoid any blocking
|
||||
_queueMgr.purgeAsyncJobQueueItemId(jobId);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -547,6 +549,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
// guard final clause as well
|
||||
try {
|
||||
if (job.getSyncSource() != null) {
|
||||
// check the queue item one more time here to make doubly sure it is removed in case of any uncaught exception
|
||||
_queueMgr.purgeItem(job.getSyncSource().getId());
|
||||
checkQueue(job.getSyncSource().getQueueId());
|
||||
}
|
||||
|
||||
@ -976,6 +980,12 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||
_queueMgr.cleanupActiveQueueItems(msid, true);
|
||||
// reset job status for all jobs running on this ms node
|
||||
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
|
||||
// purge those queue items for those cancelled jobs above, which may not be picked up by any MS node yet
|
||||
List<AsyncJobVO> cancelJobs = _jobDao.getResetJobs(msid);
|
||||
for (AsyncJobVO job : cancelJobs){
|
||||
_queueMgr.purgeAsyncJobQueueItemId(job.getId());
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
} catch (Throwable e) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user