mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-7864: CPVM continues to be in Stopped state after a failure to start because of a management server restart.
This commit is contained in:
parent
392eaf3e56
commit
a275bd7ca4
@ -39,4 +39,6 @@ public interface AsyncJobDao extends GenericDao<AsyncJobVO, Long> {
|
|||||||
void resetJobProcess(long msid, int jobResultCode, String jobResultMessage);
|
void resetJobProcess(long msid, int jobResultCode, String jobResultMessage);
|
||||||
|
|
||||||
List<AsyncJobVO> getExpiredCompletedJobs(Date cutTime, int limit);
|
List<AsyncJobVO> getExpiredCompletedJobs(Date cutTime, int limit);
|
||||||
|
|
||||||
|
List<AsyncJobVO> getResetJobs(long msid);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -186,4 +186,24 @@ public class AsyncJobDaoImpl extends GenericDaoBase<AsyncJobVO, Long> implements
|
|||||||
s_logger.warn("Unable to reset job status for management server " + msid, e);
|
s_logger.warn("Unable to reset job status for management server " + msid, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<AsyncJobVO> getResetJobs(long msid) {
|
||||||
|
SearchCriteria<AsyncJobVO> sc = pendingAsyncJobSearch.create();
|
||||||
|
sc.setParameters("status", JobInfo.Status.IN_PROGRESS);
|
||||||
|
|
||||||
|
// construct query: (job_executing_msid=msid OR (job_executing_msid IS NULL AND job_init_msid=msid))
|
||||||
|
SearchCriteria<AsyncJobVO> msQuery = createSearchCriteria();
|
||||||
|
msQuery.addOr("executingMsid", SearchCriteria.Op.EQ, msid);
|
||||||
|
SearchCriteria<AsyncJobVO> initMsQuery = createSearchCriteria();
|
||||||
|
initMsQuery.addAnd("executingMsid", SearchCriteria.Op.NULL);
|
||||||
|
initMsQuery.addAnd("initMsid", SearchCriteria.Op.EQ, msid);
|
||||||
|
msQuery.addOr("initMsId", SearchCriteria.Op.SC, initMsQuery);
|
||||||
|
|
||||||
|
sc.addAnd("executingMsid", SearchCriteria.Op.SC, msQuery);
|
||||||
|
|
||||||
|
Filter filter = new Filter(AsyncJobVO.class, "created", true, null, null);
|
||||||
|
return listIncludingRemovedBy(sc, filter);
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -232,7 +232,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
s_logger.debug("job-" + jobId + " no longer exists, we just log completion info here. " + jobStatus + ", resultCode: " + resultCode + ", result: " +
|
s_logger.debug("job-" + jobId + " no longer exists, we just log completion info here. " + jobStatus + ", resultCode: " + resultCode + ", result: " +
|
||||||
resultObject);
|
resultObject);
|
||||||
}
|
}
|
||||||
|
// still purge item from queue to avoid any blocking
|
||||||
|
_queueMgr.purgeAsyncJobQueueItemId(jobId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,7 +241,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
if (s_logger.isDebugEnabled()) {
|
if (s_logger.isDebugEnabled()) {
|
||||||
s_logger.debug("job-" + jobId + " is already completed.");
|
s_logger.debug("job-" + jobId + " is already completed.");
|
||||||
}
|
}
|
||||||
|
// still purge item from queue to avoid any blocking
|
||||||
|
_queueMgr.purgeAsyncJobQueueItemId(jobId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -547,6 +549,8 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
// guard final clause as well
|
// guard final clause as well
|
||||||
try {
|
try {
|
||||||
if (job.getSyncSource() != null) {
|
if (job.getSyncSource() != null) {
|
||||||
|
// here check queue item one more time to double make sure that queue item is removed in case of any uncaught exception
|
||||||
|
_queueMgr.purgeItem(job.getSyncSource().getId());
|
||||||
checkQueue(job.getSyncSource().getQueueId());
|
checkQueue(job.getSyncSource().getQueueId());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -976,6 +980,12 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
_queueMgr.cleanupActiveQueueItems(msid, true);
|
_queueMgr.cleanupActiveQueueItems(msid, true);
|
||||||
// reset job status for all jobs running on this ms node
|
// reset job status for all jobs running on this ms node
|
||||||
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
|
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
|
||||||
|
// purge those queue items for those cancelled jobs above, which may not be picked up by any MS node yet
|
||||||
|
List<AsyncJobVO> cancelJobs = _jobDao.getResetJobs(msid);
|
||||||
|
for (AsyncJobVO job : cancelJobs){
|
||||||
|
_queueMgr.purgeAsyncJobQueueItemId(job.getId());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user