mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-7589: VM not Starting and always stuck in Stopped state after
management server restarts. (cherry picked from commit 7cdb67dcf1ec4158ec0ab4c2fa868cc63121bbb5)
This commit is contained in:
parent
9bbcef4540
commit
b979c6f0c7
@ -835,24 +835,6 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
return ManagementServerNode.getManagementServerId();
|
return ManagementServerNode.getManagementServerId();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void cleanupPendingJobs(List<SyncQueueItemVO> l) {
|
|
||||||
for (SyncQueueItemVO item : l) {
|
|
||||||
if (s_logger.isInfoEnabled()) {
|
|
||||||
s_logger.info("Discard left-over queue item: " + item.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
String contentType = item.getContentType();
|
|
||||||
if (contentType != null && contentType.equalsIgnoreCase(SyncQueueItem.AsyncJobContentType)) {
|
|
||||||
Long jobId = item.getContentId();
|
|
||||||
if (jobId != null) {
|
|
||||||
s_logger.warn("Mark job as failed as its correspoding queue-item has been discarded. job id: " + jobId);
|
|
||||||
completeAsyncJob(jobId, JobInfo.Status.FAILED, 0, "Execution was cancelled because of server shutdown");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_queueMgr.purgeItem(item.getId());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@DB
|
@DB
|
||||||
protected List<Long> wakeupByJoinedJobCompletion(long joinedJobId) {
|
protected List<Long> wakeupByJoinedJobCompletion(long joinedJobId) {
|
||||||
SearchCriteria<Long> joinJobSC = JoinJobSearch.create("joinJobId", joinedJobId);
|
SearchCriteria<Long> joinJobSC = JoinJobSearch.create("joinJobId", joinedJobId);
|
||||||
@ -967,6 +949,22 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void cleanupLeftOverJobs(final long msid) {
|
||||||
|
try {
|
||||||
|
Transaction.execute(new TransactionCallbackNoReturn() {
|
||||||
|
@Override
|
||||||
|
public void doInTransactionWithoutResult(TransactionStatus status) {
|
||||||
|
// purge sync queue item running on this ms node
|
||||||
|
_queueMgr.cleanupActiveQueueItems(msid, true);
|
||||||
|
// reset job status for all jobs running on this ms node
|
||||||
|
_jobDao.resetJobProcess(msid, ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart or shutdown");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} catch (Throwable e) {
|
||||||
|
s_logger.warn("Unexpected exception in cleaning up left over jobs for mamagement server node " + msid, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
||||||
}
|
}
|
||||||
@ -974,18 +972,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
@Override
|
@Override
|
||||||
public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
|
||||||
for (final ManagementServerHost msHost : nodeList) {
|
for (final ManagementServerHost msHost : nodeList) {
|
||||||
try {
|
cleanupLeftOverJobs(msHost.getId());
|
||||||
Transaction.execute(new TransactionCallbackNoReturn() {
|
|
||||||
@Override
|
|
||||||
public void doInTransactionWithoutResult(TransactionStatus status) {
|
|
||||||
List<SyncQueueItemVO> items = _queueMgr.getActiveQueueItems(msHost.getId(), true);
|
|
||||||
cleanupPendingJobs(items);
|
|
||||||
_jobDao.resetJobProcess(msHost.getId(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} catch (Throwable e) {
|
|
||||||
s_logger.warn("Unexpected exception ", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -995,15 +982,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean start() {
|
public boolean start() {
|
||||||
try {
|
cleanupLeftOverJobs(getMsid());
|
||||||
_jobDao.cleanupPseduoJobs(getMsid());
|
|
||||||
|
|
||||||
List<SyncQueueItemVO> l = _queueMgr.getActiveQueueItems(getMsid(), false);
|
|
||||||
cleanupPendingJobs(l);
|
|
||||||
_jobDao.resetJobProcess(getMsid(), ApiErrorCode.INTERNAL_ERROR.getHttpCode(), "job cancelled because of management server restart");
|
|
||||||
} catch (Throwable e) {
|
|
||||||
s_logger.error("Unexpected exception " + e.getMessage(), e);
|
|
||||||
}
|
|
||||||
|
|
||||||
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), HEARTBEAT_INTERVAL, HEARTBEAT_INTERVAL, TimeUnit.MILLISECONDS);
|
_heartbeatScheduler.scheduleAtFixedRate(getHeartbeatTask(), HEARTBEAT_INTERVAL, HEARTBEAT_INTERVAL, TimeUnit.MILLISECONDS);
|
||||||
_heartbeatScheduler.scheduleAtFixedRate(getGCTask(), GC_INTERVAL, GC_INTERVAL, TimeUnit.MILLISECONDS);
|
_heartbeatScheduler.scheduleAtFixedRate(getGCTask(), GC_INTERVAL, GC_INTERVAL, TimeUnit.MILLISECONDS);
|
||||||
|
|||||||
@ -36,4 +36,6 @@ public interface SyncQueueManager extends Manager {
|
|||||||
public List<SyncQueueItemVO> getBlockedQueueItems(long thresholdMs, boolean exclusive);
|
public List<SyncQueueItemVO> getBlockedQueueItems(long thresholdMs, boolean exclusive);
|
||||||
|
|
||||||
void purgeAsyncJobQueueItemId(long asyncJobId);
|
void purgeAsyncJobQueueItemId(long asyncJobId);
|
||||||
|
|
||||||
|
/**
 * Discards the active sync-queue items associated with a management server.
 *
 * @param msid      id of the management server whose active queue items are cleaned up
 * @param exclusive exclusivity flag for the lookup of active items —
 *                  NOTE(review): confirm exact semantics against getActiveQueueItems
 */
public void cleanupActiveQueueItems(Long msid, boolean exclusive);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -26,7 +26,6 @@ import org.apache.log4j.Logger;
|
|||||||
|
|
||||||
import org.apache.cloudstack.framework.jobs.dao.SyncQueueDao;
|
import org.apache.cloudstack.framework.jobs.dao.SyncQueueDao;
|
||||||
import org.apache.cloudstack.framework.jobs.dao.SyncQueueItemDao;
|
import org.apache.cloudstack.framework.jobs.dao.SyncQueueItemDao;
|
||||||
|
|
||||||
import com.cloud.utils.DateUtil;
|
import com.cloud.utils.DateUtil;
|
||||||
import com.cloud.utils.component.ManagerBase;
|
import com.cloud.utils.component.ManagerBase;
|
||||||
import com.cloud.utils.db.DB;
|
import com.cloud.utils.db.DB;
|
||||||
@ -260,4 +259,16 @@ public class SyncQueueManagerImpl extends ManagerBase implements SyncQueueManage
|
|||||||
purgeItem(itemId);
|
purgeItem(itemId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void cleanupActiveQueueItems(Long msid, boolean exclusive) {
|
||||||
|
List<SyncQueueItemVO> l = getActiveQueueItems(msid, false);
|
||||||
|
for (SyncQueueItemVO item : l) {
|
||||||
|
if (s_logger.isInfoEnabled()) {
|
||||||
|
s_logger.info("Discard left-over queue item: " + item.toString());
|
||||||
|
}
|
||||||
|
purgeItem(item.getId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user