diff --git a/engine/api/src/org/apache/cloudstack/engine/orchestration/service/VolumeOrchestrationService.java b/engine/api/src/org/apache/cloudstack/engine/orchestration/service/VolumeOrchestrationService.java index 281de9281de..1f198a2710a 100644 --- a/engine/api/src/org/apache/cloudstack/engine/orchestration/service/VolumeOrchestrationService.java +++ b/engine/api/src/org/apache/cloudstack/engine/orchestration/service/VolumeOrchestrationService.java @@ -24,6 +24,7 @@ import java.util.Set; import org.apache.cloudstack.engine.subsystem.api.storage.DataObject; import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.apache.cloudstack.framework.config.ConfigKey; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.dc.DataCenter; @@ -46,7 +47,6 @@ import com.cloud.utils.fsm.NoTransitionException; import com.cloud.vm.DiskProfile; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachineProfile; -import org.apache.cloudstack.framework.config.ConfigKey; /** * VolumeOrchestrationService is a PURE orchestration service on CloudStack @@ -86,6 +86,8 @@ public interface VolumeOrchestrationService { Volume migrateVolume(Volume volume, StoragePool destPool) throws StorageUnavailableException; + void cleanupStorageJobs(); + void destroyVolume(Volume volume); DiskProfile allocateRawVolume(Type type, String name, DiskOffering offering, Long size, Long minIops, Long maxIops, VirtualMachine vm, VirtualMachineTemplate template, Account owner); diff --git a/engine/api/src/org/apache/cloudstack/engine/subsystem/api/storage/SnapshotService.java b/engine/api/src/org/apache/cloudstack/engine/subsystem/api/storage/SnapshotService.java index 000b9ec4e60..4edeb55ab12 100644 --- a/engine/api/src/org/apache/cloudstack/engine/subsystem/api/storage/SnapshotService.java +++ b/engine/api/src/org/apache/cloudstack/engine/subsystem/api/storage/SnapshotService.java @@ -27,4 +27,6 @@ public interface SnapshotService { boolean revertSnapshot(Long snapshotId); void syncVolumeSnapshotsToRegionStore(long volumeId, DataStore store); + + void cleanupVolumeDuringSnapshotFailure(Long volumeId, Long snapshotId); } diff --git a/server/src/com/cloud/storage/VmWorkAttachVolume.java b/engine/components-api/src/com/cloud/vm/VmWorkAttachVolume.java similarity index 96% rename from server/src/com/cloud/storage/VmWorkAttachVolume.java rename to engine/components-api/src/com/cloud/vm/VmWorkAttachVolume.java index 3cdfbb52a2d..2d2fefbc9aa 100644 --- a/server/src/com/cloud/storage/VmWorkAttachVolume.java +++ b/engine/components-api/src/com/cloud/vm/VmWorkAttachVolume.java @@ -14,9 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -package com.cloud.storage; - -import com.cloud.vm.VmWork; +package com.cloud.vm; public class VmWorkAttachVolume extends VmWork { private static final long serialVersionUID = 553291814854451740L; diff --git a/server/src/com/cloud/storage/VmWorkDetachVolume.java b/engine/components-api/src/com/cloud/vm/VmWorkDetachVolume.java similarity index 95% rename from server/src/com/cloud/storage/VmWorkDetachVolume.java rename to engine/components-api/src/com/cloud/vm/VmWorkDetachVolume.java index 18262d254aa..91831e4850b 100644 --- a/server/src/com/cloud/storage/VmWorkDetachVolume.java +++ b/engine/components-api/src/com/cloud/vm/VmWorkDetachVolume.java @@ -14,9 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -package com.cloud.storage; - -import com.cloud.vm.VmWork; +package com.cloud.vm; public class VmWorkDetachVolume extends VmWork { private static final long serialVersionUID = -8722243207385263101L; diff --git a/server/src/com/cloud/storage/VmWorkMigrateVolume.java b/engine/components-api/src/com/cloud/vm/VmWorkMigrateVolume.java similarity index 96% rename from server/src/com/cloud/storage/VmWorkMigrateVolume.java rename to engine/components-api/src/com/cloud/vm/VmWorkMigrateVolume.java index c83e02df3ba..994b492ebf5 100644 --- a/server/src/com/cloud/storage/VmWorkMigrateVolume.java +++ b/engine/components-api/src/com/cloud/vm/VmWorkMigrateVolume.java @@ -14,9 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -package com.cloud.storage; - -import com.cloud.vm.VmWork; +package com.cloud.vm; public class VmWorkMigrateVolume extends VmWork { private static final long serialVersionUID = -565778516928408602L; diff --git a/server/src/com/cloud/storage/VmWorkResizeVolume.java b/engine/components-api/src/com/cloud/vm/VmWorkResizeVolume.java similarity index 97% rename from server/src/com/cloud/storage/VmWorkResizeVolume.java rename to engine/components-api/src/com/cloud/vm/VmWorkResizeVolume.java index 4047145e0cf..d2691129613 100644 --- a/server/src/com/cloud/storage/VmWorkResizeVolume.java +++ b/engine/components-api/src/com/cloud/vm/VmWorkResizeVolume.java @@ -14,9 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -package com.cloud.storage; - -import com.cloud.vm.VmWork; +package com.cloud.vm; public class VmWorkResizeVolume extends VmWork { private static final long serialVersionUID = 6112366316907642498L; diff --git a/server/src/com/cloud/storage/VmWorkTakeVolumeSnapshot.java b/engine/components-api/src/com/cloud/vm/VmWorkTakeVolumeSnapshot.java similarity index 96% rename from server/src/com/cloud/storage/VmWorkTakeVolumeSnapshot.java rename to engine/components-api/src/com/cloud/vm/VmWorkTakeVolumeSnapshot.java index 8b238080bce..3160be1686b 100644 --- a/server/src/com/cloud/storage/VmWorkTakeVolumeSnapshot.java +++ b/engine/components-api/src/com/cloud/vm/VmWorkTakeVolumeSnapshot.java @@ -14,9 +14,7 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -package com.cloud.storage; - -import com.cloud.vm.VmWork; +package com.cloud.vm; public class VmWorkTakeVolumeSnapshot extends VmWork { diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index dc6ad3fe370..fa8b7b6ca20 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -566,6 +566,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _executor.scheduleAtFixedRate(new TransitionTask(), VmOpCleanupInterval.value(), VmOpCleanupInterval.value(), TimeUnit.SECONDS); cancelWorkItems(_nodeId); + volumeMgr.cleanupStorageJobs(); // cleanup left over place holder works _workJobDao.expungeLeftoverWorkJobs(ManagementServerNode.getManagementServerId()); return true; diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java b/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java index 63d967b90af..515d5ec9b9a 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java @@ -54,6 +54,8 @@ import org.apache.cloudstack.framework.async.AsyncCallFuture; import org.apache.cloudstack.framework.config.ConfigDepot; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.framework.jobs.AsyncJobManager; +import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; import org.apache.cloudstack.storage.command.CommandResult; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; @@ -64,6 +66,7 @@ import com.cloud.agent.api.to.DataTO; import com.cloud.agent.api.to.DiskTO; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.agent.manager.allocator.PodAllocator; +import com.cloud.cluster.ClusterManager; import com.cloud.configuration.Resource.ResourceType; import com.cloud.dc.DataCenter; import com.cloud.dc.Pod; @@ -117,6 +120,10 @@ import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.VirtualMachineProfileImpl; +import com.cloud.vm.VmWorkAttachVolume; +import com.cloud.vm.VmWorkMigrateVolume; +import com.cloud.vm.VmWorkSerializer; +import com.cloud.vm.VmWorkTakeVolumeSnapshot; import com.cloud.vm.dao.UserVmDao; public class VolumeOrchestrator extends ManagerBase implements VolumeOrchestrationService, Configurable { @@ -156,6 +163,10 @@ public class VolumeOrchestrator extends ManagerBase implements VolumeOrchestrati SnapshotService _snapshotSrv; @Inject protected UserVmDao _userVmDao; + @Inject + protected AsyncJobManager _jobMgr; + @Inject + ClusterManager clusterManager; private final StateMachine2 _volStateMachine; protected List _storagePoolAllocators; @@ -1355,9 +1366,71 @@ public class VolumeOrchestrator extends ManagerBase implements VolumeOrchestrati return true; } + private void cleanupVolumeDuringAttachFailure(Long volumeId) { + VolumeVO volume = _volsDao.findById(volumeId); + if (volume == null) { + return; + } + + if (volume.getState().equals(Volume.State.Creating)) { + s_logger.debug("Remove volume: " + volume.getId() + ", as it's leftover from last mgt server stop"); + _volsDao.remove(volume.getId()); + } + } + + private void cleanupVolumeDuringMigrationFailure(Long volumeId, Long destPoolId) { + StoragePool destPool = (StoragePool)dataStoreMgr.getDataStore(destPoolId, DataStoreRole.Primary); + if (destPool == null) { + return; + } + + VolumeVO volume = _volsDao.findById(volumeId); + if (volume.getState() == Volume.State.Migrating) { + VolumeVO duplicateVol = _volsDao.findByPoolIdName(destPoolId, volume.getName()); + if (duplicateVol != null) { + s_logger.debug("Remove volume " + duplicateVol.getId() + " on storage pool " + destPoolId); + _volsDao.remove(duplicateVol.getId()); + } + + s_logger.debug("change volume state to ready from migrating in case migration failure for vol: " + volumeId); + volume.setState(Volume.State.Ready); + _volsDao.update(volumeId, volume); + } + + } + + private void cleanupVolumeDuringSnapshotFailure(Long volumeId, Long snapshotId) { + _snapshotSrv.cleanupVolumeDuringSnapshotFailure(volumeId, snapshotId); + VolumeVO volume = _volsDao.findById(volumeId); + if (volume.getState() == Volume.State.Snapshotting) { + s_logger.debug("change volume state back to Ready: " + volume.getId()); + volume.setState(Volume.State.Ready); + _volsDao.update(volume.getId(), volume); + } + } + @Override - public boolean start() { - return true; + public void cleanupStorageJobs() { + //clean up failure jobs related to volume + List jobs = _jobMgr.findFailureAsyncJobs(VmWorkAttachVolume.class.getName(), + VmWorkMigrateVolume.class.getName(), VmWorkTakeVolumeSnapshot.class.getName()); + + for (AsyncJobVO job : jobs) { + try { + if (job.getCmd().equalsIgnoreCase(VmWorkAttachVolume.class.getName())) { + VmWorkAttachVolume work = VmWorkSerializer.deserialize(VmWorkAttachVolume.class, job.getCmdInfo()); + cleanupVolumeDuringAttachFailure(work.getVolumeId()); + } else if (job.getCmd().equalsIgnoreCase(VmWorkMigrateVolume.class.getName())) { + VmWorkMigrateVolume work = VmWorkSerializer.deserialize(VmWorkMigrateVolume.class, job.getCmdInfo()); + cleanupVolumeDuringMigrationFailure(work.getVolumeId(), work.getDestPoolId()); + } else if (job.getCmd().equalsIgnoreCase(VmWorkTakeVolumeSnapshot.class.getName())) { + VmWorkTakeVolumeSnapshot work = VmWorkSerializer.deserialize(VmWorkTakeVolumeSnapshot.class, job.getCmdInfo()); + cleanupVolumeDuringSnapshotFailure(work.getVolumeId(), work.getSnapshotId()); + } + } catch (Exception e) { + s_logger.debug("clean up job failure, will continue", e); + } + } } @Override diff --git a/engine/schema/src/com/cloud/storage/dao/VolumeDao.java b/engine/schema/src/com/cloud/storage/dao/VolumeDao.java index da0a5de2d17..343ec2a56de 100755 --- a/engine/schema/src/com/cloud/storage/dao/VolumeDao.java +++ b/engine/schema/src/com/cloud/storage/dao/VolumeDao.java @@ -61,6 +61,8 @@ public interface VolumeDao extends GenericDao, StateDao findByPoolId(long poolId); + VolumeVO findByPoolIdName(long poolId, String name); + List findByPoolId(long poolId, Volume.Type volumeType); List findByInstanceAndDeviceId(long instanceId, long deviceId); diff --git a/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java b/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java index 4ad196c51ec..24de7172e06 100755 --- a/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java +++ b/engine/schema/src/com/cloud/storage/dao/VolumeDaoImpl.java @@ -125,6 +125,14 @@ public class VolumeDaoImpl extends GenericDaoBase implements Vol return listBy(sc); } + @Override + public VolumeVO findByPoolIdName(long poolId, String name) { + SearchCriteria sc = AllFieldsSearch.create(); + sc.setParameters("poolId", poolId); + sc.setParameters("name", name); + return findOneBy(sc); + } + @Override public List findByPoolId(long poolId, Volume.Type volumeType) { SearchCriteria sc = AllFieldsSearch.create(); @@ -306,6 +314,7 @@ public class VolumeDaoImpl extends GenericDaoBase implements Vol AllFieldsSearch.and("destroyed", AllFieldsSearch.entity().getState(), Op.EQ); AllFieldsSearch.and("notDestroyed", AllFieldsSearch.entity().getState(), Op.NEQ); AllFieldsSearch.and("updatedCount", AllFieldsSearch.entity().getUpdatedCount(), Op.EQ); + AllFieldsSearch.and("name", AllFieldsSearch.entity().getName(), Op.EQ); AllFieldsSearch.done(); DetachedAccountIdSearch = createSearchBuilder(); diff --git a/engine/storage/snapshot/src/org/apache/cloudstack/storage/snapshot/SnapshotServiceImpl.java b/engine/storage/snapshot/src/org/apache/cloudstack/storage/snapshot/SnapshotServiceImpl.java index 259c51e6505..5a8afc68931 100644 --- a/engine/storage/snapshot/src/org/apache/cloudstack/storage/snapshot/SnapshotServiceImpl.java +++ b/engine/storage/snapshot/src/org/apache/cloudstack/storage/snapshot/SnapshotServiceImpl.java @@ -446,6 +446,28 @@ public class SnapshotServiceImpl implements SnapshotService { } } + @Override + public void cleanupVolumeDuringSnapshotFailure(Long volumeId, Long snapshotId) { + SnapshotVO snaphsot = _snapshotDao.findById(snapshotId); + + if (snaphsot != null) { + if (snaphsot.getState() != Snapshot.State.BackedUp) { + List snapshotDataStoreVOs = _snapshotStoreDao.findBySnapshotId(snapshotId); + for (SnapshotDataStoreVO snapshotDataStoreVO : snapshotDataStoreVOs) { + s_logger.debug("Remove snapshot " + snapshotId + ", status " + snapshotDataStoreVO.getState() + + " on snapshot_store_ref table with id: " + snapshotDataStoreVO.getId()); + + _snapshotStoreDao.remove(snapshotDataStoreVO.getId()); + } + + s_logger.debug("Remove snapshot " + snapshotId + " status " + snaphsot.getState() + " from snapshot table"); + _snapshotDao.remove(snapshotId); + } + } + + + } + // push one individual snapshots currently on cache store to region store if it is not there already private void syncSnapshotToRegionStore(long snapshotId, DataStore store){ // if snapshot is already on region wide object store, check if it is really downloaded there (by checking install_path). Sync snapshot to region diff --git a/framework/cluster/src/com/cloud/cluster/ClusterManagerImpl.java b/framework/cluster/src/com/cloud/cluster/ClusterManagerImpl.java index 5a70ca5d4bd..158a6c3941e 100644 --- a/framework/cluster/src/com/cloud/cluster/ClusterManagerImpl.java +++ b/framework/cluster/src/com/cloud/cluster/ClusterManagerImpl.java @@ -41,12 +41,13 @@ import javax.ejb.Local; import javax.inject.Inject; import javax.naming.ConfigurationException; +import org.apache.log4j.Logger; + import org.apache.cloudstack.framework.config.ConfigDepot; import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.utils.identity.ManagementServerNode; -import org.apache.log4j.Logger; import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.cluster.dao.ManagementServerHostPeerDao; @@ -958,7 +959,6 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C mshost.setAlertCount(0); mshost.setState(ManagementServerHost.State.Up); _mshostDao.persist(mshost); - if (s_logger.isInfoEnabled()) { s_logger.info("New instance of management server msid " + _msId + ", runId " + _runId + " is being started"); } diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/AsyncJobManager.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/AsyncJobManager.java index fe5c067ddc2..bce99d084a5 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/AsyncJobManager.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/AsyncJobManager.java @@ -129,4 +129,6 @@ public interface AsyncJobManager extends Manager { String marshallResultObject(Serializable obj); Object unmarshallResultObject(AsyncJob job); + + List findFailureAsyncJobs(String... cmds); } diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDao.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDao.java index 169cae90a97..8778bef7b7b 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDao.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDao.java @@ -41,4 +41,6 @@ public interface AsyncJobDao extends GenericDao { List getExpiredCompletedJobs(Date cutTime, int limit); List getResetJobs(long msid); + + List getFailureJobsSinceLastMsStart(long msId, String... cmds); } diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDaoImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDaoImpl.java index 0d024eb342e..a050407cd97 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDaoImpl.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/dao/AsyncJobDaoImpl.java @@ -44,6 +44,7 @@ public class AsyncJobDaoImpl extends GenericDaoBase implements private final SearchBuilder pseudoJobCleanupSearch; private final SearchBuilder expiringUnfinishedAsyncJobSearch; private final SearchBuilder expiringCompletedAsyncJobSearch; + private final SearchBuilder failureMsidAsyncJobSearch; public AsyncJobDaoImpl() { pendingAsyncJobSearch = createSearchBuilder(); @@ -84,6 +85,13 @@ public class AsyncJobDaoImpl extends GenericDaoBase implements pseudoJobCleanupSearch.and("initMsid", pseudoJobCleanupSearch.entity().getInitMsid(), Op.EQ); pseudoJobCleanupSearch.done(); + failureMsidAsyncJobSearch = createSearchBuilder(); + failureMsidAsyncJobSearch.and("initMsid", failureMsidAsyncJobSearch.entity().getInitMsid(), Op.EQ); + failureMsidAsyncJobSearch.and("instanceType", failureMsidAsyncJobSearch.entity().getInstanceType(), SearchCriteria.Op.EQ); + failureMsidAsyncJobSearch.and("status", failureMsidAsyncJobSearch.entity().getStatus(), SearchCriteria.Op.EQ); + failureMsidAsyncJobSearch.and("job_cmd", failureMsidAsyncJobSearch.entity().getCmd(), Op.IN); + failureMsidAsyncJobSearch.done(); + } @Override @@ -206,4 +214,13 @@ public class AsyncJobDaoImpl extends GenericDaoBase implements return listIncludingRemovedBy(sc, filter); } + + @Override + public List getFailureJobsSinceLastMsStart(long msId, String... cmds) { + SearchCriteria sc = failureMsidAsyncJobSearch.create(); + sc.setParameters("initMsid", msId); + sc.setParameters("status", AsyncJobVO.Status.FAILED); + sc.setParameters("job_cmd", (Object[])cmds); + return listBy(sc); + } } diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java index 3e26fc1c005..63a5864d993 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/AsyncJobManagerImpl.java @@ -1055,4 +1055,9 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager, _messageBus.publish(null, AsyncJob.Topics.JOB_EVENT_PUBLISH, PublishScope.LOCAL, new Pair(job, jobEvent)); } + + @Override + public List findFailureAsyncJobs(String... cmds) { + return _jobDao.getFailureJobsSinceLastMsStart(getMsid(), cmds); + } }