Merge pull request #1825 from Accelerite/CLOUDSTACK-9660

CLOUDSTACK-9660: NPE while destroying volumes during 1000 VMs deploy and destroy tests

NPE is seen as VM destroy and storage cleanup threads try to remove the same root volume. Fix is to handle
only non-root volumes in storage cleanup thread, root volumes will be handled as part of VM destroy.

* pr/1825:
  CLOUDSTACK-9660: NPE while destroying volumes during 1000 VMs deploy and destroy tests NPE is seen as VM destroy and storage cleanup threads try to remove the same root volume. Fix is to handle only non-root volumes in storage cleanup thread, root volumes will be handled as part of VM destroy.

Signed-off-by: Rajani Karuturi <rajani.karuturi@accelerite.com>
This commit is contained in:
Rajani Karuturi 2017-02-28 06:00:02 +05:30
commit 6a18cdd6ef
5 changed files with 21 additions and 8 deletions

View File

@ -80,7 +80,7 @@ public interface VolumeDao extends GenericDao<VolumeVO, Long>, StateDao<Volume.S
List<VolumeVO> listVolumesToBeDestroyed();
List<VolumeVO> listVolumesToBeDestroyed(Date date);
List<VolumeVO> listNonRootVolumesToBeDestroyed(Date date);
ImageFormat getImageFormat(Long volumeId);

View File

@ -325,6 +325,7 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
AllFieldsSearch.and("deviceId", AllFieldsSearch.entity().getDeviceId(), Op.EQ);
AllFieldsSearch.and("poolId", AllFieldsSearch.entity().getPoolId(), Op.EQ);
AllFieldsSearch.and("vType", AllFieldsSearch.entity().getVolumeType(), Op.EQ);
AllFieldsSearch.and("notVolumeType", AllFieldsSearch.entity().getVolumeType(), Op.NEQ);
AllFieldsSearch.and("id", AllFieldsSearch.entity().getId(), Op.EQ);
AllFieldsSearch.and("destroyed", AllFieldsSearch.entity().getState(), Op.EQ);
AllFieldsSearch.and("notDestroyed", AllFieldsSearch.entity().getState(), Op.NEQ);
@ -481,9 +482,10 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
}
@Override
public List<VolumeVO> listVolumesToBeDestroyed(Date date) {
public List<VolumeVO> listNonRootVolumesToBeDestroyed(Date date) {
SearchCriteria<VolumeVO> sc = AllFieldsSearch.create();
sc.setParameters("state", Volume.State.Destroy);
sc.setParameters("notVolumeType", Volume.Type.ROOT.toString());
sc.setParameters("updateTime", date);
return listBy(sc);

View File

@ -174,11 +174,11 @@ public class VolumeObject implements VolumeInfo {
}
@Override
public boolean stateTransit(Volume.Event event) {
public boolean stateTransit(Volume.Event event) {
boolean result = false;
try {
volumeVO = volumeDao.findById(volumeVO.getId());
if(volumeVO != null) {
if (volumeVO != null) {
result = _volStateMachine.transitTo(volumeVO, event, null, volumeDao);
volumeVO = volumeDao.findById(volumeVO.getId());
}
@ -332,8 +332,9 @@ public class VolumeObject implements VolumeInfo {
throw new CloudRuntimeException("Failed to update state:" + e.toString());
} finally {
// in case of OperationFailed, expunge the entry
// state transit call reloads the volume from DB and so check for null as well
if (event == ObjectInDataStoreStateMachine.Event.OperationFailed &&
(volumeVO.getState() != Volume.State.Copying && volumeVO.getState() != Volume.State.Uploaded && volumeVO.getState() != Volume.State.UploadError)) {
(volumeVO != null && volumeVO.getState() != Volume.State.Copying && volumeVO.getState() != Volume.State.Uploaded && volumeVO.getState() != Volume.State.UploadError)) {
objectInStoreMgr.deleteIfNotReady(this);
}
}

View File

@ -316,6 +316,11 @@ public class VolumeServiceImpl implements VolumeService {
}
VolumeVO vol = volDao.findById(volume.getId());
if (vol == null) {
s_logger.debug("Volume " + volume.getId() + " is not found");
future.complete(result);
return future;
}
if (!volumeExistsOnPrimary(vol)) {
// not created on primary store

View File

@ -1080,8 +1080,8 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C
cleanupSecondaryStorage(recurring);
List<VolumeVO> vols = _volsDao.listVolumesToBeDestroyed(new Date(System.currentTimeMillis() - ((long) StorageCleanupDelay.value() << 10)));
// ROOT volumes will be destroyed as part of VM cleanup
List<VolumeVO> vols = _volsDao.listNonRootVolumesToBeDestroyed(new Date(System.currentTimeMillis() - ((long) StorageCleanupDelay.value() << 10)));
for (VolumeVO vol : vols) {
try {
// If this fails, just log a warning. It's ideal if we clean up the host-side clustered file
@ -1092,7 +1092,12 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C
}
try {
volService.expungeVolumeAsync(volFactory.getVolume(vol.getId()));
VolumeInfo volumeInfo = volFactory.getVolume(vol.getId());
if (volumeInfo != null) {
volService.expungeVolumeAsync(volumeInfo);
} else {
s_logger.debug("Volume " + vol.getUuid() + " is already destroyed");
}
} catch (Exception e) {
s_logger.warn("Unable to destroy volume " + vol.getUuid(), e);
}