From 4b10c6116763bed6f853a2bfdfbfbb4c60c81696 Mon Sep 17 00:00:00 2001 From: Harikrishna Patnala Date: Tue, 28 Jul 2020 09:08:59 +0530 Subject: [PATCH] Datastore cluster maintenance failure case handling. --- .../com/cloud/storage/StorageManagerImpl.java | 51 ++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java index bc94dfc2a8f..2381e8eec26 100644 --- a/server/src/main/java/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/main/java/com/cloud/storage/StorageManagerImpl.java @@ -1507,29 +1507,46 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C DataStore store = _dataStoreMgr.getDataStore(primaryStorage.getId(), DataStoreRole.Primary); if (primaryStorage.getPoolType() == StoragePoolType.DatastoreCluster) { - // Before preparing the datastorecluster to maintenance mode, the storagepools in the datastore cluster needs to put in maintenance - List childDatastores = _storagePoolDao.listChildStoragePoolsInDatastoreCluster(primaryStorageId); - Transaction.execute(new TransactionCallbackNoReturn() { - @Override - public void doInTransactionWithoutResult(TransactionStatus status) { - for (StoragePoolVO childDatastore : childDatastores) { - // set the pool state to prepare for maintenance, so that VMs will not migrate to the storagepools in the same cluster - childDatastore.setStatus(StoragePoolStatus.PrepareForMaintenance); - _storagePoolDao.update(childDatastore.getId(), childDatastore); - } - } - }); - for (StoragePoolVO childDatastore : childDatastores) { - //FR41 need to handle when one of the primary stores is unable to put in maintenance mode - DataStore childStore = _dataStoreMgr.getDataStore(childDatastore.getId(), DataStoreRole.Primary); - lifeCycle.maintain(childStore); - } + handlePrepareDatastoreCluserMaintenance(lifeCycle, primaryStorageId); } 
lifeCycle.maintain(store); return (PrimaryDataStoreInfo)_dataStoreMgr.getDataStore(primaryStorage.getId(), DataStoreRole.Primary); }
+    /** Puts every child pool of the datastore cluster into maintenance; if one child fails, cancels maintenance on the children already done and throws CloudRuntimeException. */
+    private void handlePrepareDatastoreCluserMaintenance(DataStoreLifeCycle lifeCycle, Long primaryStorageId) {
+        List<StoragePoolVO> childDatastores = _storagePoolDao.listChildStoragePoolsInDatastoreCluster(primaryStorageId);
+        Transaction.execute(new TransactionCallbackNoReturn() {
+            @Override
+            public void doInTransactionWithoutResult(TransactionStatus status) {
+                for (StoragePoolVO childDatastore : childDatastores) {
+                    // Flag the pool as PrepareForMaintenance first, so that VMs will not migrate to the storage pools in the same cluster
+                    childDatastore.setStatus(StoragePoolStatus.PrepareForMaintenance);
+                    _storagePoolDao.update(childDatastore.getId(), childDatastore);
+                }
+            }
+        });
+        List<DataStore> maintenanceSuccessfulStoragePools = new ArrayList<>();
+        for (StoragePoolVO childDatastore : childDatastores) {
+            // Maintain the children one at a time, remembering each success so it can be cancelled if a later child fails
+            DataStore childStore = _dataStoreMgr.getDataStore(childDatastore.getId(), DataStoreRole.Primary);
+            try {
+                lifeCycle.maintain(childStore);
+            } catch (Exception e) {
+                if (s_logger.isDebugEnabled()) {
+                    s_logger.debug(String.format("Exception on maintenance preparation of one of the child datastores in datastore cluster %d with error %s", primaryStorageId, e));
+                    s_logger.debug(String.format("Cancelling the maintenance mode of child datastores in datastore cluster %d", primaryStorageId));
+                }
+                for (DataStore dataStore: maintenanceSuccessfulStoragePools) {
+                    lifeCycle.cancelMaintain(dataStore); // NOTE(review): if this throws, the rollback stops here and the original error is lost
+                }
+                throw new CloudRuntimeException(String.format("Failed to prepare maintenance mode for datastore cluster %d with error %s %s", primaryStorageId, e.getMessage(), e), e);
+            }
+            maintenanceSuccessfulStoragePools.add(childStore);
+        }
+    }
+
@Override @DB public PrimaryDataStoreInfo cancelPrimaryStorageForMaintenance(CancelPrimaryStorageMaintenanceCmd cmd) throws ResourceUnavailableException {