PowerFlex/ScaleIO - Wait after SDC service start/restart/stop, and retry to fetch SDC id/guid (#11099)

* [PowerFlex/ScaleIO] Added wait time after SDC service start/restart/stop, and retries to fetch SDC id/guid

* Added agent property 'powerflex.sdc.service.wait' for the time (in secs) to wait after SDC service start/restart/stop

* code improvements
This commit is contained in:
Suresh Kumar Anaparti 2025-07-16 12:32:09 +05:30 committed by GitHub
parent 9688cbb095
commit c94f75c7ea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 96 additions and 27 deletions

View File

@ -823,7 +823,7 @@ public class AgentProperties{
private T defaultValue;
private Class<T> typeClass;
Property(String name, T value) {
public Property(String name, T value) {
init(name, value);
}

View File

@ -37,6 +37,7 @@ import org.apache.cloudstack.utils.qemu.QemuImg;
import org.apache.cloudstack.utils.qemu.QemuImgException;
import org.apache.cloudstack.utils.qemu.QemuImgFile;
import org.apache.cloudstack.utils.qemu.QemuObject;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.io.filefilter.WildcardFileFilter;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
@ -581,14 +582,23 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor {
}
if (!ScaleIOUtil.isSDCServiceActive()) {
logger.debug("SDC service is not active on host, starting it");
if (!ScaleIOUtil.startSDCService()) {
return new Ternary<>(false, null, "Couldn't start SDC service on host");
}
} else if (!ScaleIOUtil.restartSDCService()) {
} else {
logger.debug("SDC service is active on host, re-starting it");
if (!ScaleIOUtil.restartSDCService()) {
return new Ternary<>(false, null, "Couldn't restart SDC service on host");
}
}
return new Ternary<>( true, getSDCDetails(details), "Prepared client successfully");
Map<String, String> sdcDetails = getSDCDetails(details);
if (MapUtils.isEmpty(sdcDetails)) {
return new Ternary<>(false, null, "Couldn't get the SDC details on the host");
}
return new Ternary<>( true, sdcDetails, "Prepared client successfully");
}
public Pair<Boolean, String> unprepareStorageClient(Storage.StoragePoolType type, String uuid) {
@ -611,20 +621,40 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor {
private Map<String, String> getSDCDetails(Map<String, String> details) {
Map<String, String> sdcDetails = new HashMap<String, String>();
if (details == null || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
if (MapUtils.isEmpty(details) || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
return sdcDetails;
}
String storageSystemId = details.get(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID);
if (StringUtils.isEmpty(storageSystemId)) {
return sdcDetails;
}
int numberOfTries = 5;
int timeBetweenTries = 1000; // Try more frequently (every sec) and return early when SDC Id or Guid found
int attempt = 1;
do {
logger.debug("Get SDC details, attempt #{}", attempt);
String sdcId = ScaleIOUtil.getSdcId(storageSystemId);
if (sdcId != null) {
sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId);
return sdcDetails;
} else {
String sdcGuId = ScaleIOUtil.getSdcGuid();
if (sdcGuId != null) {
sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId);
return sdcDetails;
}
}
try {
Thread.sleep(timeBetweenTries);
} catch (Exception ignore) {
}
numberOfTries--;
attempt++;
} while (numberOfTries > 0);
return sdcDetails;
}

View File

@ -116,9 +116,9 @@ public class ScaleIOStorageAdaptorTest {
Ternary<Boolean, Map<String, String>, String> result = scaleIOStorageAdaptor.prepareStorageClient(Storage.StoragePoolType.PowerFlex, poolUuid, new HashMap<>());
Assert.assertTrue(result.first());
Assert.assertNotNull(result.second());
Assert.assertTrue(result.second().isEmpty());
Assert.assertFalse(result.first());
Assert.assertNull(result.second());
Assert.assertEquals("Couldn't get the SDC details on the host", result.third());
}
@Test

View File

@ -183,12 +183,13 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
storagePoolHost.setLocalPath(sdcId);
storagePoolHostDao.update(storagePoolHost.getId(), storagePoolHost);
}
}
int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs)
if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) {
return sdcId;
}
}
return null;
} finally {
if (storageSystemIdLock != null) {
@ -246,7 +247,7 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
}
if (StringUtils.isBlank(sdcId)) {
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, try (re)install SDC and restart agent", host);
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, add MDMs if not or try (re)install SDC & restart agent", host);
return null;
}
@ -381,6 +382,9 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
private ScaleIOGatewayClient getScaleIOClient(final Long storagePoolId) throws Exception {
StoragePoolVO storagePool = storagePoolDao.findById(storagePoolId);
if (storagePool == null) {
throw new CloudRuntimeException("Unable to find the storage pool with id " + storagePoolId);
}
return ScaleIOGatewayClientConnectionPool.getInstance().getClient(storagePool, storagePoolDetailsDao);
}

View File

@ -102,12 +102,12 @@ public class ScaleIOHostListener implements HypervisorHostListener {
if (systemId == null) {
throw new CloudRuntimeException("Failed to get the system id for PowerFlex storage pool " + storagePool.getName());
}
Map<String,String> details = new HashMap<>();
Map<String, String> details = new HashMap<>();
details.put(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID, systemId);
ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, storagePool, storagePool.getPath(), details);
ModifyStoragePoolAnswer answer = sendModifyStoragePoolCommand(cmd, storagePool, host);
Map<String,String> poolDetails = answer.getPoolInfo().getDetails();
Map<String, String> poolDetails = answer.getPoolInfo().getDetails();
if (MapUtils.isEmpty(poolDetails)) {
String msg = String.format("PowerFlex storage SDC details not found on the host: %s, (re)install SDC and restart agent", host);
logger.warn(msg);
@ -124,7 +124,7 @@ public class ScaleIOHostListener implements HypervisorHostListener {
}
if (StringUtils.isBlank(sdcId)) {
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, (re)install SDC and restart agent", host);
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, add MDMs if not or try (re)install SDC & restart agent", host);
logger.warn(msg);
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "SDC details not found on host: " + host.getUuid(), msg);
return null;

View File

@ -17,6 +17,8 @@
package org.apache.cloudstack.storage.datastore.util;
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
@ -60,6 +62,14 @@ public class ScaleIOUtil {
private static final String SDC_SERVICE_ENABLE_CMD = "systemctl enable scini";
public static final String CONNECTED_SDC_COUNT_STAT = "ConnectedSDCCount";
/**
* Time (in seconds) to wait after SDC service 'scini' start/restart/stop.<br>
* Data type: Integer.<br>
* Default value: <code>3</code>
*/
public static final AgentProperties.Property<Integer> SDC_SERVICE_ACTION_WAIT = new AgentProperties.Property<>("powerflex.sdc.service.wait", 3);
/**
* Cmd for querying volumes in SDC
* Sample output for cmd: drv_cfg --query_vols:
@ -216,16 +226,41 @@ public class ScaleIOUtil {
public static boolean startSDCService() {
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_START_CMD);
return exitValue == 0;
if (exitValue != 0) {
return false;
}
waitForSdcServiceActionToComplete();
return true;
}
public static boolean stopSDCService() {
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_STOP_CMD);
return exitValue == 0;
if (exitValue != 0) {
return false;
}
waitForSdcServiceActionToComplete();
return true;
}
public static boolean restartSDCService() {
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_RESTART_CMD);
return exitValue == 0;
if (exitValue != 0) {
return false;
}
waitForSdcServiceActionToComplete();
return true;
}
private static void waitForSdcServiceActionToComplete() {
// Wait for the SDC service to settle after start/restart/stop and reaches a stable state
int waitTimeInSecs = AgentPropertiesFileHandler.getPropertyValue(SDC_SERVICE_ACTION_WAIT);
if (waitTimeInSecs < 0) {
waitTimeInSecs = SDC_SERVICE_ACTION_WAIT.getDefaultValue();
}
try {
LOGGER.debug(String.format("Waiting for %d secs after SDC service action, to reach a stable state", waitTimeInSecs));
Thread.sleep(waitTimeInSecs * 1000L);
} catch (InterruptedException ignore) {
}
}
}