mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
PowerFlex/ScaleIO - Wait after SDC service start/restart/stop, and retry to fetch SDC id/guid (#11099)
* [PowerFlex/ScaleIO] Added wait time after SDC service start/restart/stop, and retries to fetch SDC id/guid
* Added agent property 'powerflex.sdc.service.wait' for the time (in secs) to wait after SDC service start/restart/stop
* Code improvements
This commit is contained in:
parent
9688cbb095
commit
c94f75c7ea
@ -823,7 +823,7 @@ public class AgentProperties{
|
||||
private T defaultValue;
|
||||
private Class<T> typeClass;
|
||||
|
||||
/**
 * Creates a property with the given name and default value.
 * <p>
 * The constructor is public so that classes outside this package can declare
 * their own agent properties.
 *
 * @param name  the property key
 * @param value the default value handed to {@code init}
 */
public Property(String name, T value) {
    init(name, value);
}
|
||||
|
||||
|
||||
@ -37,6 +37,7 @@ import org.apache.cloudstack.utils.qemu.QemuImg;
|
||||
import org.apache.cloudstack.utils.qemu.QemuImgException;
|
||||
import org.apache.cloudstack.utils.qemu.QemuImgFile;
|
||||
import org.apache.cloudstack.utils.qemu.QemuObject;
|
||||
import org.apache.commons.collections.MapUtils;
|
||||
import org.apache.commons.io.filefilter.WildcardFileFilter;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -581,14 +582,23 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor {
|
||||
}
|
||||
|
||||
if (!ScaleIOUtil.isSDCServiceActive()) {
|
||||
logger.debug("SDC service is not active on host, starting it");
|
||||
if (!ScaleIOUtil.startSDCService()) {
|
||||
return new Ternary<>(false, null, "Couldn't start SDC service on host");
|
||||
}
|
||||
} else if (!ScaleIOUtil.restartSDCService()) {
|
||||
} else {
|
||||
logger.debug("SDC service is active on host, re-starting it");
|
||||
if (!ScaleIOUtil.restartSDCService()) {
|
||||
return new Ternary<>(false, null, "Couldn't restart SDC service on host");
|
||||
}
|
||||
}
|
||||
|
||||
return new Ternary<>( true, getSDCDetails(details), "Prepared client successfully");
|
||||
Map<String, String> sdcDetails = getSDCDetails(details);
|
||||
if (MapUtils.isEmpty(sdcDetails)) {
|
||||
return new Ternary<>(false, null, "Couldn't get the SDC details on the host");
|
||||
}
|
||||
|
||||
return new Ternary<>( true, sdcDetails, "Prepared client successfully");
|
||||
}
|
||||
|
||||
public Pair<Boolean, String> unprepareStorageClient(Storage.StoragePoolType type, String uuid) {
|
||||
@ -611,20 +621,40 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor {
|
||||
|
||||
private Map<String, String> getSDCDetails(Map<String, String> details) {
|
||||
Map<String, String> sdcDetails = new HashMap<String, String>();
|
||||
if (details == null || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
|
||||
if (MapUtils.isEmpty(details) || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) {
|
||||
return sdcDetails;
|
||||
}
|
||||
|
||||
String storageSystemId = details.get(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID);
|
||||
if (StringUtils.isEmpty(storageSystemId)) {
|
||||
return sdcDetails;
|
||||
}
|
||||
|
||||
int numberOfTries = 5;
|
||||
int timeBetweenTries = 1000; // Try more frequently (every sec) and return early when SDC Id or Guid found
|
||||
int attempt = 1;
|
||||
do {
|
||||
logger.debug("Get SDC details, attempt #{}", attempt);
|
||||
String sdcId = ScaleIOUtil.getSdcId(storageSystemId);
|
||||
if (sdcId != null) {
|
||||
sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId);
|
||||
return sdcDetails;
|
||||
} else {
|
||||
String sdcGuId = ScaleIOUtil.getSdcGuid();
|
||||
if (sdcGuId != null) {
|
||||
sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId);
|
||||
return sdcDetails;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
Thread.sleep(timeBetweenTries);
|
||||
} catch (Exception ignore) {
|
||||
}
|
||||
numberOfTries--;
|
||||
attempt++;
|
||||
} while (numberOfTries > 0);
|
||||
|
||||
return sdcDetails;
|
||||
}
|
||||
|
||||
|
||||
@ -116,9 +116,9 @@ public class ScaleIOStorageAdaptorTest {
|
||||
|
||||
Ternary<Boolean, Map<String, String>, String> result = scaleIOStorageAdaptor.prepareStorageClient(Storage.StoragePoolType.PowerFlex, poolUuid, new HashMap<>());
|
||||
|
||||
Assert.assertTrue(result.first());
|
||||
Assert.assertNotNull(result.second());
|
||||
Assert.assertTrue(result.second().isEmpty());
|
||||
Assert.assertFalse(result.first());
|
||||
Assert.assertNull(result.second());
|
||||
Assert.assertEquals("Couldn't get the SDC details on the host", result.third());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@ -183,12 +183,13 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
|
||||
storagePoolHost.setLocalPath(sdcId);
|
||||
storagePoolHostDao.update(storagePoolHost.getId(), storagePoolHost);
|
||||
}
|
||||
}
|
||||
|
||||
int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs)
|
||||
if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) {
|
||||
return sdcId;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
} finally {
|
||||
if (storageSystemIdLock != null) {
|
||||
@ -246,7 +247,7 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
|
||||
}
|
||||
|
||||
if (StringUtils.isBlank(sdcId)) {
|
||||
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, try (re)install SDC and restart agent", host);
|
||||
logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, add MDMs if not or try (re)install SDC & restart agent", host);
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -381,6 +382,9 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable {
|
||||
|
||||
private ScaleIOGatewayClient getScaleIOClient(final Long storagePoolId) throws Exception {
|
||||
StoragePoolVO storagePool = storagePoolDao.findById(storagePoolId);
|
||||
if (storagePool == null) {
|
||||
throw new CloudRuntimeException("Unable to find the storage pool with id " + storagePoolId);
|
||||
}
|
||||
return ScaleIOGatewayClientConnectionPool.getInstance().getClient(storagePool, storagePoolDetailsDao);
|
||||
}
|
||||
|
||||
|
||||
@ -102,12 +102,12 @@ public class ScaleIOHostListener implements HypervisorHostListener {
|
||||
if (systemId == null) {
|
||||
throw new CloudRuntimeException("Failed to get the system id for PowerFlex storage pool " + storagePool.getName());
|
||||
}
|
||||
Map<String,String> details = new HashMap<>();
|
||||
Map<String, String> details = new HashMap<>();
|
||||
details.put(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID, systemId);
|
||||
|
||||
ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, storagePool, storagePool.getPath(), details);
|
||||
ModifyStoragePoolAnswer answer = sendModifyStoragePoolCommand(cmd, storagePool, host);
|
||||
Map<String,String> poolDetails = answer.getPoolInfo().getDetails();
|
||||
Map<String, String> poolDetails = answer.getPoolInfo().getDetails();
|
||||
if (MapUtils.isEmpty(poolDetails)) {
|
||||
String msg = String.format("PowerFlex storage SDC details not found on the host: %s, (re)install SDC and restart agent", host);
|
||||
logger.warn(msg);
|
||||
@ -124,7 +124,7 @@ public class ScaleIOHostListener implements HypervisorHostListener {
|
||||
}
|
||||
|
||||
if (StringUtils.isBlank(sdcId)) {
|
||||
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, (re)install SDC and restart agent", host);
|
||||
String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, add MDMs if not or try (re)install SDC & restart agent", host);
|
||||
logger.warn(msg);
|
||||
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "SDC details not found on host: " + host.getUuid(), msg);
|
||||
return null;
|
||||
|
||||
@ -17,6 +17,8 @@
|
||||
|
||||
package org.apache.cloudstack.storage.datastore.util;
|
||||
|
||||
import com.cloud.agent.properties.AgentProperties;
|
||||
import com.cloud.agent.properties.AgentPropertiesFileHandler;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
|
||||
@ -60,6 +62,14 @@ public class ScaleIOUtil {
|
||||
private static final String SDC_SERVICE_ENABLE_CMD = "systemctl enable scini";
|
||||
|
||||
public static final String CONNECTED_SDC_COUNT_STAT = "ConnectedSDCCount";
|
||||
|
||||
/**
|
||||
* Time (in seconds) to wait after SDC service 'scini' start/restart/stop.<br>
|
||||
* Data type: Integer.<br>
|
||||
* Default value: <code>3</code>
|
||||
*/
|
||||
public static final AgentProperties.Property<Integer> SDC_SERVICE_ACTION_WAIT = new AgentProperties.Property<>("powerflex.sdc.service.wait", 3);
|
||||
|
||||
/**
|
||||
* Cmd for querying volumes in SDC
|
||||
* Sample output for cmd: drv_cfg --query_vols:
|
||||
@ -216,16 +226,41 @@ public class ScaleIOUtil {
|
||||
|
||||
public static boolean startSDCService() {
|
||||
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_START_CMD);
|
||||
return exitValue == 0;
|
||||
if (exitValue != 0) {
|
||||
return false;
|
||||
}
|
||||
waitForSdcServiceActionToComplete();
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean stopSDCService() {
|
||||
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_STOP_CMD);
|
||||
return exitValue == 0;
|
||||
if (exitValue != 0) {
|
||||
return false;
|
||||
}
|
||||
waitForSdcServiceActionToComplete();
|
||||
return true;
|
||||
}
|
||||
|
||||
public static boolean restartSDCService() {
|
||||
int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_RESTART_CMD);
|
||||
return exitValue == 0;
|
||||
if (exitValue != 0) {
|
||||
return false;
|
||||
}
|
||||
waitForSdcServiceActionToComplete();
|
||||
return true;
|
||||
}
|
||||
|
||||
private static void waitForSdcServiceActionToComplete() {
|
||||
// Wait for the SDC service to settle after start/restart/stop and reaches a stable state
|
||||
int waitTimeInSecs = AgentPropertiesFileHandler.getPropertyValue(SDC_SERVICE_ACTION_WAIT);
|
||||
if (waitTimeInSecs < 0) {
|
||||
waitTimeInSecs = SDC_SERVICE_ACTION_WAIT.getDefaultValue();
|
||||
}
|
||||
try {
|
||||
LOGGER.debug(String.format("Waiting for %d secs after SDC service action, to reach a stable state", waitTimeInSecs));
|
||||
Thread.sleep(waitTimeInSecs * 1000L);
|
||||
} catch (InterruptedException ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user