From 06c80cdbe9149abdf86a20df27b620b379e5e1b4 Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Mon, 14 Jul 2025 15:36:30 +0530 Subject: [PATCH 1/3] Remove unfinished usage job entries of the host (#10848) --- .../java/com/cloud/usage/dao/UsageJobDao.java | 2 + .../com/cloud/usage/dao/UsageJobDaoImpl.java | 37 ++++++++++++++++++- .../com/cloud/usage/UsageManagerImpl.java | 23 +++++------- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDao.java b/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDao.java index f22a906054d..d4038d4ceeb 100644 --- a/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDao.java +++ b/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDao.java @@ -37,4 +37,6 @@ public interface UsageJobDao extends GenericDao { UsageJobVO isOwner(String hostname, int pid); void updateJobSuccess(Long jobId, long startMillis, long endMillis, long execTime, boolean success); + + void removeLastOpenJobsOwned(String hostname, int pid); } diff --git a/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDaoImpl.java b/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDaoImpl.java index 065dc309ebe..4c58062413d 100644 --- a/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/usage/dao/UsageJobDaoImpl.java @@ -22,6 +22,7 @@ import java.util.Date; import java.util.List; +import org.apache.commons.collections.CollectionUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; @@ -116,7 +117,7 @@ public class UsageJobDaoImpl extends GenericDaoBase implements public UsageJobVO isOwner(String hostname, int pid) { TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.USAGE_DB); try { - if ((hostname == null) || (pid <= 0)) { + if (hostname == null || pid <= 0) { return null; } @@ -176,7 +177,7 @@ public class UsageJobDaoImpl extends GenericDaoBase implements SearchCriteria sc = createSearchCriteria(); sc.addAnd("endMillis", SearchCriteria.Op.EQ, Long.valueOf(0)); sc.addAnd("jobType", SearchCriteria.Op.EQ, Integer.valueOf(UsageJobVO.JOB_TYPE_SINGLE)); - sc.addAnd("scheduled", SearchCriteria.Op.EQ, Integer.valueOf(0)); + sc.addAnd("scheduled", SearchCriteria.Op.EQ, Integer.valueOf(UsageJobVO.JOB_NOT_SCHEDULED)); List jobs = search(sc, filter); if ((jobs == null) || jobs.isEmpty()) { @@ -196,4 +197,36 @@ public class UsageJobDaoImpl extends GenericDaoBase implements } return jobs.get(0).getHeartbeat(); } + + private List getLastOpenJobsOwned(String hostname, int pid) { + SearchCriteria sc = createSearchCriteria(); + sc.addAnd("endMillis", SearchCriteria.Op.EQ, Long.valueOf(0)); + sc.addAnd("host", SearchCriteria.Op.EQ, hostname); + if (pid > 0) { + sc.addAnd("pid", SearchCriteria.Op.EQ, Integer.valueOf(pid)); + } + return listBy(sc); + } + + @Override + public void removeLastOpenJobsOwned(String hostname, int pid) { + if (hostname == null) { + return; + } + + TransactionLegacy txn = TransactionLegacy.open(TransactionLegacy.USAGE_DB); + try { + List jobs = getLastOpenJobsOwned(hostname, pid); + if (CollectionUtils.isNotEmpty(jobs)) { + s_logger.info(String.format("Found %s opens job, to remove", jobs.size())); + for (UsageJobVO job : jobs) { + s_logger.debug(String.format("Removing job - id: %d, pid: %d, job type: %d, scheduled: %d, heartbeat: %s", + job.getId(), job.getPid(), job.getJobType(), job.getScheduled(), job.getHeartbeat())); + remove(job.getId()); + } + } + } finally { + txn.close(); + } + } } diff --git a/usage/src/main/java/com/cloud/usage/UsageManagerImpl.java b/usage/src/main/java/com/cloud/usage/UsageManagerImpl.java index cc129a9ec5e..95475452164 100644 --- a/usage/src/main/java/com/cloud/usage/UsageManagerImpl.java +++ b/usage/src/main/java/com/cloud/usage/UsageManagerImpl.java @@ -319,6 +319,9 @@ public class UsageManagerImpl extends ManagerBase implements UsageManager, Runna s_logger.info("Starting Usage Manager"); } + _usageJobDao.removeLastOpenJobsOwned(_hostname, 0); + Runtime.getRuntime().addShutdownHook(new AbandonJob()); + // use the configured exec time and aggregation duration for scheduling the job _scheduledFuture = _executor.scheduleAtFixedRate(this, _jobExecTime.getTimeInMillis() - System.currentTimeMillis(), _aggregationDuration * 60 * 1000, TimeUnit.MILLISECONDS); @@ -331,7 +334,6 @@ public class UsageManagerImpl extends ManagerBase implements UsageManager, Runna _sanity = _sanityExecutor.scheduleAtFixedRate(new SanityCheck(), 1, _sanityCheckInterval, TimeUnit.DAYS); } - Runtime.getRuntime().addShutdownHook(new AbandonJob()); TransactionLegacy usageTxn = TransactionLegacy.open(TransactionLegacy.USAGE_DB); try { if (_heartbeatLock.lock(3)) { // 3 second timeout @@ -2255,17 +2257,17 @@ public class UsageManagerImpl extends ManagerBase implements UsageManager, Runna // the aggregation range away from executing the next job long now = System.currentTimeMillis(); long timeToJob = _jobExecTime.getTimeInMillis() - now; - long timeSinceJob = 0; + long timeSinceLastSuccessJob = 0; long aggregationDurationMillis = _aggregationDuration * 60L * 1000L; long lastSuccess = _usageJobDao.getLastJobSuccessDateMillis(); if (lastSuccess > 0) { - timeSinceJob = now - lastSuccess; + timeSinceLastSuccessJob = now - lastSuccess; } - if ((timeSinceJob > 0) && (timeSinceJob > (aggregationDurationMillis - 100))) { + if ((timeSinceLastSuccessJob > 0) && (timeSinceLastSuccessJob > (aggregationDurationMillis - 100))) { if (timeToJob > (aggregationDurationMillis / 2)) { if (s_logger.isDebugEnabled()) { - s_logger.debug("it's been " + timeSinceJob + " ms since last usage job and " + timeToJob + + s_logger.debug("it's been " + timeSinceLastSuccessJob + " ms since last usage job and " + timeToJob + " ms until next job, scheduling an immediate job to catch up (aggregation duration is " + _aggregationDuration + " minutes)"); } scheduleParse(); @@ -2352,17 +2354,12 @@ public class UsageManagerImpl extends ManagerBase implements UsageManager, Runna } } } + private class AbandonJob extends Thread { @Override public void run() { - s_logger.info("exitting Usage Manager"); - deleteOpenjob(); - } - private void deleteOpenjob() { - UsageJobVO job = _usageJobDao.isOwner(_hostname, _pid); - if (job != null) { - _usageJobDao.remove(job.getId()); - } + s_logger.info("exiting Usage Manager"); + _usageJobDao.removeLastOpenJobsOwned(_hostname, _pid); } } } From 9688cbb0953494346a519a661c7bb4374688cb7d Mon Sep 17 00:00:00 2001 From: Wei Zhou Date: Tue, 15 Jul 2025 20:09:41 +0800 Subject: [PATCH 2/3] systemvm: build 4.20.2 template with 'depmod -a' (#11128) --- pom.xml | 2 +- tools/appliance/systemvmtemplate/scripts/finalize.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index af0ac94a1f2..17385ab5a8c 100644 --- a/pom.xml +++ b/pom.xml @@ -50,7 +50,7 @@ UTF-8 UTF-8 https://download.cloudstack.org/systemvm - 4.20.1.0 + 4.20.2.0 apache https://sonarcloud.io diff --git a/tools/appliance/systemvmtemplate/scripts/finalize.sh b/tools/appliance/systemvmtemplate/scripts/finalize.sh index e5d15ecb61c..507d4a4133a 100644 --- a/tools/appliance/systemvmtemplate/scripts/finalize.sh +++ b/tools/appliance/systemvmtemplate/scripts/finalize.sh @@ -68,6 +68,7 @@ function zero_disk() { } function finalize() { + depmod -a configure_misc configure_rundisk_size configure_sudoers From c94f75c7ea7dc01b636aec9dfc8c6632ca1a5419 Mon Sep 17 00:00:00 2001 From: Suresh Kumar Anaparti Date: Wed, 16 Jul 2025 12:32:09 +0530 Subject: [PATCH 3/3] PowerFlex/ScaleIO - Wait after SDC service start/restart/stop, and retry to fetch SDC id/guid (#11099) * [PowerFlex/ScaleIO] Added wait time after SDC service start/restart/stop, and retries to fetch SDC id/guid * Added agent property 'powerflex.sdc.service.wait' for the time (in secs) to wait after SDC service start/restart/stop * code improvements --- .../agent/properties/AgentProperties.java | 2 +- .../kvm/storage/ScaleIOStorageAdaptor.java | 54 ++++++++++++++----- .../storage/ScaleIOStorageAdaptorTest.java | 6 +-- .../manager/ScaleIOSDCManagerImpl.java | 14 +++-- .../provider/ScaleIOHostListener.java | 6 +-- .../storage/datastore/util/ScaleIOUtil.java | 41 ++++++++++++-- 6 files changed, 96 insertions(+), 27 deletions(-) diff --git a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java index e5593f10460..c781c07c227 100644 --- a/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java +++ b/agent/src/main/java/com/cloud/agent/properties/AgentProperties.java @@ -823,7 +823,7 @@ public class AgentProperties{ private T defaultValue; private Class typeClass; - Property(String name, T value) { + public Property(String name, T value) { init(name, value); } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptor.java index 335ea0d03d2..195ce6c9984 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptor.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptor.java @@ -37,6 +37,7 @@ import org.apache.cloudstack.utils.qemu.QemuImg; import org.apache.cloudstack.utils.qemu.QemuImgException; import org.apache.cloudstack.utils.qemu.QemuImgFile; import org.apache.cloudstack.utils.qemu.QemuObject; +import org.apache.commons.collections.MapUtils; import org.apache.commons.io.filefilter.WildcardFileFilter; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; @@ -581,14 +582,23 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor { } if (!ScaleIOUtil.isSDCServiceActive()) { + logger.debug("SDC service is not active on host, starting it"); if (!ScaleIOUtil.startSDCService()) { return new Ternary<>(false, null, "Couldn't start SDC service on host"); } - } else if (!ScaleIOUtil.restartSDCService()) { - return new Ternary<>(false, null, "Couldn't restart SDC service on host"); + } else { + logger.debug("SDC service is active on host, re-starting it"); + if (!ScaleIOUtil.restartSDCService()) { + return new Ternary<>(false, null, "Couldn't restart SDC service on host"); + } } - return new Ternary<>( true, getSDCDetails(details), "Prepared client successfully"); + Map sdcDetails = getSDCDetails(details); + if (MapUtils.isEmpty(sdcDetails)) { + return new Ternary<>(false, null, "Couldn't get the SDC details on the host"); + } + + return new Ternary<>( true, sdcDetails, "Prepared client successfully"); } public Pair unprepareStorageClient(Storage.StoragePoolType type, String uuid) { @@ -611,20 +621,40 @@ public class ScaleIOStorageAdaptor implements StorageAdaptor { private Map getSDCDetails(Map details) { Map sdcDetails = new HashMap(); - if (details == null || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) { + if (MapUtils.isEmpty(details) || !details.containsKey(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID)) { return sdcDetails; } String storageSystemId = details.get(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID); - String sdcId = ScaleIOUtil.getSdcId(storageSystemId); - if (sdcId != null) { - sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId); - } else { - String sdcGuId = ScaleIOUtil.getSdcGuid(); - if (sdcGuId != null) { - sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId); - } + if (StringUtils.isEmpty(storageSystemId)) { + return sdcDetails; } + + int numberOfTries = 5; + int timeBetweenTries = 1000; // Try more frequently (every sec) and return early when SDC Id or Guid found + int attempt = 1; + do { + logger.debug("Get SDC details, attempt #{}", attempt); + String sdcId = ScaleIOUtil.getSdcId(storageSystemId); + if (sdcId != null) { + sdcDetails.put(ScaleIOGatewayClient.SDC_ID, sdcId); + return sdcDetails; + } else { + String sdcGuId = ScaleIOUtil.getSdcGuid(); + if (sdcGuId != null) { + sdcDetails.put(ScaleIOGatewayClient.SDC_GUID, sdcGuId); + return sdcDetails; + } + } + + try { + Thread.sleep(timeBetweenTries); + } catch (Exception ignore) { + } + numberOfTries--; + attempt++; + } while (numberOfTries > 0); + return sdcDetails; } diff --git a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptorTest.java b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptorTest.java index 07aea0cfbee..c2002f56560 100644 --- a/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptorTest.java +++ b/plugins/hypervisors/kvm/src/test/java/com/cloud/hypervisor/kvm/storage/ScaleIOStorageAdaptorTest.java @@ -116,9 +116,9 @@ public class ScaleIOStorageAdaptorTest { Ternary, String> result = scaleIOStorageAdaptor.prepareStorageClient(Storage.StoragePoolType.PowerFlex, poolUuid, new HashMap<>()); - Assert.assertTrue(result.first()); - Assert.assertNotNull(result.second()); - Assert.assertTrue(result.second().isEmpty()); + Assert.assertFalse(result.first()); + Assert.assertNull(result.second()); + Assert.assertEquals("Couldn't get the SDC details on the host", result.third()); } @Test diff --git a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/manager/ScaleIOSDCManagerImpl.java b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/manager/ScaleIOSDCManagerImpl.java index f1177acc7b4..5f098badaa1 100644 --- a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/manager/ScaleIOSDCManagerImpl.java +++ b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/manager/ScaleIOSDCManagerImpl.java @@ -183,12 +183,13 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable { storagePoolHost.setLocalPath(sdcId); storagePoolHostDao.update(storagePoolHost.getId(), storagePoolHost); } + + int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs) + if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) { + return sdcId; + } } - int waitTimeInSecs = 15; // Wait for 15 secs (usual tests with SDC service start took 10-15 secs) - if (hostSdcConnected(sdcId, dataStore, waitTimeInSecs)) { - return sdcId; - } return null; } finally { if (storageSystemIdLock != null) { @@ -246,7 +247,7 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable { } if (StringUtils.isBlank(sdcId)) { - logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, try (re)install SDC and restart agent", host); + logger.warn("Couldn't retrieve PowerFlex storage SDC details from the host: {}, add MDMs if not or try (re)install SDC & restart agent", host); return null; } @@ -381,6 +382,9 @@ public class ScaleIOSDCManagerImpl implements ScaleIOSDCManager, Configurable { private ScaleIOGatewayClient getScaleIOClient(final Long storagePoolId) throws Exception { StoragePoolVO storagePool = storagePoolDao.findById(storagePoolId); + if (storagePool == null) { + throw new CloudRuntimeException("Unable to find the storage pool with id " + storagePoolId); + } return ScaleIOGatewayClientConnectionPool.getInstance().getClient(storagePool, storagePoolDetailsDao); } diff --git a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/provider/ScaleIOHostListener.java b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/provider/ScaleIOHostListener.java index 5fc4868902e..4f4400ffacc 100644 --- a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/provider/ScaleIOHostListener.java +++ b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/provider/ScaleIOHostListener.java @@ -102,12 +102,12 @@ public class ScaleIOHostListener implements HypervisorHostListener { if (systemId == null) { throw new CloudRuntimeException("Failed to get the system id for PowerFlex storage pool " + storagePool.getName()); } - Map details = new HashMap<>(); + Map details = new HashMap<>(); details.put(ScaleIOGatewayClient.STORAGE_POOL_SYSTEM_ID, systemId); ModifyStoragePoolCommand cmd = new ModifyStoragePoolCommand(true, storagePool, storagePool.getPath(), details); ModifyStoragePoolAnswer answer = sendModifyStoragePoolCommand(cmd, storagePool, host); - Map poolDetails = answer.getPoolInfo().getDetails(); + Map poolDetails = answer.getPoolInfo().getDetails(); if (MapUtils.isEmpty(poolDetails)) { String msg = String.format("PowerFlex storage SDC details not found on the host: %s, (re)install SDC and restart agent", host); logger.warn(msg); @@ -124,7 +124,7 @@ public class ScaleIOHostListener implements HypervisorHostListener { } if (StringUtils.isBlank(sdcId)) { - String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, (re)install SDC and restart agent", host); + String msg = String.format("Couldn't retrieve PowerFlex storage SDC details from the host: %s, add MDMs if not or try (re)install SDC & restart agent", host); logger.warn(msg); _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "SDC details not found on host: " + host.getUuid(), msg); return null; diff --git a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/util/ScaleIOUtil.java b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/util/ScaleIOUtil.java index 4bb8df9b60d..d91321a907f 100644 --- a/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/util/ScaleIOUtil.java +++ b/plugins/storage/volume/scaleio/src/main/java/org/apache/cloudstack/storage/datastore/util/ScaleIOUtil.java @@ -17,6 +17,8 @@ package org.apache.cloudstack.storage.datastore.util; +import com.cloud.agent.properties.AgentProperties; +import com.cloud.agent.properties.AgentPropertiesFileHandler; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; @@ -60,6 +62,14 @@ public class ScaleIOUtil { private static final String SDC_SERVICE_ENABLE_CMD = "systemctl enable scini"; public static final String CONNECTED_SDC_COUNT_STAT = "ConnectedSDCCount"; + + /** + * Time (in seconds) to wait after SDC service 'scini' start/restart/stop.
+ * Data type: Integer.
+ * Default value: 3 + */ + public static final AgentProperties.Property SDC_SERVICE_ACTION_WAIT = new AgentProperties.Property<>("powerflex.sdc.service.wait", 3); + /** * Cmd for querying volumes in SDC * Sample output for cmd: drv_cfg --query_vols: @@ -216,16 +226,41 @@ public class ScaleIOUtil { public static boolean startSDCService() { int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_START_CMD); - return exitValue == 0; + if (exitValue != 0) { + return false; + } + waitForSdcServiceActionToComplete(); + return true; } public static boolean stopSDCService() { int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_STOP_CMD); - return exitValue == 0; + if (exitValue != 0) { + return false; + } + waitForSdcServiceActionToComplete(); + return true; } public static boolean restartSDCService() { int exitValue = Script.runSimpleBashScriptForExitValue(SDC_SERVICE_RESTART_CMD); - return exitValue == 0; + if (exitValue != 0) { + return false; + } + waitForSdcServiceActionToComplete(); + return true; + } + + private static void waitForSdcServiceActionToComplete() { + // Wait for the SDC service to settle after start/restart/stop and reaches a stable state + int waitTimeInSecs = AgentPropertiesFileHandler.getPropertyValue(SDC_SERVICE_ACTION_WAIT); + if (waitTimeInSecs < 0) { + waitTimeInSecs = SDC_SERVICE_ACTION_WAIT.getDefaultValue(); + } + try { + LOGGER.debug(String.format("Waiting for %d secs after SDC service action, to reach a stable state", waitTimeInSecs)); + Thread.sleep(waitTimeInSecs * 1000L); + } catch (InterruptedException ignore) { + } } }