diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java
index be5ab396d19..f180848a8d5 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHABase.java
@@ -34,7 +34,8 @@ public class KVMHABase {
     protected static String s_heartBeatPath;
     protected long _heartBeatUpdateTimeout = 60000;
     protected long _heartBeatUpdateFreq = 60000;
-    protected long _heartBeatUpdateMaxRetry = 3;
+    protected long _heartBeatUpdateMaxTries = 5;
+    protected long _heartBeatUpdateRetrySleep = 15000;
 
     public static enum PoolType {
         PrimaryStorage, SecondaryStorage
diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
index 0cebb4c9b00..8a11b7fc962 100644
--- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
+++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java
@@ -119,7 +119,8 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
                     }
 
                     String result = null;
-                    for (int i = 0; i < 5; i++) {
+                    // Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
+                    for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
                         Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
@@ -127,14 +128,21 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
                         cmd.add("-h", _hostIP);
                         result = cmd.execute();
                         if (result != null) {
-                            s_logger.warn("write heartbeat failed: " + result + ", retry: " + i);
+                            s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
+                            try {
+                                Thread.sleep(_heartBeatUpdateRetrySleep);
+                            } catch (InterruptedException e) {
+                                s_logger.debug("[ignored] interupted between heartbeat retries.");
+                            }
                         } else {
                             break;
                         }
                     }
 
                     if (result != null) {
-                        s_logger.warn("write heartbeat failed: " + result + "; reboot the host");
+                        // Stop cloudstack-agent if can't write to heartbeat file.
+                        // This will raise an alert on the mgmt server
+                        s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent");
                         Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
                         cmd.add("-i", primaryStoragePool._poolIp);
                         cmd.add("-p", primaryStoragePool._poolMountSourcePath);
diff --git a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java
index ef21dd7960f..ee14077eeca 100644
--- a/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java
+++ b/plugins/hypervisors/vmware/src/main/java/com/cloud/hypervisor/vmware/resource/VmwareResource.java
@@ -1668,6 +1668,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
         String existingVmName = null;
         VirtualMachineFileInfo existingVmFileInfo = null;
         VirtualMachineFileLayoutEx existingVmFileLayout = null;
+        List<DatastoreMO> existingDatastores = new ArrayList<DatastoreMO>();
 
         Pair names = composeVmNames(vmSpec);
         String vmInternalCSName = names.first();
@@ -1790,6 +1791,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
                         existingVmName = existingVmInDc.getName();
                         existingVmFileInfo = existingVmInDc.getFileInfo();
                         existingVmFileLayout = existingVmInDc.getFileLayout();
+                        existingDatastores = existingVmInDc.getAllDatastores();
                         existingVmInDc.unregisterVm();
                     }
                     Pair rootDiskDataStoreDetails = null;
@@ -2256,7 +2258,18 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
 
             // Since VM was successfully powered-on, if there was an existing VM in a different cluster that was unregistered, delete all the files associated with it.
             if (existingVmName != null && existingVmFileLayout != null) {
-                deleteUnregisteredVmFiles(existingVmFileLayout, dcMo, true);
+                List<String> vmDatastoreNames = new ArrayList<String>();
+                for (DatastoreMO vmDatastore : vmMo.getAllDatastores()) {
+                    vmDatastoreNames.add(vmDatastore.getName());
+                }
+                // Don't delete files that are in a datastore that is being used by the new VM as well (zone-wide datastore).
+                List<String> skipDatastores = new ArrayList<String>();
+                for (DatastoreMO existingDatastore : existingDatastores) {
+                    if (vmDatastoreNames.contains(existingDatastore.getName())) {
+                        skipDatastores.add(existingDatastore.getName());
+                    }
+                }
+                deleteUnregisteredVmFiles(existingVmFileLayout, dcMo, true, skipDatastores);
             }
 
             return startAnswer;
@@ -2944,7 +2957,14 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
         }
     }
 
-    private void deleteUnregisteredVmFiles(VirtualMachineFileLayoutEx vmFileLayout, DatacenterMO dcMo, boolean deleteDisks) throws Exception {
+    private void checkAndDeleteDatastoreFile(String filePath, List<String> skipDatastores, DatastoreMO dsMo, DatacenterMO dcMo) throws Exception {
+        if (dsMo != null && dcMo != null && (skipDatastores == null || !skipDatastores.contains(dsMo.getName()))) {
+            s_logger.debug("Deleting file: " + filePath);
+            dsMo.deleteFile(filePath, dcMo.getMor(), true);
+        }
+    }
+
+    private void deleteUnregisteredVmFiles(VirtualMachineFileLayoutEx vmFileLayout, DatacenterMO dcMo, boolean deleteDisks, List<String> skipDatastores) throws Exception {
         s_logger.debug("Deleting files associated with an existing VM that was unregistered");
         DatastoreFile vmFolder = null;
         try {
@@ -2957,8 +2977,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
                 else if (file.getType().equals("config"))
                     vmFolder = new DatastoreFile(fileInDatastore.getDatastoreName(), fileInDatastore.getDir());
                 DatastoreMO dsMo = new DatastoreMO(dcMo.getContext(), dcMo.findDatastore(fileInDatastore.getDatastoreName()));
-                s_logger.debug("Deleting file: " + file.getName());
-                dsMo.deleteFile(file.getName(), dcMo.getMor(), true, VmwareManager.s_vmwareSearchExcludeFolder.value());
+                checkAndDeleteDatastoreFile(file.getName(), skipDatastores, dsMo, dcMo);
             }
             // Delete files that are present in the VM folder - this will take care of the VM disks as well.
             DatastoreMO vmFolderDsMo = new DatastoreMO(dcMo.getContext(), dcMo.findDatastore(vmFolder.getDatastoreName()));
@@ -2966,14 +2985,12 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
             if (deleteDisks) {
                 for (String file : files) {
                     String vmDiskFileFullPath = String.format("%s/%s", vmFolder.getPath(), file);
-                    s_logger.debug("Deleting file: " + vmDiskFileFullPath);
-                    vmFolderDsMo.deleteFile(vmDiskFileFullPath, dcMo.getMor(), true, VmwareManager.s_vmwareSearchExcludeFolder.value());
+                    checkAndDeleteDatastoreFile(vmDiskFileFullPath, skipDatastores, vmFolderDsMo, dcMo);
                 }
             }
             // Delete VM folder
             if (deleteDisks || files.length == 0) {
-                s_logger.debug("Deleting folder: " + vmFolder.getPath());
-                vmFolderDsMo.deleteFolder(vmFolder.getPath(), dcMo.getMor());
+                checkAndDeleteDatastoreFile(vmFolder.getPath(), skipDatastores, vmFolderDsMo, dcMo);
             }
         } catch (Exception e) {
             String message = "Failed to delete files associated with an existing VM that was unregistered due to " + VmwareHelper.getExceptionMessage(e);
@@ -4908,7 +4925,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
                     VirtualMachineFileLayoutEx vmFileLayout = vmMo.getFileLayout();
                     context.getService().unregisterVM(vmMo.getMor());
                     if (cmd.getCleanupVmFiles()) {
-                        deleteUnregisteredVmFiles(vmFileLayout, dataCenterMo, false);
+                        deleteUnregisteredVmFiles(vmFileLayout, dataCenterMo, false, null);
                     }
                     return new Answer(cmd, true, "unregister succeeded");
                 } catch (Exception e) {
diff --git a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
index 7c8ee67f30c..30ca72a2aa9 100755
--- a/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
+++ b/scripts/vm/hypervisor/kvm/kvmheartbeat.sh
@@ -155,10 +155,10 @@ then
   exit 0
 elif [ "$cflag" == "1" ]
 then
-  /usr/bin/logger -t heartbeat "kvmheartbeat.sh rebooted system because it was unable to write the heartbeat to the storage."
+  /usr/bin/logger -t heartbeat "kvmheartbeat.sh stopped cloudstack-agent because it was unable to write the heartbeat to the storage."
   sync &
   sleep 5
-  echo b > /proc/sysrq-trigger
+  service cloudstack-agent stop
   exit $?
 else
   write_hbLog
diff --git a/vmware-base/src/main/java/com/cloud/hypervisor/vmware/mo/VirtualMachineMO.java b/vmware-base/src/main/java/com/cloud/hypervisor/vmware/mo/VirtualMachineMO.java
index 2fb772e0daf..0deb2dc29e6 100644
--- a/vmware-base/src/main/java/com/cloud/hypervisor/vmware/mo/VirtualMachineMO.java
+++ b/vmware-base/src/main/java/com/cloud/hypervisor/vmware/mo/VirtualMachineMO.java
@@ -34,6 +34,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.log4j.Logger;
 
 import com.google.gson.Gson;
@@ -932,6 +933,38 @@ public class VirtualMachineMO extends BaseMO {
         return networks;
     }
 
+    public List<DatastoreMO> getAllDatastores() throws Exception {
+        PropertySpec pSpec = new PropertySpec();
+        pSpec.setType("Datastore");
+        pSpec.getPathSet().add("name");
+
+        TraversalSpec vmDatastoreTraversal = new TraversalSpec();
+        vmDatastoreTraversal.setType("VirtualMachine");
+        vmDatastoreTraversal.setPath("datastore");
+        vmDatastoreTraversal.setName("vmDatastoreTraversal");
+
+        ObjectSpec oSpec = new ObjectSpec();
+        oSpec.setObj(_mor);
+        oSpec.setSkip(Boolean.TRUE);
+        oSpec.getSelectSet().add(vmDatastoreTraversal);
+
+        PropertyFilterSpec pfSpec = new PropertyFilterSpec();
+        pfSpec.getPropSet().add(pSpec);
+        pfSpec.getObjectSet().add(oSpec);
+        List<PropertyFilterSpec> pfSpecArr = new ArrayList<PropertyFilterSpec>();
+        pfSpecArr.add(pfSpec);
+
+        List<ObjectContent> ocs = _context.getService().retrieveProperties(_context.getPropertyCollector(), pfSpecArr);
+
+        List<DatastoreMO> datastores = new ArrayList<DatastoreMO>();
+        if (CollectionUtils.isNotEmpty(ocs)) {
+            for (ObjectContent oc : ocs) {
+                datastores.add(new DatastoreMO(_context, oc.getObj()));
+            }
+        }
+        return datastores;
+    }
+
     /**
      * Retrieve path info to access VM files via vSphere web interface
      * @return [0] vm-name, [1] data-center-name, [2] datastore-name