diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties index fcd8b5c2995..daad05f3b65 100644 --- a/agent/conf/agent.properties +++ b/agent/conf/agent.properties @@ -133,6 +133,22 @@ hypervisor.type=kvm # Disable memory ballooning on vm guests for overcommit, by default overcommit # feature enables balloon and sets currentMemory to a minimum value. # +# vm.diskactivity.checkenabled=false +# Set to true to check disk activity on a VM's disks before starting the VM. This only applies +# to QCOW2 files, and ensures that there is no other running instance accessing +# the file before starting. It works by comparing the file's modify and access times against +# the current time, so the clocks of all hosts in the cluster must be kept in sync, otherwise VMs may fail to start. +# +# vm.diskactivity.checktimeout_s=120 +# Timeout, in seconds, for giving up on waiting for a VM's disk files to become inactive. Hitting +# this timeout will result in a failure to start the VM. +# +# vm.diskactivity.inactivetime_ms=30000 +# This is the length of time, in milliseconds, that the disk needs to be inactive in order to pass the check. +# This means the current time minus both the mtime and the atime of the disk file needs to be greater than this number. +# It also has the side effect of setting the minimum delay between a stop and a subsequent start of +# a given VM. 
+#
 # kvmclock.disable=false
 # Some newer linux kernels are incapable of reliably migrating vms with kvmclock
 # This is a workaround for the bug, admin can set this to true per-host
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index ee5f2971756..343e45cc4ed 100644
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -46,6 +46,7 @@ import javax.naming.ConfigurationException;
 
 import org.apache.cloudstack.storage.to.PrimaryDataStoreTO;
 import org.apache.cloudstack.storage.to.VolumeObjectTO;
+import org.apache.cloudstack.utils.hypervisor.HypervisorUtils;
 import org.apache.cloudstack.utils.linux.CPUStat;
 import org.apache.cloudstack.utils.linux.MemStat;
 import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat;
@@ -235,6 +236,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
     protected int _migrateSpeed;
     protected int _migrateDowntime;
     protected int _migratePauseAfter;
+    protected boolean _diskActivityCheckEnabled; // read from vm.diskactivity.checkenabled; gates the pre-start disk activity check
+    protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB; passed to the activity check as its minimum file size
+    protected int _diskActivityCheckTimeoutSeconds = 120; // 120s default; replaced by a positive vm.diskactivity.checktimeout_s
+    protected long _diskActivityInactiveThresholdMilliseconds = 30000; // 30s default; replaced by a positive vm.diskactivity.inactivetime_ms
 
     private final Map _pifs = new HashMap();
     private final Map _vmStats = new ConcurrentHashMap();
@@ -943,6 +948,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
         params.put("libvirtVersion", _hypervisorLibvirtVersion);
 
         configureVifDrivers(params);
+        configureDiskActivityChecks(params);
 
         final KVMStorageProcessor storageProcessor = new KVMStorageProcessor(_storagePoolMgr, this);
         storageProcessor.configure(name, params);
@@ -958,6 +964,20 @@ public class LibvirtComputingResource extends 
ServerResourceBase implements Serv
         return true;
     }
 
+    /**
+     * Loads the vm.diskactivity.* agent properties; numeric defaults are replaced only by positive values while enabled.
+     */
+    protected void configureDiskActivityChecks(final Map params) {
+        _diskActivityCheckEnabled = Boolean.parseBoolean((String)params.get("vm.diskactivity.checkenabled"));
+        if (!_diskActivityCheckEnabled) {
+            return;
+        }
+        final int timeoutSeconds = NumbersUtil.parseInt((String)params.get("vm.diskactivity.checktimeout_s"), 0);
+        _diskActivityCheckTimeoutSeconds = timeoutSeconds > 0 ? timeoutSeconds : _diskActivityCheckTimeoutSeconds;
+        final long inactiveMillis = NumbersUtil.parseLong((String)params.get("vm.diskactivity.inactivetime_ms"), 0L);
+        _diskActivityInactiveThresholdMilliseconds = inactiveMillis > 0 ? inactiveMillis : _diskActivityInactiveThresholdMilliseconds;
+    }
+
     protected void configureVifDrivers(final Map params) throws ConfigurationException {
         final String LIBVIRT_VIF_DRIVER = "libvirt.vif.driver";
 
@@ -2023,6 +2043,17 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
                 volPath = physicalDisk.getPath();
             }
 
+            // check for disk activity, if detected we should exit because vm is running elsewhere
+            if (_diskActivityCheckEnabled && physicalDisk != null && physicalDisk.getFormat() == PhysicalDiskFormat.QCOW2) {
+                s_logger.debug("Checking physical disk file at path " + volPath + " for disk activity to ensure vm is not running elsewhere");
+                try {
+                    HypervisorUtils.checkVolumeFileForActivity(volPath, _diskActivityCheckTimeoutSeconds, _diskActivityInactiveThresholdMilliseconds, _diskActivityCheckFileSizeMin);
+                } catch (IOException ex) {
+                    throw new CloudRuntimeException("Unable to check physical disk file for activity", ex);
+                }
+                s_logger.debug("Disk activity check cleared");
+            }
+
             // if params contains a rootDiskController key, use its value (this is what other HVs are doing)
             DiskDef.DiskBus diskBusType = null;
             final Map params = vmSpec.getDetails();
diff --git a/utils/src/main/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtils.java b/utils/src/main/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtils.java
new file mode 100644
index 00000000000..a0a20936bfc
--- 
/dev/null
+++ b/utils/src/main/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtils.java
@@ -0,0 +1,85 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+package org.apache.cloudstack.utils.hypervisor;
+
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.apache.log4j.Logger;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.concurrent.TimeUnit;
+
+public class HypervisorUtils {
+    public static final Logger s_logger = Logger.getLogger(HypervisorUtils.class);
+
+    /**
+     * Waits until a volume file has been neither modified nor accessed for longer than
+     * {@code inactiveThresholdMilliseconds}, re-reading its attributes once per second.
+     * Files smaller than {@code minimumFileSize} bytes are accepted without any check.
+     *
+     * @param filePath path of the volume file to inspect
+     * @param timeoutSeconds how long to keep polling before giving up
+     * @param inactiveThresholdMilliseconds idle time that both the modify and access timestamps must exceed
+     * @param minimumFileSize size in bytes below which the activity check is skipped
+     * @throws IOException if the file attributes cannot be read
+     * @throws CloudRuntimeException if the file is missing, the wait is interrupted or the timeout is reached
+     */
+    public static void checkVolumeFileForActivity(final String filePath, int timeoutSeconds, long inactiveThresholdMilliseconds, long minimumFileSize) throws IOException {
+        File file = new File(filePath);
+        if (!file.exists()) {
+            throw new CloudRuntimeException("File " + file.getAbsolutePath() + " not found");
+        }
+        if (file.length() < minimumFileSize) {
+            s_logger.debug("VM disk file too small, fresh clone? skipping modify check");
+            return;
+        }
+        int waitedSeconds = 0;
+        int intervalSeconds = 1;
+        while (true) {
+            BasicFileAttributes attrs = Files.readAttributes(file.toPath(), BasicFileAttributes.class);
+            // read the clock once so both idle times are measured against the same instant
+            long now = System.currentTimeMillis();
+            long modifyIdle = now - attrs.lastModifiedTime().toMillis();
+            long accessIdle = now - attrs.lastAccessTime().toMillis();
+            if (modifyIdle > inactiveThresholdMilliseconds && accessIdle > inactiveThresholdMilliseconds) {
+                s_logger.debug("File " + filePath + " has not been accessed or modified for at least " + inactiveThresholdMilliseconds + " ms");
+                return;
+            }
+            // the deadline is only enforced after a fresh look at the file, so a volume that
+            // went idle during the last sleep is not rejected unseen
+            if (waitedSeconds >= timeoutSeconds) {
+                throw new CloudRuntimeException("Reached timeout while waiting for activity on " + filePath + " to subside");
+            }
+            s_logger.debug("File was modified " + modifyIdle + "ms ago, accessed " + accessIdle + "ms ago, waiting for inactivity threshold of "
+                + inactiveThresholdMilliseconds + "ms or timeout of " + timeoutSeconds + "s (waited " + waitedSeconds + "s)");
+            try {
+                TimeUnit.SECONDS.sleep(intervalSeconds);
+            } catch (InterruptedException ex) {
+                // restore the interrupt flag so callers further up can still observe the interruption
+                Thread.currentThread().interrupt();
+                throw new CloudRuntimeException("Interrupted while waiting for activity on " + filePath + " to subside", ex);
+            }
+            waitedSeconds += intervalSeconds;
+        }
+    }
+
+}
diff --git a/utils/src/test/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtilsTest.java b/utils/src/test/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtilsTest.java
new file mode 100644
index 00000000000..3b13c6b9b7d
--- /dev/null
+++ b/utils/src/test/java/org/apache/cloudstack/utils/hypervisor/HypervisorUtilsTest.java
@@ -0,0 +1,111 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. 
The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+
+package org.apache.cloudstack.utils.hypervisor;
+
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.runners.MockitoJUnitRunner;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Arrays;
+
+@RunWith(MockitoJUnitRunner.class)
+public class HypervisorUtilsTest {
+
+    final long _minFileSize = 10485760L;
+
+    // A file below the minimum size must be accepted immediately, without any waiting.
+    @Test
+    public void checkVolumeFileForActivitySmallFileTest() throws IOException {
+        System.out.print("Testing don't block on newly created clones - ");
+        String filePath = "./testsmallfileinactive";
+        int timeoutSeconds = 5;
+        long thresholdMilliseconds = 2000;
+        File file = new File(filePath);
+        try {
+            setupcheckVolumeFileForActivityFile(file, 0);
+            long startTime = System.currentTimeMillis();
+            HypervisorUtils.checkVolumeFileForActivity(filePath, timeoutSeconds, thresholdMilliseconds, _minFileSize);
+            long endTime = System.currentTimeMillis();
+            Assert.assertEquals(startTime, endTime, 1000L);
+            System.out.println("pass");
+        } finally {
+            file.delete(); // always clean up, even when an assertion fails
+        }
+    }
+
+    // A freshly written file must hold the caller until the inactivity threshold has elapsed.
+    @Test
+    public void checkVolumeFileForActivityTest() throws IOException {
+        System.out.print("Testing block on modified files - ");
+        String filePath = "./testfileinactive";
+        int timeoutSeconds = 5;
+        long thresholdMilliseconds = 2000;
+        File file = new File(filePath);
+        try {
+            setupcheckVolumeFileForActivityFile(file, _minFileSize);
+            long startTime = System.currentTimeMillis();
+            HypervisorUtils.checkVolumeFileForActivity(filePath, timeoutSeconds, thresholdMilliseconds, _minFileSize);
+            long duration = System.currentTimeMillis() - startTime;
+            Assert.assertFalse("Didn't block long enough, expected at least " + thresholdMilliseconds + " and got " + duration, duration < thresholdMilliseconds);
+            System.out.println("pass");
+        } finally {
+            file.delete();
+        }
+    }
+
+    // When the threshold exceeds the timeout the check must give up with a CloudRuntimeException.
+    @Test(expected=CloudRuntimeException.class)
+    public void checkVolumeFileForActivityTimeoutTest() throws IOException {
+        System.out.print("Testing timeout of blocking on modified files - ");
+        String filePath = "./testfiletimeout"; // distinct from the other tests' files so they cannot interfere
+        int timeoutSeconds = 3;
+        long thresholdMilliseconds = 5000;
+        File file = new File(filePath);
+        try {
+            setupcheckVolumeFileForActivityFile(file, _minFileSize);
+            HypervisorUtils.checkVolumeFileForActivity(filePath, timeoutSeconds, thresholdMilliseconds, _minFileSize);
+        } catch (CloudRuntimeException ex) {
+            System.out.println("pass");
+            throw ex;
+        } finally {
+            file.delete();
+        }
+        System.out.println("Fail");
+    }
+
+    // Creates (or truncates) the file and fills it with at least minSize characters, 1 MiB at a time.
+    private void setupcheckVolumeFileForActivityFile(File file, long minSize) throws IOException {
+        char[] chars = new char[1048576];
+        Arrays.fill(chars, 'X');
+        long written = 0;
+        // FileWriter creates or truncates the file; try-with-resources closes it even if a write fails.
+        try (FileWriter writer = new FileWriter(file)) {
+            while (written < minSize) {
+                writer.write(chars);
+                written += chars.length;
+            }
+        }
+    }
+}