Externalize KVM Agent storage's timeout configuration (#5239)

* Externalize KVM Agent storage's timeout configuration

* Address @nvazquez review

* Add empty line at the end of the agent.properties file

Co-authored-by: Daniel Augusto Veronezi Salvador <daniel@scclouds.com.br>
This commit is contained in:
Daniel Augusto Veronezi Salvador 2021-07-28 10:45:27 -03:00 committed by GitHub
parent 1f5ee5b3e3
commit 7b752c3077
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 362 additions and 95 deletions

View File

@ -266,3 +266,7 @@ iscsi.session.cleanup.enabled=false
# Automatically clean up iscsi sessions not attached to any VM. # Automatically clean up iscsi sessions not attached to any VM.
# Should be enabled for users using managed storage for example solidfire. # Should be enabled for users using managed storage for example solidfire.
# Should be disabled for users with unmanaged iscsi connections on their hosts # Should be disabled for users with unmanaged iscsi connections on their hosts
# This parameter specifies the heartbeat update timeout in ms; The default value is 60000ms (1 min).
# Depending on the use case, this timeout might need increasing/decreasing.
# heartbeat.update.timeout=60000

View File

@ -0,0 +1,52 @@
/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloud.agent.properties;
/**
* Class of constant agent's properties available to configure on
* "agent.properties".
*<br><br>
* Not all available agent properties are defined here, but we should work to
* migrate them on demand to this class.
*
* @param <T> type of the default value.
*/
public class AgentProperties{
/**
* Heartbeat update timeout. <br>
* Data type: int. <br>
* Default value: 60000 (ms).
*/
public static final Property<Integer> HEARTBEAT_UPDATE_TIMEOUT = new Property<Integer>("heartbeat.update.timeout", 60000);
public static class Property <T>{
private final String name;
private final T defaultValue;
private Property(String name, T value) {
this.name = name;
this.defaultValue = value;
}
public String getName() {
return name;
}
public T getDefaultValue() {
return defaultValue;
}
}
}

View File

@ -0,0 +1,70 @@
/*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cloud.agent.properties;
import com.cloud.utils.PropertiesUtil;
import java.io.File;
import java.io.IOException;
import org.apache.cloudstack.utils.security.KeyStoreUtils;
import org.apache.commons.beanutils.ConvertUtils;
import org.apache.commons.beanutils.converters.IntegerConverter;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
/**
* This class provides a facility to read the agent's properties file and get
* its properties, according to the {@link AgentProperties} properties constants.
*
*/
public class AgentPropertiesFileHandler {
private static final Logger logger = Logger.getLogger(AgentPropertiesFileHandler.class);
/**
* This method reads the property in the agent.properties file.
*
* @param property the property to retrieve.
* @return The value of the property. If the property is not available, the
* default defined value will be used.
*/
public static <T> T getPropertyValue(AgentProperties.Property<T> property) {
T defaultValue = property.getDefaultValue();
String name = property.getName();
File agentPropertiesFile = PropertiesUtil.findConfigFile(KeyStoreUtils.AGENT_PROPSFILE);
if (agentPropertiesFile != null) {
try {
String configValue = PropertiesUtil.loadFromFile(agentPropertiesFile).getProperty(name);
if (StringUtils.isNotBlank(configValue)) {
if (defaultValue instanceof Integer) {
ConvertUtils.register(new IntegerConverter(defaultValue), Integer.class);
}
return (T)ConvertUtils.convert(configValue, defaultValue.getClass());
} else {
logger.debug(String.format("Property [%s] has empty or null value. Using default value [%s].", name, defaultValue));
}
} catch (IOException ex) {
logger.debug(String.format("Failed to get property [%s]. Using default value [%s].", name, defaultValue), ex);
}
} else {
logger.debug(String.format("File [%s] was not found, we will use default defined values. Property [%s]: [%s].", KeyStoreUtils.AGENT_PROPSFILE, name, defaultValue));
}
return defaultValue;
}
}

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.cloud.agent.properties;
import com.cloud.utils.PropertiesUtil;
import java.io.File;
import java.io.IOException;
import java.util.Properties;
import junit.framework.TestCase;
import org.apache.commons.beanutils.ConvertUtils;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.powermock.api.mockito.PowerMockito;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
@RunWith(PowerMockRunner.class)
@PrepareForTest({PropertiesUtil.class, ConvertUtils.class})
public class AgentPropertiesFileHandlerTest extends TestCase {
@Mock
AgentProperties.Property<String> agentPropertiesStringMock;
@Mock
AgentProperties.Property<Integer> agentPropertiesIntegerMock;
@Mock
File fileMock;
@Mock
Properties propertiesMock;
@Test
public void validateGetPropertyValueFileNotFoundReturnDefaultValue() throws Exception{
String expectedResult = "default value";
Mockito.doReturn(expectedResult).when(agentPropertiesStringMock).getDefaultValue();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(null).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
String result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesStringMock);
Assert.assertEquals(expectedResult, result);
}
@Test
public void validateGetPropertyValueLoadFromFileThrowsIOExceptionReturnDefaultValue() throws Exception{
String expectedResult = "default value";
Mockito.doReturn(expectedResult).when(agentPropertiesStringMock).getDefaultValue();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(fileMock).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
PowerMockito.doThrow(new IOException()).when(PropertiesUtil.class, "loadFromFile", Mockito.any());
String result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesStringMock);
Assert.assertEquals(expectedResult, result);
}
@Test
public void validateGetPropertyValuePropertyIsEmptyReturnDefaultValue() throws Exception{
String expectedResult = "default value";
Mockito.doReturn(expectedResult).when(agentPropertiesStringMock).getDefaultValue();
Mockito.doReturn("name").when(agentPropertiesStringMock).getName();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(fileMock).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
PowerMockito.doReturn(propertiesMock).when(PropertiesUtil.class, "loadFromFile", Mockito.any());
PowerMockito.doReturn("").when(propertiesMock).getProperty(Mockito.anyString());
String result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesStringMock);
Assert.assertEquals(expectedResult, result);
}
@Test
public void validateGetPropertyValuePropertyIsNullReturnDefaultValue() throws Exception{
String expectedResult = "default value";
Mockito.doReturn(expectedResult).when(agentPropertiesStringMock).getDefaultValue();
Mockito.doReturn("name").when(agentPropertiesStringMock).getName();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(fileMock).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
PowerMockito.doReturn(propertiesMock).when(PropertiesUtil.class, "loadFromFile", Mockito.any());
PowerMockito.doReturn(null).when(propertiesMock).getProperty(Mockito.anyString());
String result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesStringMock);
Assert.assertEquals(expectedResult, result);
}
@Test
public void validateGetPropertyValueValidPropertyReturnPropertyValue() throws Exception{
String expectedResult = "test";
Mockito.doReturn("default value").when(agentPropertiesStringMock).getDefaultValue();
Mockito.doReturn("name").when(agentPropertiesStringMock).getName();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(fileMock).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
PowerMockito.doReturn(propertiesMock).when(PropertiesUtil.class, "loadFromFile", Mockito.any());
Mockito.doReturn(expectedResult).when(propertiesMock).getProperty(Mockito.anyString());
String result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesStringMock);
Assert.assertEquals(expectedResult, result);
}
@Test
public void validateGetPropertyValueValidIntegerPropertyReturnPropertyValue() throws Exception{
Integer expectedResult = 2;
Mockito.doReturn(1).when(agentPropertiesIntegerMock).getDefaultValue();
Mockito.doReturn("name").when(agentPropertiesIntegerMock).getName();
PowerMockito.mockStatic(PropertiesUtil.class);
PowerMockito.doReturn(fileMock).when(PropertiesUtil.class, "findConfigFile", Mockito.anyString());
PowerMockito.doReturn(propertiesMock).when(PropertiesUtil.class, "loadFromFile", Mockito.any());
Mockito.doReturn(String.valueOf(expectedResult)).when(propertiesMock).getProperty(Mockito.anyString());
Integer result = AgentPropertiesFileHandler.getPropertyValue(agentPropertiesIntegerMock);
Assert.assertEquals(expectedResult, result);
}
}

View File

@ -16,8 +16,9 @@
// under the License. // under the License.
package com.cloud.hypervisor.kvm.resource; package com.cloud.hypervisor.kvm.resource;
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import com.cloud.utils.script.Script; import com.cloud.utils.script.Script;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.libvirt.Connect; import org.libvirt.Connect;
import org.libvirt.LibvirtException; import org.libvirt.LibvirtException;
@ -32,17 +33,20 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
public class KVMHAMonitor extends KVMHABase implements Runnable { public class KVMHAMonitor extends KVMHABase implements Runnable {
private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class);
private final Map<String, NfsStoragePool> _storagePool = new ConcurrentHashMap<String, NfsStoragePool>();
private final String _hostIP; /* private ip address */ private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class);
private final Map<String, NfsStoragePool> storagePool = new ConcurrentHashMap<>();
private final String hostPrivateIp;
public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) { public KVMHAMonitor(NfsStoragePool pool, String host, String scriptPath) {
if (pool != null) { if (pool != null) {
_storagePool.put(pool._poolUUID, pool); storagePool.put(pool._poolUUID, pool);
} }
_hostIP = host; hostPrivateIp = host;
configureHeartBeatPath(scriptPath); configureHeartBeatPath(scriptPath);
_heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
} }
private static synchronized void configureHeartBeatPath(String scriptPath) { private static synchronized void configureHeartBeatPath(String scriptPath) {
@ -50,135 +54,129 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
} }
public void addStoragePool(NfsStoragePool pool) { public void addStoragePool(NfsStoragePool pool) {
synchronized (_storagePool) { synchronized (storagePool) {
_storagePool.put(pool._poolUUID, pool); storagePool.put(pool._poolUUID, pool);
} }
} }
public void removeStoragePool(String uuid) { public void removeStoragePool(String uuid) {
synchronized (_storagePool) { synchronized (storagePool) {
NfsStoragePool pool = _storagePool.get(uuid); NfsStoragePool pool = storagePool.get(uuid);
if (pool != null) { if (pool != null) {
Script.runSimpleBashScript("umount " + pool._mountDestPath); Script.runSimpleBashScript("umount " + pool._mountDestPath);
_storagePool.remove(uuid); storagePool.remove(uuid);
} }
} }
} }
public List<NfsStoragePool> getStoragePools() { public List<NfsStoragePool> getStoragePools() {
synchronized (_storagePool) { synchronized (storagePool) {
return new ArrayList<NfsStoragePool>(_storagePool.values()); return new ArrayList<>(storagePool.values());
} }
} }
public NfsStoragePool getStoragePool(String uuid) { public NfsStoragePool getStoragePool(String uuid) {
synchronized (_storagePool) { synchronized (storagePool) {
return _storagePool.get(uuid); return storagePool.get(uuid);
} }
} }
private class Monitor extends ManagedContextRunnable { protected void runHeartBeat() {
synchronized (storagePool) {
@Override Set<String> removedPools = new HashSet<>();
protected void runInContext() { for (String uuid : storagePool.keySet()) {
synchronized (_storagePool) { NfsStoragePool primaryStoragePool = storagePool.get(uuid);
Set<String> removedPools = new HashSet<String>(); StoragePool storage;
for (String uuid : _storagePool.keySet()) { try {
NfsStoragePool primaryStoragePool = _storagePool.get(uuid); Connect conn = LibvirtConnection.getConnection();
storage = conn.storagePoolLookupByUUIDString(uuid);
// check for any that have been deregistered with libvirt and if (storage == null || storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
// skip,remove them
StoragePool storage = null;
try {
Connect conn = LibvirtConnection.getConnection();
storage = conn.storagePoolLookupByUUIDString(uuid);
if (storage == null) { if (storage == null) {
s_logger.debug("Libvirt storage pool " + uuid + " not found, removing from HA list"); s_logger.debug(String.format("Libvirt storage pool [%s] not found, removing from HA list.", uuid));
removedPools.add(uuid);
continue;
} else if (storage.getInfo().state != StoragePoolState.VIR_STORAGE_POOL_RUNNING) {
s_logger.debug("Libvirt storage pool " + uuid + " found, but not running, removing from HA list");
removedPools.add(uuid);
continue;
}
s_logger.debug("Found NFS storage pool " + uuid + " in libvirt, continuing");
} catch (LibvirtException e) {
s_logger.debug("Failed to lookup libvirt storage pool " + uuid + " due to: " + e);
// we only want to remove pool if it's not found, not if libvirt
// connection fails
if (e.toString().contains("pool not found")) {
s_logger.debug("removing pool from HA monitor since it was deleted");
removedPools.add(uuid);
continue;
}
}
String result = null;
// Try multiple times, but sleep in between tries to ensure it isn't a short lived transient error
for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
cmd.add("-h", _hostIP);
result = cmd.execute();
if (result != null) {
s_logger.warn("write heartbeat failed: " + result + ", try: " + i + " of " + _heartBeatUpdateMaxTries);
try {
Thread.sleep(_heartBeatUpdateRetrySleep);
} catch (InterruptedException e) {
s_logger.debug("[ignored] interupted between heartbeat retries.");
}
} else { } else {
break; s_logger.debug(String.format("Libvirt storage pool [%s] found, but not running, removing from HA list.", uuid));
} }
removedPools.add(uuid);
continue;
} }
if (result != null) { s_logger.debug(String.format("Found NFS storage pool [%s] in libvirt, continuing.", uuid));
// Stop cloudstack-agent if can't write to heartbeat file.
// This will raise an alert on the mgmt server } catch (LibvirtException e) {
s_logger.warn("write heartbeat failed: " + result + "; stopping cloudstack-agent"); s_logger.debug(String.format("Failed to lookup libvirt storage pool [%s].", uuid), e);
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp); if (e.toString().contains("pool not found")) {
cmd.add("-p", primaryStoragePool._poolMountSourcePath); s_logger.debug(String.format("Removing pool [%s] from HA monitor since it was deleted.", uuid));
cmd.add("-m", primaryStoragePool._mountDestPath); removedPools.add(uuid);
cmd.add("-c"); continue;
result = cmd.execute();
} }
} }
if (!removedPools.isEmpty()) { String result = null;
for (String uuid : removedPools) { for (int i = 1; i <= _heartBeatUpdateMaxTries; i++) {
removeStoragePool(uuid); Script cmd = createHeartBeatCommand(primaryStoragePool, hostPrivateIp, true);
result = cmd.execute();
s_logger.debug(String.format("The command (%s), to the pool [%s], has the result [%s].", cmd.toString(), uuid, result));
if (result != null) {
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; try: %s of %s.", uuid, result, i, _heartBeatUpdateMaxTries));
try {
Thread.sleep(_heartBeatUpdateRetrySleep);
} catch (InterruptedException e) {
s_logger.debug("[IGNORED] Interrupted between heartbeat retries.", e);
}
} else {
break;
} }
}
if (result != null) {
s_logger.warn(String.format("Write heartbeat for pool [%s] failed: %s; stopping cloudstack-agent.", uuid, result));
Script cmd = createHeartBeatCommand(primaryStoragePool, null, false);
result = cmd.execute();
} }
} }
if (!removedPools.isEmpty()) {
for (String uuid : removedPools) {
removeStoragePool(uuid);
}
}
} }
}
private Script createHeartBeatCommand(NfsStoragePool primaryStoragePool, String hostPrivateIp, boolean hostValidation) {
Script cmd = new Script(s_heartBeatPath, _heartBeatUpdateTimeout, s_logger);
cmd.add("-i", primaryStoragePool._poolIp);
cmd.add("-p", primaryStoragePool._poolMountSourcePath);
cmd.add("-m", primaryStoragePool._mountDestPath);
if (hostValidation) {
cmd.add("-h", hostPrivateIp);
}
if (!hostValidation) {
cmd.add("-c");
}
return cmd;
} }
@Override @Override
public void run() { public void run() {
// s_logger.addAppender(new org.apache.log4j.ConsoleAppender(new
// org.apache.log4j.PatternLayout(), "System.out"));
while (true) { while (true) {
Thread monitorThread = new Thread(new Monitor());
monitorThread.start(); runHeartBeat();
try {
monitorThread.join();
} catch (InterruptedException e) {
s_logger.debug("[ignored] interupted joining monitor.");
}
try { try {
Thread.sleep(_heartBeatUpdateFreq); Thread.sleep(_heartBeatUpdateFreq);
} catch (InterruptedException e) { } catch (InterruptedException e) {
s_logger.debug("[ignored] interupted between heartbeats."); s_logger.debug("[IGNORED] Interrupted between heartbeats.", e);
} }
} }
} }