Merge release branch 4.11 to 4.12

* 4.11:
  KVM: Fix agents dont reconnect post maintenance (#3239)
This commit is contained in:
Daan Hoogland 2019-05-23 14:29:41 +02:00
commit 29918e25e3
7 changed files with 532 additions and 91 deletions

View File

@ -606,9 +606,7 @@ public class Agent implements HandlerFactory, IAgentControl {
System.exit(1); System.exit(1);
return; return;
} else if (cmd instanceof MaintainCommand) { } else if (cmd instanceof MaintainCommand) {
s_logger.debug("Received maintainCommand"); s_logger.debug("Received maintainCommand, do not cancel current tasks");
cancelTasks();
_reconnectAllowed = false;
answer = new MaintainAnswer((MaintainCommand)cmd); answer = new MaintainAnswer((MaintainCommand)cmd);
} else if (cmd instanceof AgentControlCommand) { } else if (cmd instanceof AgentControlCommand) {
answer = null; answer = null;

View File

@ -52,6 +52,11 @@ public interface ResourceManager extends ResourceService, Configurable {
"Number of retries when preparing a host into Maintenance Mode is faulty before failing", "Number of retries when preparing a host into Maintenance Mode is faulty before failing",
true, ConfigKey.Scope.Cluster); true, ConfigKey.Scope.Cluster);
/**
 * Setting {@code kvm.ssh.to.agent} (default {@code "true"}): whether the management server
 * is allowed to SSH into KVM agents, e.g. to restart a disconnected agent.
 */
ConfigKey<Boolean> KvmSshToAgentEnabled = new ConfigKey<>("Advanced", Boolean.class,
        "kvm.ssh.to.agent", "true",
        "Specify whether or not the management server is allowed to SSH into KVM Agents",
        false);
/** /**
* Register a listener for different types of resource life cycle events. * Register a listener for different types of resource life cycle events.
* There can only be one type of listener per type of host. * There can only be one type of listener per type of host.

View File

@ -32,6 +32,7 @@ import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import com.cloud.agent.api.ModifySshKeysCommand; import com.cloud.agent.api.ModifySshKeysCommand;
import com.cloud.agent.api.ModifyStoragePoolCommand;
import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
@ -115,7 +116,7 @@ public abstract class AgentAttache {
StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(), StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(),
ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(), ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(),
CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(), CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(),
ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString()}; ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString()};
protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() }; protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() };
static { static {
Arrays.sort(s_commandsAllowedInMaintenanceMode); Arrays.sort(s_commandsAllowedInMaintenanceMode);

View File

@ -1202,14 +1202,6 @@ public enum Config {
KvmPublicNetwork("Hidden", ManagementServer.class, String.class, "kvm.public.network.device", null, "Specify the public bridge on host for public network", null), KvmPublicNetwork("Hidden", ManagementServer.class, String.class, "kvm.public.network.device", null, "Specify the public bridge on host for public network", null),
KvmPrivateNetwork("Hidden", ManagementServer.class, String.class, "kvm.private.network.device", null, "Specify the private bridge on host for private network", null), KvmPrivateNetwork("Hidden", ManagementServer.class, String.class, "kvm.private.network.device", null, "Specify the private bridge on host for private network", null),
KvmGuestNetwork("Hidden", ManagementServer.class, String.class, "kvm.guest.network.device", null, "Specify the private bridge on host for private network", null), KvmGuestNetwork("Hidden", ManagementServer.class, String.class, "kvm.guest.network.device", null, "Specify the private bridge on host for private network", null),
KvmSshToAgentEnabled(
"Advanced",
ManagementServer.class,
Boolean.class,
"kvm.ssh.to.agent",
"true",
"Specify whether or not the management server is allowed to SSH into KVM Agents",
null),
// Hyperv // Hyperv
HypervPublicNetwork( HypervPublicNetwork(

View File

@ -31,6 +31,7 @@ import java.util.concurrent.ConcurrentHashMap;
import javax.inject.Inject; import javax.inject.Inject;
import javax.naming.ConfigurationException; import javax.naming.ConfigurationException;
import com.cloud.utils.Pair;
import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.UserVmDetailsDao;
import org.apache.cloudstack.framework.config.ConfigKey; import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.commons.lang.ObjectUtils; import org.apache.commons.lang.ObjectUtils;
@ -2343,46 +2344,77 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
} }
} }
handleAgentIfNotConnected(host, vms_migrating);
try { try {
resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId); resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId);
_agentMgr.pullAgentOutMaintenance(hostId); _agentMgr.pullAgentOutMaintenance(hostId);
retryHostMaintenance.remove(hostId); retryHostMaintenance.remove(hostId);
// for kvm, need to log into kvm host, restart cloudstack-agent
if ((host.getHypervisorType() == HypervisorType.KVM && !vms_migrating) || host.getHypervisorType() == HypervisorType.LXC) {
final boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(Config.KvmSshToAgentEnabled.key()));
if (!sshToAgent) {
s_logger.info("Configuration tells us not to SSH into Agents. Please restart the Agent (" + hostId + ") manually");
return true;
}
_hostDao.loadDetails(host);
final String password = host.getDetail("password");
final String username = host.getDetail("username");
if (password == null || username == null) {
s_logger.debug("Can't find password/username");
return false;
}
final com.trilead.ssh2.Connection connection = SSHCmdHelper.acquireAuthorizedConnection(host.getPrivateIpAddress(), 22, username, password);
if (connection == null) {
s_logger.debug("Failed to connect to host: " + host.getPrivateIpAddress());
return false;
}
try {
SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart");
s_logger.debug("cloudstack-agent restart result: " + result.toString());
} catch (final SshException e) {
return false;
}
}
return true;
} catch (final NoTransitionException e) { } catch (final NoTransitionException e) {
s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e); s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e);
return false; return false;
} }
return true;
}
/**
 * Makes sure the host agent is connected before maintenance is cancelled.
 * Nothing is done when the host is neither KVM nor LXC, when VMs are still migrating,
 * or when the host status is already Up. Otherwise:
 *  - If kvm.ssh.to.agent is true, SSH into the host and restart the agent.
 *  - If kvm.ssh.to.agent is false, fail cancelling maintenance.
 * @throws CloudRuntimeException if SSH access is disabled while the agent is not connected
 */
protected void handleAgentIfNotConnected(HostVO host, boolean vmsMigrating) {
    final HypervisorType hypervisor = host.getHypervisorType();
    if (hypervisor != HypervisorType.KVM && hypervisor != HypervisorType.LXC) {
        return;
    }
    if (vmsMigrating || host.getStatus() == Status.Up) {
        return;
    }

    final String sshToAgentSetting = _configDao.getValue(KvmSshToAgentEnabled.key());
    if (!Boolean.parseBoolean(sshToAgentSetting)) {
        throw new CloudRuntimeException("SSH access is disabled, cannot cancel maintenance mode as " +
                "host agent is not connected");
    }

    final Pair<String, String> usernameAndPassword = getHostCredentials(host);
    connectAndRestartAgentOnHost(host, usernameAndPassword.first(), usernameAndPassword.second());
}
/**
 * Loads the host details and returns the stored "username" and "password" details.
 * @return pair holding the username first and the password second
 * @throws CloudRuntimeException if username or password are not found
 */
protected Pair<String, String> getHostCredentials(HostVO host) {
    _hostDao.loadDetails(host);
    final String sshPassword = host.getDetail("password");
    final String sshUsername = host.getDetail("username");
    final boolean credentialsComplete = sshUsername != null && sshPassword != null;
    if (!credentialsComplete) {
        throw new CloudRuntimeException("SSH to agent is enabled, but username/password credentials are not found");
    }
    return new Pair<>(sshUsername, sshPassword);
}
/**
* True if agent is restarted via SSH. Assumes kvm.ssh.to.agent = true and host status is not Up
*/
protected void connectAndRestartAgentOnHost(HostVO host, String username, String password) {
final com.trilead.ssh2.Connection connection = SSHCmdHelper.acquireAuthorizedConnection(
host.getPrivateIpAddress(), 22, username, password);
if (connection == null) {
throw new CloudRuntimeException("SSH to agent is enabled, but failed to connect to host: " + host.getPrivateIpAddress());
}
try {
SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(
connection, "service cloudstack-agent restart");
if (result.getReturnCode() != 0) {
throw new CloudRuntimeException("Could not restart agent on host " + host.getId() + " due to: " + result.getStdErr());
}
s_logger.debug("cloudstack-agent restart result: " + result.toString());
} catch (final SshException e) {
throw new CloudRuntimeException("SSH to agent is enabled, but agent restart failed", e);
}
} }
private boolean cancelMaintenance(final long hostId) { private boolean cancelMaintenance(final long hostId) {

View File

@ -25,13 +25,20 @@ import com.cloud.event.ActionEventUtils;
import com.cloud.ha.HighAvailabilityManager; import com.cloud.ha.HighAvailabilityManager;
import com.cloud.host.Host; import com.cloud.host.Host;
import com.cloud.host.HostVO; import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao; import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor; import com.cloud.hypervisor.Hypervisor;
import com.cloud.storage.StorageManager; import com.cloud.storage.StorageManager;
import com.cloud.utils.Pair;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.ssh.SSHCmdHelper;
import com.cloud.utils.ssh.SshException;
import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.UserVmDetailsDao;
import com.cloud.vm.dao.VMInstanceDao; import com.cloud.vm.dao.VMInstanceDao;
import com.trilead.ssh2.Connection;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -56,12 +63,13 @@ import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.anyLong;
import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq; import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times; import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
@RunWith(PowerMockRunner.class) @RunWith(PowerMockRunner.class)
@PrepareForTest({ActionEventUtils.class, ResourceManagerImpl.class}) @PrepareForTest({ActionEventUtils.class, ResourceManagerImpl.class, SSHCmdHelper.class})
public class ResourceManagerImplTest { public class ResourceManagerImplTest {
@Mock @Mock
@ -78,6 +86,8 @@ public class ResourceManagerImplTest {
private HostDao hostDao; private HostDao hostDao;
@Mock @Mock
private VMInstanceDao vmInstanceDao; private VMInstanceDao vmInstanceDao;
@Mock
private ConfigurationDao configurationDao;
@Spy @Spy
@InjectMocks @InjectMocks
@ -99,7 +109,13 @@ public class ResourceManagerImplTest {
@Mock @Mock
private GetVncPortCommand getVncPortCommandVm2; private GetVncPortCommand getVncPortCommandVm2;
@Mock
private Connection sshConnection;
private static long hostId = 1L; private static long hostId = 1L;
private static final String hostUsername = "user";
private static final String hostPassword = "password";
private static final String hostPrivateIp = "192.168.1.10";
private static long vm1Id = 1L; private static long vm1Id = 1L;
private static String vm1InstanceName = "i-1-VM"; private static String vm1InstanceName = "i-1-VM";
@ -117,9 +133,13 @@ public class ResourceManagerImplTest {
when(host.getType()).thenReturn(Host.Type.Routing); when(host.getType()).thenReturn(Host.Type.Routing);
when(host.getId()).thenReturn(hostId); when(host.getId()).thenReturn(hostId);
when(host.getResourceState()).thenReturn(ResourceState.Enabled); when(host.getResourceState()).thenReturn(ResourceState.Enabled);
when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.VMware); when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.KVM);
when(host.getClusterId()).thenReturn(1L); when(host.getClusterId()).thenReturn(1L);
when(hostDao.findById(hostId)).thenReturn(host); when(hostDao.findById(hostId)).thenReturn(host);
when(host.getDetail("username")).thenReturn(hostUsername);
when(host.getDetail("password")).thenReturn(hostPassword);
when(host.getStatus()).thenReturn(Status.Up);
when(host.getPrivateIpAddress()).thenReturn(hostPrivateIp);
when(vm1.getId()).thenReturn(vm1Id); when(vm1.getId()).thenReturn(vm1Id);
when(vm2.getId()).thenReturn(vm2Id); when(vm2.getId()).thenReturn(vm2Id);
when(vm1.getInstanceName()).thenReturn(vm1InstanceName); when(vm1.getInstanceName()).thenReturn(vm1InstanceName);
@ -138,6 +158,15 @@ public class ResourceManagerImplTest {
PowerMockito.whenNew(GetVncPortCommand.class).withArguments(vm2Id, vm2InstanceName).thenReturn(getVncPortCommandVm2); PowerMockito.whenNew(GetVncPortCommand.class).withArguments(vm2Id, vm2InstanceName).thenReturn(getVncPortCommandVm2);
when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm1))).thenReturn(getVncPortAnswerVm1); when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm1))).thenReturn(getVncPortAnswerVm1);
when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm2))).thenReturn(getVncPortAnswerVm2); when(agentManager.easySend(eq(hostId), eq(getVncPortCommandVm2))).thenReturn(getVncPortAnswerVm2);
PowerMockito.mockStatic(SSHCmdHelper.class);
BDDMockito.given(SSHCmdHelper.acquireAuthorizedConnection(eq(hostPrivateIp), eq(22),
eq(hostUsername), eq(hostPassword))).willReturn(sshConnection);
BDDMockito.given(SSHCmdHelper.sshExecuteCmdOneShot(eq(sshConnection),
eq("service cloudstack-agent restart"))).
willReturn(new SSHCmdHelper.SSHCmdResult(0,"",""));
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("true");
} }
@Test @Test
@ -206,4 +235,76 @@ public class ResourceManagerImplTest {
verify(resourceManager, times(retries + 1)).isHostInMaintenance(host, failedMigrations, new ArrayList<>(), failedMigrations); verify(resourceManager, times(retries + 1)).isHostInMaintenance(host, failedMigrations, new ArrayList<>(), failedMigrations);
verify(resourceManager).setHostIntoErrorInMaintenance(host, failedMigrations); verify(resourceManager).setHostIntoErrorInMaintenance(host, failedMigrations);
} }
/** Expects getHostCredentials to throw when the host's "password" detail is null. */
@Test(expected = CloudRuntimeException.class)
public void testGetHostCredentialsMissingParameter() {
when(host.getDetail("password")).thenReturn(null);
resourceManager.getHostCredentials(host);
}
/** Checks that the username and password stubbed on the host come back as (first, second). */
@Test
public void testGetHostCredentials() {
    final Pair<String, String> userAndPassword = resourceManager.getHostCredentials(host);
    Assert.assertNotNull(userAndPassword);
    Assert.assertEquals(hostUsername, userAndPassword.first());
    Assert.assertEquals(hostPassword, userAndPassword.second());
}
/** Expects a CloudRuntimeException when acquiring the SSH connection is stubbed to return null. */
@Test(expected = CloudRuntimeException.class)
public void testConnectAndRestartAgentOnHostCannotConnect() {
BDDMockito.given(SSHCmdHelper.acquireAuthorizedConnection(eq(hostPrivateIp), eq(22),
eq(hostUsername), eq(hostPassword))).willReturn(null);
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
}
/** Expects a CloudRuntimeException when the restart command is stubbed to throw an SshException. */
@Test(expected = CloudRuntimeException.class)
public void testConnectAndRestartAgentOnHostCannotRestart() throws Exception {
BDDMockito.given(SSHCmdHelper.sshExecuteCmdOneShot(eq(sshConnection),
eq("service cloudstack-agent restart"))).willThrow(new SshException("exception"));
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
}
/**
 * Happy path: relies on the default SSH helper stubs from the test setup.
 * There are no explicit assertions; the test passes if no exception is thrown.
 */
@Test
public void testConnectAndRestartAgentOnHost() {
resourceManager.connectAndRestartAgentOnHost(host, hostUsername, hostPassword);
}
/** With a Disconnected host, credentials must be fetched and the agent restart attempted. */
@Test
public void testHandleAgentSSHEnabledNotConnectedAgent() {
when(host.getStatus()).thenReturn(Status.Disconnected);
resourceManager.handleAgentIfNotConnected(host, false);
verify(resourceManager).getHostCredentials(eq(host));
verify(resourceManager).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
}
/** With the host status Up, neither the credential lookup nor the agent restart may be invoked. */
@Test
public void testHandleAgentSSHEnabledConnectedAgent() {
when(host.getStatus()).thenReturn(Status.Up);
resourceManager.handleAgentIfNotConnected(host, false);
verify(resourceManager, never()).getHostCredentials(eq(host));
verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
}
/** With kvm.ssh.to.agent stubbed to "false" and a Disconnected host, a CloudRuntimeException is expected. */
@Test(expected = CloudRuntimeException.class)
public void testHandleAgentSSHDisabledNotConnectedAgent() {
when(host.getStatus()).thenReturn(Status.Disconnected);
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("false");
resourceManager.handleAgentIfNotConnected(host, false);
}
/** With kvm.ssh.to.agent stubbed to "false" but the host Up, no SSH handling may happen and nothing is thrown. */
@Test
public void testHandleAgentSSHDisabledConnectedAgent() {
when(host.getStatus()).thenReturn(Status.Up);
when(configurationDao.getValue(ResourceManager.KvmSshToAgentEnabled.key())).thenReturn("false");
resourceManager.handleAgentIfNotConnected(host, false);
verify(resourceManager, never()).getHostCredentials(eq(host));
verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
}
/**
 * While VMs are migrating, no SSH handling may happen even if the agent is not connected.
 * The status is stubbed to Disconnected so that the vmsMigrating flag alone is what prevents
 * the restart; the setup default of Up would make this test pass regardless of the flag.
 */
@Test
public void testHandleAgentVMsMigrating() {
    when(host.getStatus()).thenReturn(Status.Disconnected);
    resourceManager.handleAgentIfNotConnected(host, true);
    verify(resourceManager, never()).getHostCredentials(eq(host));
    verify(resourceManager, never()).connectAndRestartAgentOnHost(eq(host), eq(hostUsername), eq(hostPassword));
}
} }

View File

@ -18,15 +18,14 @@
""" """
# Import Local Modules # Import Local Modules
from marvin.codes import FAILED
from marvin.cloudstackTestCase import * from marvin.cloudstackTestCase import *
from marvin.cloudstackAPI import *
from marvin.lib.utils import * from marvin.lib.utils import *
from marvin.lib.base import * from marvin.lib.base import *
from marvin.lib.common import * from marvin.lib.common import (get_zone, get_pod, get_template)
from nose.plugins.attrib import attr from nose.plugins.attrib import attr
from marvin.lib.decoratorGenerators import skipTestIf
from time import sleep from distutils.util import strtobool
from marvin.sshClient import SshClient
_multiprocess_shared_ = False _multiprocess_shared_ = False
@ -45,37 +44,6 @@ class TestHostMaintenance(cloudstackTestCase):
self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests()) self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests())
self.pod = get_pod(self.apiclient, self.zone.id) self.pod = get_pod(self.apiclient, self.zone.id)
self.cleanup = [] self.cleanup = []
self.services = {
"service_offering": {
"name": "Ultra Tiny Instance",
"displaytext": "Ultra Tiny Instance",
"cpunumber": 1,
"cpuspeed": 100,
"memory": 128,
},
"vm": {
"username": "root",
"password": "password",
"ssh_port": 22,
# Hypervisor type should be same as
# hypervisor type of cluster
"privateport": 22,
"publicport": 22,
"protocol": 'TCP',
},
"natrule": {
"privateport": 22,
"publicport": 22,
"startport": 22,
"endport": 22,
"protocol": "TCP",
"cidrlist": '0.0.0.0/0',
},
"ostype": 'CentOS 5.3 (64-bit)',
"sleep": 60,
"timeout": 10,
}
def tearDown(self): def tearDown(self):
try: try:
@ -89,38 +57,54 @@ class TestHostMaintenance(cloudstackTestCase):
def createVMs(self, hostId, number): def createVMs(self, hostId, number):
self.template = get_test_template( self.template = get_template(
self.apiclient, self.apiclient,
self.zone.id, self.zone.id,
self.hypervisor self.hypervisor
) )
if self.template == FAILED: if self.template == FAILED:
assert False, "get_test_template() failed to return template" assert False, "get_template() failed to return template"
self.logger.debug("Using template %s " % self.template.id) self.logger.debug("Using template %s " % self.template.id)
self.service_offering = ServiceOffering.create( self.service_offering = ServiceOffering.create(
self.apiclient, self.apiclient,
self.services["service_offering"] self.services["service_offerings"]["tiny"]
) )
self.logger.debug("Using service offering %s " % self.service_offering.id) self.logger.debug("Using service offering %s " % self.service_offering.id)
self.network_offering = NetworkOffering.create(
self.apiclient,
self.services["l2-network_offering"],
)
self.network_offering.update(self.apiclient, state='Enabled')
self.services["network"]["networkoffering"] = self.network_offering.id
self.l2_network = Network.create(
self.apiclient,
self.services["l2-network"],
zoneid=self.zone.id,
networkofferingid=self.network_offering.id
)
vms=[] vms=[]
for i in range(0, number): for i in range(0, number):
self.services["vm"]["zoneid"] = self.zone.id self.services["virtual_machine"]["zoneid"] = self.zone.id
self.services["vm"]["template"] = self.template.id self.services["virtual_machine"]["template"] = self.template.id
self.services["vm"]["displayname"] = 'vm' + str(i) self.services["virtual_machine"]["displayname"] = 'vm' + str(i)
self.services["vm"]["hypervisor"] = self.hypervisor self.services["virtual_machine"]["hypervisor"] = self.hypervisor
vm = VirtualMachine.create( vm = VirtualMachine.create(
self.apiclient, self.apiclient,
self.services["vm"], self.services["virtual_machine"],
serviceofferingid=self.service_offering.id, serviceofferingid=self.service_offering.id,
networkids=self.l2_network.id,
hostid=hostId hostid=hostId
) )
vms.append(vm) vms.append(vm)
self.cleanup.append(vm) self.cleanup.append(vm)
self.logger.debug("VM create = {}".format(vm.id)) self.logger.debug("VM create = {}".format(vm.id))
self.cleanup.append(self.l2_network)
self.cleanup.append(self.network_offering)
self.cleanup.append(self.service_offering)
return vms return vms
def checkVmMigratingOnHost(self, hostId): def checkVmMigratingOnHost(self, hostId):
@ -290,3 +274,331 @@ class TestHostMaintenance(cloudstackTestCase):
return return
class TestHostMaintenanceAgents(cloudstackTestCase):
# Class-level fixture: collects the test client handles, configures a logger and creates the
# service offering, L2 network offering and L2 network used by the tests in this class.
# NOTE(review): indentation is not visible in this view, so it cannot be told from here which of
# the statements after `if not cls.hypervisorNotSupported:` belong to that branch - confirm
# against the real file.
@classmethod
def setUpClass(cls):
cls.testClient = super(TestHostMaintenanceAgents, cls).getClsTestClient()
cls.apiclient = cls.testClient.getApiClient()
cls.hypervisor = cls.testClient.getHypervisorInfo()
cls.dbclient = cls.testClient.getDbConnection()
cls.zone = get_zone(cls.apiclient, cls.testClient.getZoneForTests())
cls.pod = get_pod(cls.apiclient, cls.zone.id)
cls.services = cls.testClient.getParsedTestDataConfig()
# Dedicated DEBUG-level logger with its own stream handler
cls.logger = logging.getLogger('TestHMAgents')
cls.stream_handler = logging.StreamHandler()
cls.logger.setLevel(logging.DEBUG)
cls.logger.addHandler(cls.stream_handler)
# Resources appended here are removed in tearDownClass
cls._cleanup = []
# Flag read by the skipTestIf decorators: only 'kvm' and 'lxc' are supported
cls.hypervisorNotSupported = False
if cls.hypervisor.lower() not in ['kvm', 'lxc']:
cls.hypervisorNotSupported = True
if not cls.hypervisorNotSupported:
# Remember the current kvm.ssh.to.agent value so tearDownClass can restore it
cls.initialsshvalue = cls.is_ssh_enabled()
cls.template = get_template(
cls.apiclient,
cls.zone.id,
cls.hypervisor
)
cls.services["virtual_machine"]["zoneid"] = cls.zone.id
cls.services["virtual_machine"]["template"] = cls.template.id
cls.services["virtual_machine"]["hypervisor"] = cls.hypervisor
cls.service_offering = ServiceOffering.create(
cls.apiclient,
cls.services["service_offerings"]["tiny"]
)
cls._cleanup.append(cls.service_offering)
cls.network_offering = NetworkOffering.create(
cls.apiclient,
cls.services["l2-network_offering"],
)
cls.network_offering.update(cls.apiclient, state='Enabled')
cls.services["network"]["networkoffering"] = cls.network_offering.id
cls.l2_network = Network.create(
cls.apiclient,
cls.services["l2-network"],
zoneid=cls.zone.id,
networkofferingid=cls.network_offering.id
)
# The network is appended before its offering
cls._cleanup.append(cls.l2_network)
cls._cleanup.append(cls.network_offering)
# Attributes (including the "username"/"password" read by the SSH tests) of the first host of
# the first cluster of the first pod of the first zone in the test configuration
cls.hostConfig = cls.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
# Class-level teardown: restores the kvm.ssh.to.agent value recorded in setUpClass (supported
# hypervisors only) and removes the resources registered in cls._cleanup. Any error is re-raised
# as a generic Exception with a "Warning" prefix.
# NOTE(review): indentation is not visible in this view; whether cleanup_resources sits inside
# the `if` cannot be told from here - confirm against the real file.
@classmethod
def tearDownClass(cls):
try:
if not cls.hypervisorNotSupported:
# Revert setting value to the original
cls.set_ssh_enabled(cls.initialsshvalue)
cleanup_resources(cls.apiclient, cls._cleanup)
except Exception as e:
raise Exception("Warning: Exception during cleanup : %s" % e)
def setUp(self):
    """Start with an empty per-test cleanup list and, on supported hypervisors, pick the host under test."""
    self.cleanup = []
    if self.hypervisorNotSupported:
        return
    self.host = self.get_enabled_host_connected_agent()
def tearDown(self):
    """Remove the resources registered by the test; any failure is re-raised as a generic Exception."""
    try:
        cleanup_resources(self.apiclient, self.cleanup)
    except Exception as cleanup_error:
        raise Exception("Warning: Exception during cleanup : %s" % cleanup_error)
@classmethod
def is_ssh_enabled(cls):
    """Return the 'kvm.ssh.to.agent' setting as a bool; False when it is missing or has no value."""
    settings = Configurations.list(cls.apiclient, name="kvm.ssh.to.agent")
    if settings and settings[0].value:
        return bool(strtobool(settings[0].value))
    return False
@classmethod
def updateConfiguration(self, name, value):
    """Send an updateConfiguration API call setting `name` to `value`.

    Note: this is a classmethod, so the first parameter (named `self`) receives the class.
    """
    update_cmd = updateConfiguration.updateConfigurationCmd()
    update_cmd.name = name
    update_cmd.value = value
    self.apiclient.updateConfiguration(update_cmd)
@classmethod
def set_ssh_enabled(cls, on):
    """Set 'kvm.ssh.to.agent' to "true" when `on` is truthy, otherwise to "false"."""
    if on:
        value = "true"
    else:
        value = "false"
    cls.updateConfiguration('kvm.ssh.to.agent', value)
def prepare_host_for_maintenance(self, hostid):
    """Send prepareHostForMaintenance for the given host id and log it."""
    prepare_cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd()
    prepare_cmd.id = hostid
    self.apiclient.prepareHostForMaintenance(prepare_cmd)
    self.logger.debug('Host with id %s is in prepareHostForMaintenance' % hostid)
def wait_until_host_is_in_state(self, hostid, resourcestate, interval=3, retries=20):
    """Poll via wait_until(interval, retries, ...) until the host reports `resourcestate`.

    Returns True on success; raises Exception when wait_until reports failure.
    """
    def host_reached_resource_state():
        hosts = Host.list(self.apiclient, id=hostid)
        if not isinstance(hosts, list):
            return False, None
        if hosts[0].resourcestate != resourcestate:
            return False, None
        self.logger.debug('Host with id %s is in resource state = %s' % (hostid, resourcestate))
        return True, None

    succeeded, _ = wait_until(interval, retries, host_reached_resource_state)
    if succeeded:
        return True
    raise Exception("Failed to wait for host %s to be on resource state %s" % (hostid, resourcestate))
def wait_until_agent_is_in_state(self, hostid, state, interval=3, retries=20):
    """Poll via wait_until(interval, retries, ...) until the host's `state` field equals `state`.

    Returns True on success; raises Exception when wait_until reports failure.
    """
    def agent_reached_state():
        hosts = Host.list(self.apiclient, id=hostid)
        if not isinstance(hosts, list):
            return False, None
        if hosts[0].state != state:
            return False, None
        self.logger.debug('Host agent with id %s is in state = %s' % (hostid, state))
        return True, None

    succeeded, _ = wait_until(interval, retries, agent_reached_state)
    if succeeded:
        return True
    raise Exception("Failed to wait for host agent %s to be on state %s" % (hostid, state))
def cancel_host_maintenance(self, hostid):
    """Send cancelHostMaintenance for the given host id and log it."""
    cancel_cmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
    cancel_cmd.id = hostid
    self.apiclient.cancelHostMaintenance(cancel_cmd)
    self.logger.debug('Host with id %s is cancelling maintenance' % hostid)
def get_enabled_host_connected_agent(self):
    """Return the first Routing host that is Enabled and Up in the test zone/pod/hypervisor.

    Raises unittest.SkipTest when fewer than two such hosts are available.
    """
    hosts = Host.list(
        self.apiclient,
        type='Routing',
        zoneid=self.zone.id,
        podid=self.pod.id,
        hypervisor=self.hypervisor,
        resourcestate='Enabled',
        state='Up'
    )
    # The other helpers in this class guard Host.list results with isinstance(..., list);
    # do the same here so a non-list result skips the test instead of failing in len().
    if not isinstance(hosts, list) or len(hosts) < 2:
        raise unittest.SkipTest("Cancel host maintenance must be tested for 2 or more hosts")
    return hosts[0]
def deploy_vm_on_host(self, hostid):
    """Create a VM on the given host using the class service offering and L2 network; return it."""
    deployed_vm = VirtualMachine.create(
        self.apiclient,
        self.services["virtual_machine"],
        serviceofferingid=self.service_offering.id,
        networkids=self.l2_network.id,
        hostid=hostid
    )
    return deployed_vm
def assert_host_is_functional_after_cancelling_maintenance(self, hostid):
    """Wait for the agent to be "Up", deploy a VM on the host and assert the VM is "Running"."""
    self.wait_until_agent_is_in_state(hostid, "Up")
    self.logger.debug('Deploying VM on host %s' % hostid)
    vm = self.deploy_vm_on_host(hostid)
    # Register the VM for cleanup before asserting, so a failed assertion does not leave it behind.
    self.cleanup.append(vm)
    self.assertEqual(
        vm.state,
        "Running",
        "Check VM is running on the host"
    )
def revert_host_state_on_failure(self, host):
    """Send updateHost with allocationstate "Enable" and assert the host reports "Enabled"."""
    enable_cmd = updateHost.updateHostCmd()
    enable_cmd.id = host.id
    enable_cmd.allocationstate = "Enable"
    updated_host = self.apiclient.updateHost(enable_cmd)
    self.assertEqual(updated_host.resourcestate, "Enabled")
@skipTestIf("hypervisorNotSupported")
@attr(tags=["advanced", "advancedns", "smoke", "basic", "eip", "sg"], required_hardware="true")
def test_01_cancel_host_maintenance_ssh_enabled_agent_connected(self):
    """
    Test cancel maintenance when: 'kvm.ssh.to.agent' = true, agent state = 'Up'

    1) Put host on Maintenance
    2) Cancel maintenance on host
    3) Assert agent is still connected after cancelling maintenance
    4) Deploy VM on the host after cancelling maintenance
    """
    ssh_currently_enabled = self.is_ssh_enabled()
    if not ssh_currently_enabled:
        self.set_ssh_enabled(True)

    try:
        target = self.host.id
        self.prepare_host_for_maintenance(target)
        self.wait_until_host_is_in_state(target, "Maintenance")
        self.cancel_host_maintenance(target)
        self.wait_until_host_is_in_state(target, "Enabled")
        self.assert_host_is_functional_after_cancelling_maintenance(target)
    except Exception as error:
        # Bring the host back before reporting the failure
        self.revert_host_state_on_failure(self.host)
        self.fail(error)
def get_ssh_client(self, ip, username, password, retries=10):
    """Set up an SSH client connection to `ip` on port 22 and return it.

    Raises unittest.SkipTest (carrying the underlying error text) when the
    connection cannot be created.
    """
    try:
        ssh_client = SshClient(ip, 22, username, password, retries)
    except Exception as e:
        # The message needs a %s placeholder; without it the % operator raises
        # TypeError and the intended skip never happens.
        raise unittest.SkipTest("Unable to create ssh connection: %s" % e)

    self.assertIsNotNone(
        ssh_client, "Failed to setup ssh connection to ip=%s" % ip)

    return ssh_client
@skipTestIf("hypervisorNotSupported")
@attr(tags=["advanced", "advancedns", "smoke", "basic", "eip", "sg"], required_hardware="true")
def test_02_cancel_host_maintenance_ssh_enabled_agent_disconnected(self):
    """
    Test cancel maintenance when: 'kvm.ssh.to.agent' = true, agent state != 'Up'

    1) Put host on maintenance
    2) SSH into host and stop cloudstack-agent service - host gets Disconnected
    3) Cancel maintenance on host
    4) Assert agent is connected after cancelling maintenance
    5) Deploy VM on the host
    """

    if not self.is_ssh_enabled():
        self.set_ssh_enabled(True)
    # Host credentials come from the first host entry of the test configuration
    username = self.hostConfig["username"]
    password = self.hostConfig["password"]

    try:
        self.prepare_host_for_maintenance(self.host.id)
        self.wait_until_host_is_in_state(self.host.id, "Maintenance")

        ssh_client = self.get_ssh_client(self.host.ipaddress, username, password)
        ssh_client.execute("service cloudstack-agent stop")
        self.wait_until_agent_is_in_state(self.host.id, "Disconnected")

        self.cancel_host_maintenance(self.host.id)
        self.wait_until_host_is_in_state(self.host.id, "Enabled")

        self.assert_host_is_functional_after_cancelling_maintenance(self.host.id)
    except unittest.SkipTest:
        # get_ssh_client skips when SSH is not possible: restore the host, but keep
        # the outcome a skip instead of converting it into a failure below.
        self.revert_host_state_on_failure(self.host)
        raise
    except Exception as e:
        self.revert_host_state_on_failure(self.host)
        self.fail(e)
@skipTestIf("hypervisorNotSupported")
@attr(tags=["advanced", "advancedns", "smoke", "basic", "eip", "sg"], required_hardware="true")
def test_03_cancel_host_maintenance_ssh_disabled_agent_connected(self):
    """
    Test cancel maintenance when: 'kvm.ssh.to.agent' = false, agent state = 'Up'

    1) Put host on Maintenance
    2) Cancel maintenance on host
    3) Assert agent is still connected after cancelling maintenance
    4) Deploy VM on the host after cancelling maintenance
    """
    ssh_currently_enabled = self.is_ssh_enabled()
    if ssh_currently_enabled:
        self.set_ssh_enabled(False)

    try:
        target = self.host.id
        self.prepare_host_for_maintenance(target)
        self.wait_until_host_is_in_state(target, "Maintenance")
        self.cancel_host_maintenance(target)
        self.wait_until_host_is_in_state(target, "Enabled")
        self.assert_host_is_functional_after_cancelling_maintenance(target)
    except Exception as error:
        # Bring the host back before reporting the failure
        self.revert_host_state_on_failure(self.host)
        self.fail(error)
@skipTestIf("hypervisorNotSupported")
@attr(tags=["advanced", "advancedns", "smoke", "basic", "eip", "sg"], required_hardware="true")
def test_04_cancel_host_maintenance_ssh_disabled_agent_disconnected(self):
    """
    Test cancel maintenance when: 'kvm.ssh.to.agent' = false, agent state != 'Up'

    1) Put host on maintenance
    2) SSH into host (if possible) and stop cloudstack-agent service - host gets Disconnected.
       Skip test if not possible to SSH into host
    3) Cancel maintenance on host - assert cannot cancel maintenance on disconnected host (exception thrown)
    4) SSH into host and start cloudstack-agent service - host gets connected
    5) Cancel maintenance on host
    6) Assert agent is connected after cancelling maintenance
    7) Deploy VM on the host
    """

    if self.is_ssh_enabled():
        self.set_ssh_enabled(False)

    try:
        self.prepare_host_for_maintenance(self.host.id)
        self.wait_until_host_is_in_state(self.host.id, "Maintenance")

        ssh_client = self.get_ssh_client(self.host.ipaddress, self.hostConfig["username"],
                                         self.hostConfig["password"])
        ssh_client.execute("service cloudstack-agent stop")
        self.wait_until_agent_is_in_state(self.host.id, "Disconnected")
    except unittest.SkipTest:
        # SSH to the host is not possible: restore the host and keep the outcome a skip
        # (as the docstring promises) instead of turning it into a failure below.
        self.revert_host_state_on_failure(self.host)
        raise
    except Exception as e:
        self.revert_host_state_on_failure(self.host)
        self.fail(e)

    # With SSH disabled and the agent disconnected, cancelling maintenance must be rejected
    self.assertRaises(Exception, self.cancel_host_maintenance, self.host.id)

    try:
        ssh_client = self.get_ssh_client(self.host.ipaddress, self.hostConfig["username"],
                                         self.hostConfig["password"])
        ssh_client.execute("service cloudstack-agent start")
        self.wait_until_agent_is_in_state(self.host.id, "Up")

        self.cancel_host_maintenance(self.host.id)
        self.wait_until_host_is_in_state(self.host.id, "Enabled")
        self.assert_host_is_functional_after_cancelling_maintenance(self.host.id)
    except unittest.SkipTest:
        self.revert_host_state_on_failure(self.host)
        raise
    except Exception as e:
        self.revert_host_state_on_failure(self.host)
        self.fail(e)