From f917ab660e8813be654353bee84f078734b42691 Mon Sep 17 00:00:00 2001 From: Boris Stoyanov Date: Fri, 25 Aug 2017 12:07:38 +0200 Subject: [PATCH] CLOUDSTACK-9782: Improve host HA tests - All tests should pass on KVM, Simulator - Add test cases covering FSM state transitions and actions Signed-off-by: Rohit Yadav --- .travis.yml | 17 +- .../admin/host/PrepareForMaintenanceCmd.java | 4 - .../cloudstack/ha/SimulatorHAProvider.java | 25 +- scripts/vm/hypervisor/kvm/kvmvmactivity.sh | 3 +- .../cloud/resource/ResourceManagerImpl.java | 2 +- .../apache/cloudstack/ha/HAManagerImpl.java | 27 +- test/integration/smoke/test_ha_for_host.py | 247 -------- test/integration/smoke/test_ha_kvm_agent.py | 535 ------------------ .../{test_ha_kvm.py => test_hostha_kvm.py} | 509 ++++++++--------- .../smoke/test_hostha_simulator.py | 182 +++++- tools/travis/install.sh | 2 +- tools/travis/script.sh | 1 + 12 files changed, 433 insertions(+), 1121 deletions(-) delete mode 100644 test/integration/smoke/test_ha_for_host.py delete mode 100644 test/integration/smoke/test_ha_kvm_agent.py rename test/integration/smoke/{test_ha_kvm.py => test_hostha_kvm.py} (61%) diff --git a/.travis.yml b/.travis.yml index f9ef0fcfe17..9ea204e5d59 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,10 +43,9 @@ env: smoke/test_disk_offerings smoke/test_dynamicroles smoke/test_global_settings - smoke/test_guest_vlan_range - smoke/test_ha_for_host - smoke/test_ha_kvm_agent - smoke/test_ha_kvm + smoke/test_guest_vlan_range" + + - TESTS="smoke/test_hostha_kvm smoke/test_hostha_simulator smoke/test_hosts smoke/test_internal_lb @@ -61,9 +60,8 @@ env: smoke/test_network_acl smoke/test_nic smoke/test_nic_adapter_type - smoke/test_non_contigiousvlan" - - - TESTS="smoke/test_outofbandmanagement + smoke/test_non_contigiousvlan + smoke/test_outofbandmanagement smoke/test_outofbandmanagement_nestedplugin smoke/test_over_provisioning smoke/test_password_server @@ -71,8 +69,9 @@ env: smoke/test_primary_storage smoke/test_privategw_acl smoke/test_public_ip_range - smoke/test_pvlan - smoke/test_regions + smoke/test_pvlan" + + - TESTS="smoke/test_regions smoke/test_reset_vm_on_reboot smoke/test_resource_detail smoke/test_router_dhcphosts diff --git a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java index aa7cfed1e8f..e49aabc49d4 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java @@ -108,8 +108,4 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd { throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to prepare host for maintenance"); } } - - public void setHostId(final Long hostId) { - id = hostId; - } } diff --git a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java index 02f4e653115..1189e9a5e1f 100644 --- a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java +++ b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java @@ -133,15 +133,8 @@ public class SimulatorHAProvider extends HAAbstractHostProvider implements HAPro } } - @Override - public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event, - final HAConfig.HAState newState, final HAConfig vo, final boolean 
status, final Object opaque) { - return false; - } - - @Override - public boolean postStateTransitionEvent(final StateMachine2.Transition transition, - final HAConfig vo, final boolean status, final Object opaque) { + private boolean addStateTransition(final HAConfig vo, final boolean status, + final HAConfig.HAState oldState, final HAConfig.HAState newState, final HAConfig.Event event) { if (vo.getResourceType() != HAResource.ResourceType.Host) { return false; } @@ -150,6 +143,18 @@ public class SimulatorHAProvider extends HAAbstractHostProvider implements HAPro return false; } final HAResourceCounter counter = haManager.getHACounter(vo.getResourceId(), vo.getResourceType()); - return haState.addStateTransition(transition.getToState(), transition.getCurrentState(), transition.getEvent(), counter); + return haState.addStateTransition(newState, oldState, event, counter); + } + + @Override + public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event, + final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) { + return addStateTransition(vo, status, oldState, newState, event); + } + + @Override + public boolean postStateTransitionEvent(final StateMachine2.Transition transition, + final HAConfig vo, final boolean status, final Object opaque) { + return addStateTransition(vo, status, transition.getCurrentState(), transition.getToState(), transition.getEvent()); } } \ No newline at end of file diff --git a/scripts/vm/hypervisor/kvm/kvmvmactivity.sh b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh index 2e0b535b901..88ee8f380bb 100755 --- a/scripts/vm/hypervisor/kvm/kvmvmactivity.sh +++ b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh @@ -116,7 +116,8 @@ else lastUpdateTime=${arrTime[1]} echo "$SuspectTime:$latestUpdateTime:$MSTime" > $acFile - if [[ $lastSuspectTime -ne $SuspectTime ]]; then + suspectTimeDiff=$(expr $SuspectTime - $lastSuspectTime) + if [[ $suspectTimeDiff -lt 0 ]]; then if [[ $latestUpdateTime -gt $SuspectTime ]]; then echo "=====> ALIVE <=====" else diff --git a/server/src/com/cloud/resource/ResourceManagerImpl.java b/server/src/com/cloud/resource/ResourceManagerImpl.java index 5eee2469bbb..87e0bc5d132 100755 --- a/server/src/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/com/cloud/resource/ResourceManagerImpl.java @@ -2272,7 +2272,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, } try { - SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart"); + SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot(connection, "service cloudstack-agent restart || systemctl restart cloudstack-agent"); s_logger.debug("cloudstack-agent restart result: " + result.toString()); } catch (final SshException e) { return false; diff --git a/server/src/org/apache/cloudstack/ha/HAManagerImpl.java b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java index c2ba528068f..49d9432fa15 100644 --- a/server/src/org/apache/cloudstack/ha/HAManagerImpl.java +++ b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java @@ -156,7 +156,9 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust if (result) { final String message = String.format("Transitioned host HA state from:%s to:%s due to event:%s for the host id:%d", currentHAState, nextState, event, haConfig.getResourceId()); - LOG.debug(message); + if (LOG.isTraceEnabled()) { + LOG.trace(message); + } if (nextState == 
HAConfig.HAState.Recovering || nextState == HAConfig.HAState.Fencing || nextState == HAConfig.HAState.Fenced) { ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), Domain.ROOT_DOMAIN, EventTypes.EVENT_HA_STATE_TRANSITION, message); @@ -475,7 +477,7 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust public void onManagementNodeIsolated() { } - private boolean processHAStateChange(final HAConfig haConfig, final boolean status) { + private boolean processHAStateChange(final HAConfig haConfig, final HAConfig.HAState newState, final boolean status) { if (!status || !checkHAOwnership(haConfig)) { return false; } @@ -493,14 +495,14 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); // Perform activity checks - if (haConfig.getState() == HAConfig.HAState.Checking) { + if (newState == HAConfig.HAState.Checking) { final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig, HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, counter.getSuspectTimeStamp())); activityCheckExecutor.submit(job); } // Attempt recovery - if (haConfig.getState() == HAConfig.HAState.Recovering) { + if (newState == HAConfig.HAState.Recovering) { if (counter.getRecoveryCounter() >= (Long) (haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) { return false; } @@ -511,7 +513,7 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust } // Fencing - if (haConfig.getState() == HAConfig.HAState.Fencing) { + if (newState == HAConfig.HAState.Fencing) { final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig, HAProviderConfig.FenceTimeout, fenceExecutor)); final Future fenceFuture = fenceExecutor.submit(task); @@ -528,7 +530,10 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust if (LOG.isTraceEnabled()) { LOG.trace("HA state pre-transition:: new state=" + newState + ", old state=" + oldState + ", for resource id=" + haConfig.getResourceId() + ", status=" + status + ", ha config state=" + haConfig.getState()); } - return processHAStateChange(haConfig, status); + if (status && haConfig.getState() != newState) { + LOG.warn("HA state pre-transition:: HA state is not equal to transition state, HA state=" + haConfig.getState() + ", new state=" + newState); + } + return processHAStateChange(haConfig, newState, status); } @Override @@ -536,7 +541,10 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust if (LOG.isTraceEnabled()) { LOG.trace("HA state post-transition:: new state=" + transition.getToState() + ", old state=" + transition.getCurrentState() + ", for resource id=" + haConfig.getResourceId() + ", status=" + status + ", ha config state=" + haConfig.getState()); } - return processHAStateChange(haConfig, status); + if (status && haConfig.getState() != transition.getToState()) { + LOG.warn("HA state post-transition:: HA state is not equal to transition state, HA state=" + haConfig.getState() + ", new state=" + transition.getToState()); + } + return processHAStateChange(haConfig, transition.getToState(), status); } /////////////////////////////////////////////////// @@ -697,5 +705,10 @@ public final class HAManagerImpl extends ManagerBase implements HAManager, Clust LOG.error("Error trying to perform 
health checks in HA manager", t); } } + + @Override + public Long getDelay() { + return null; + } } } diff --git a/test/integration/smoke/test_ha_for_host.py b/test/integration/smoke/test_ha_for_host.py deleted file mode 100644 index efc4f1f1b41..00000000000 --- a/test/integration/smoke/test_ha_for_host.py +++ /dev/null @@ -1,247 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -from marvin.cloudstackTestCase import * -from marvin.cloudstackAPI import * -from marvin.lib.utils import * -from marvin.lib.common import * -from nose.plugins.attrib import attr -import cmd -from cmd import Cmd - - -class TestHaForHost(cloudstackTestCase): - """ Test cases for configuring HA for Host - """ - - def setUp(self): - testClient = super(TestHaForHost, self).getClsTestClient() - - self.apiclient = testClient.getApiClient() - self.dbclient = testClient.getDbConnection() - self.services = testClient.getParsedTestDataConfig() - - self.zone = get_zone(self.apiclient, testClient.getZoneForTests()) - self.host = None - self.server = None - - self.cleanup = [] - - def tearDown(self): - try: - self.dbclient.execute("delete from ha_config where resource_type='Host'") - cleanup_resources(self.apiclient, self.cleanup) - except Exception as e: - raise Exception("Warning: Exception during cleanup : %s" % e) - - - def getHost(self, hostId=None): - if self.host and hostId is None: - return self.host - - response = list_hosts( - self.apiclient, - zoneid=self.zone.id, - type='Routing', - id=hostId - ) - if len(response) > 0: - self.host = response[0] - return self.host - raise self.skipTest("No hosts found, skipping HA for Host test") - - - def getHaProvider(self, host): - cmd = listHostHAProviders.listHostHAProvidersCmd() - cmd.hypervisor = host.hypervisor - response = self.apiclient.listHostHAProviders(cmd) - return response[0].haprovider - - - def configureHaProvider(self): - cmd = configureHAForHost.configureHAForHostCmd() - cmd.hostid = self.getHost().id - cmd.provider = self.getHaProvider(self.getHost()) - return self.apiclient.configureHAForHost(cmd) - - - def getHaForHostEnableCmd(self): - cmd = enableHAForHost.enableHAForHostCmd() - cmd.hostid = self.getHost().id - return cmd - - - def getHaForHostDisableCmd(self): - cmd = disableHAForHost.disableHAForHostCmd() - cmd.hostid = self.getHost().id - return cmd - - - def getListHostHAResources(self): - cmd = listHostHAResources.listHostHAResourcesCmd() - cmd.hostid = self.getHost().id - return cmd - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_enable_ha_for_host(self): - """ - This test enables HA for a host - """ - - self.configureHaProvider() - cmd = self.getHaForHostEnableCmd() - response = self.apiclient.enableHAForHost(cmd) - - 
self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, True) - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_enable_ha_for_host_invalid(self): - """ - This is a negative test for enable HA for a host - """ - - self.configureHaProvider() - cmd = self.getHaForHostEnableCmd() - cmd.hostid = -1 - - try: - response = self.apiclient.enableHAForHost(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_disable_ha_for_host(self): - """ - This test disables HA for a host - """ - - self.configureHaProvider() - cmd = self.getHaForHostDisableCmd() - - response = self.apiclient.disableHAForHost(cmd) - - self.assertTrue(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, False) - - response = self.getHost(cmd.hostid) - - self.assertEqual(response.hostha.hastate, "Disabled") - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_disable_ha_for_host_invalid(self): - """ - This is a negative test for disable HA for a host - """ - - self.configureHaProvider() - cmd = self.getHaForHostDisableCmd() - cmd.hostid = -1 - - try: - response = self.apiclient.disableHAForHost(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_list_ha_for_host(self): - """ - Test that verifies the listHAForHost API - """ - self.configureHaProvider() - db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config") - - cmd = self.getListHostHAResources() - del cmd.hostid - response = self.apiclient.listHostHAResources(cmd) - - self.assertEqual(db_count[0][0], len(response)) - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_list_ha_for_host_valid(self): - """ - Valid test for listing a specific host HA resources - """ - - self.configureHaProvider() - cmd = self.getListHostHAResources() - response = self.apiclient.listHostHAResources(cmd) - self.assertEqual(response[0].hostid, cmd.hostid) - - - @attr(tags=["advanced", - "advancedns", - "smoke", - "basic", - "sg"], - required_hardware="false") - def test_list_ha_for_host_invalid(self): - """ - Test that listHostHAResources is returning exception when called with invalid data - """ - - self.configureHaProvider() - cmd = self.getListHostHAResources() - cmd.hostid = "someinvalidvalue" - - try: - response = self.apiclient.listHostHAResources(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") diff --git a/test/integration/smoke/test_ha_kvm_agent.py b/test/integration/smoke/test_ha_kvm_agent.py deleted file mode 100644 index 3efde0a97ec..00000000000 --- a/test/integration/smoke/test_ha_kvm_agent.py +++ /dev/null @@ -1,535 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -from marvin.cloudstackTestCase import * -from marvin.lib.utils import * -from marvin.lib.base import * -from marvin.lib.common import * -from nose.plugins.attrib import attr - -from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer - -import random -import socket -import thread - - -class TestHaKVMAgent(cloudstackTestCase): - """ Test cases for out of band management - """ - - def setUp(self): - testClient = super(TestHaKVMAgent, self).getClsTestClient() - - self.apiClient = testClient.getApiClient() - self.dbclient = testClient.getDbConnection() - self.services = testClient.getParsedTestDataConfig() - - self.zone = get_zone(self.apiClient, testClient.getZoneForTests()) - self.host = self.getHost() - self.cluster_id = self.host.clusterid - self.server = None - - self.hypervisor = self.testClient.getHypervisorInfo() - self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ - self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ - self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) - - # Cleanup any existing configs - self.dbclient.execute("delete from ha_config where resource_type='Host'") - - # use random port for ipmisim - s = socket.socket() - s.bind(('', 0)) - self.serverPort = s.getsockname()[1] - s.close() - - # Set Cluster-level setting in order to run tests faster - self.update_configuration("kvm.ha.activity.check.failure.ratio", "0.7") - self.update_configuration("kvm.ha.activity.check.interval", "10") - self.update_configuration("kvm.ha.activity.check.max.attempts", "5") - self.update_configuration("kvm.ha.activity.check.timeout", "60") - self.update_configuration("kvm.ha.degraded.max.period", "30") - self.update_configuration("kvm.ha.fence.timeout", "60") - self.update_configuration("kvm.ha.health.check.timeout", "10") - self.update_configuration("kvm.ha.recover.failure.threshold", "1") - self.update_configuration("kvm.ha.recover.timeout", "120") - self.update_configuration("kvm.ha.recover.wait.period", "60") - - self.service_offering = ServiceOffering.create( - self.apiClient, - self.services["service_offerings"] - ) - - self.template = get_template( - self.apiClient, - self.zone.id, - self.services["ostype"] - ) - - self.cleanup = [self.service_offering] - - def tearDown(self): - try: - self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) - self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) - self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") - self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") - self.dbclient.execute("delete from ha_config where resource_type='Host'") - self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort()) - self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) - self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) - self.dbclient.execute("delete from cluster_details where 
name='outOfBandManagementEnabled'") - self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'") - cleanup_resources(self.apiClient, self.cleanup) - if self.server: - self.server.shutdown() - self.server.server_close() - except Exception as e: - raise Exception("Warning: Exception during cleanup : %s" % e) - - def getFakeMsId(self): - return self.fakeMsId - - def getFakeMsRunId(self): - return self.fakeMsId * 1000 - - def getHostHaConfigCmd(self, provider='kvmhaprovider'): - cmd = configureHAForHost.configureHAForHostCmd() - cmd.provider = provider - cmd.hostid = self.host.id - return cmd - - def getHostHaEnableCmd(self): - cmd = enableHAForHost.enableHAForHostCmd() - cmd.hostid = self.host.id - return cmd - - def getHost(self, hostId=None): - response = list_hosts( - self.apiClient, - zoneid=self.zone.id, - type='Routing', - id=hostId - ) - if len(response) > 0: - self.host = response[0] - return self.host - raise self.skipTest("No hosts found, skipping out-of-band management test") - - def getIpmiServerIp(self): - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"])) - return s.getsockname()[0] - - def getIpmiServerPort(self): - return self.serverPort - - def getOobmConfigCmd(self): - cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd() - cmd.driver = 'ipmitool' # The default available driver - cmd.address = self.getIpmiServerIp() - cmd.port = self.getIpmiServerPort() - cmd.username = 'admin' - cmd.password = 'password' - cmd.hostid = self.host.id - return cmd - - def getOobmEnableCmd(self): - cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd() - cmd.hostid = self.host.id - return cmd - - def getOobmDisableCmd(self): - cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd() - cmd.hostid = self.host.id - return cmd - - def getOobmIssueActionCmd(self): - cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd() - cmd.hostid = self.host.id - cmd.action = 'STATUS' - return cmd - - def issue_power_action_cmd(self, action, timeout=None): - cmd = self.getOobmIssueActionCmd() - cmd.action = action - if timeout: - cmd.timeout = timeout - - try: - return self.apiClient.issueOutOfBandManagementPowerAction(cmd) - except Exception as e: - if "packet session id 0x0 does not match active session" in str(e): - raise self.skipTest("Known ipmitool issue hit, skipping test") - raise e - - def configure_and_enable_oobm(self): - self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd()) - response = self.apiClient.enableOutOfBandManagementForHost(self.getOobmEnableCmd()) - self.assertEqual(response.enabled, True) - - def start_ipmi_server(self): - def startIpmiServer(tname, server): - self.debug("Starting ipmisim server") - try: - server.serve_forever() - except Exception: pass - IpmiServerContext('reset') - ThreadedIpmiServer.allow_reuse_address = False - server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer) - thread.start_new_thread(startIpmiServer, ("ipmi-server", server,)) - self.server = server - - def checkSyncToState(self, state, interval): - def checkForStateSync(expectedState): - response = self.getHost(hostId=self.host.id).outofbandmanagement - return response.powerstate == expectedState, None - - sync_interval = 1 + int(interval)/1000 - res, _ = wait_until(sync_interval, 10, checkForStateSync, state) - if not res: - self.fail("Failed to get host.powerstate synced to 
expected state:" + state) - response = self.getHost(hostId=self.host.id).outofbandmanagement - self.assertEqual(response.powerstate, state) - - def get_host_in_available_state(self): - - self.configure_and_start_ipmi_server() - self.assert_issue_command_state('ON', 'On') - self.configureAndEnableHostHa() - - self.check_host_transition_to_available() - - response = self.getHost() - if response.hostha.hastate is not "Available": - print response - - self.assertEqual(response.hostha.hastate, "Available") - - def configureAndEnableHostHa(self): - self.apiClient.configureHAForHost(self.getHostHaConfigCmd()) - - response = self.apiClient.enableHAForHost(self.getHostHaEnableCmd()) - self.assertEqual(response.haenable, True) - - def configure_and_start_ipmi_server(self, power_state=None): - """ - Setup ipmisim and enable out-of-band management for host - """ - self.configure_and_enable_oobm() - self.start_ipmi_server() - if power_state: - bmc = IpmiServerContext().bmc - bmc.powerstate = power_state - - def assert_issue_command_state(self, command, expected): - """ - Asserts power action result for a given power command - """ - if command != 'STATUS': - self.issue_power_action_cmd(command) - response = self.issue_power_action_cmd('STATUS') - self.assertEqual(response.powerstate, expected) - - def kill_agent(self): - t_end = time.time() + 90 - while time.time() < t_end: - try: - SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], - passwd=self.hostConfig["password"]).execute \ - ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") - return - except Exception: - print("Cannot ssh into: " + self.host.ipaddress) - self.fail(self) - - def set_host_to_alert(self): - self.dbclient.execute("update host set host.status = 'Alert' where host.uuid = '%s'" % self.host.id) - - def check_host_transitioned_to_degraded(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Degraded": - return - else: - continue - self.fail(self) - - def wait_util_host_is_fencing(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Fencing": - return - else: - continue - self.fail(self) - - def check_host_transitioned_to_suspect(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Suspect": - return - else: - continue - self.fail(self) - - def check_host_transitioned_to_checking(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Checking": - return - else: - continue - self.fail(self) - - def wait_util_host_is_fenced(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Fenced": - return - else: - continue - self.fail(self) - - def wait_util_host_is_up(self): - t_end = time.time() + 120 - while time.time() < t_end: - host = self.getHost() - if host.state in "Up": - return - else: - continue - self.fail(self) - - def stop_agent(self): - SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ - ("service cloudstack-agent stop") - - def start_agent(self): - self.ssh_and_restart_agent() - self.check_host_transition_to_available() - - def ssh_and_restart_agent(self): - t_end = time.time() + 90 - while time.time() < t_end: - try: - SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], - 
passwd=self.hostConfig["password"]).execute \ - ("service cloudstack-agent restart") - return - except Exception: - print("Cannot ssh into: " + self.host.ipaddress) - self.fail(self) - - def check_host_transition_to_available(self): - t_end = time.time() + 90 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate == "Available": - return - else: - continue - self.fail(self) - - def wait_util_host_is_recovered(self): - t_end = time.time() + 180 - while time.time() < t_end: - host = self.getHost() - if host.hostha.hastate in "Recovered": - return - else: - continue - self.fail(self) - - def reset_host(self): - SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], - passwd=self.hostConfig["password"]).execute \ - ("reboot") - - def deploy_vm(self): - vm = VirtualMachine.create( - self.apiClient, - services=self.services["virtual_machine"], - serviceofferingid=self.service_offering.id, - templateid=self.template.id, - zoneid=self.zone.id, - hostid = self.host.id, - method="POST" - ) - - self.cleanup.append(vm) - - def update_configuration(self, name, value): - update_configuration_cmd = updateConfiguration.updateConfigurationCmd() - update_configuration_cmd.name = name - update_configuration_cmd.value = value - update_configuration_cmd.clusterid = self.cluster_id - - self.apiClient.updateConfiguration(update_configuration_cmd) - - - @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_stop_agent_host_is_degraded(self): - """ - Tests HA state turns Degraded when agent is stopped - """ - self.deploy_vm() - - # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available - self.get_host_in_available_state() - - # SSH into the KVM Host and executes kill -9 of the agent - self.stop_agent() - - # Checks if the host would turn into Degraded in the next 120 seconds - try: - self.check_host_transitioned_to_degraded() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Enable Host - self.start_agent() - - #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_recovering_start_agent_host_is_available(self): - """ - Tests HA state turns Recovered when agent is stopped and host is reset - """ - # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available - # Then kills the agent and wait untill the state is Degraded - - self.deploy_vm() - # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available - self.get_host_in_available_state() - - # SSH into the KVM Host and executes kill -9 of the agent - self.kill_agent() - - # Checks if the host would turn into Degraded in the next 120 seconds - try: - self.check_host_transitioned_to_degraded() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Reset host so a shut down could be emulated. 
During the bootup host should transition into recovered state - self.reset_host() - - # Waits until Degraded host turns into Recovered for 180 seconds, - # if it fails it tries to revert host back to Available - try: - self.wait_util_host_is_recovered() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # SSH into the KVM Host and executes service cloudstack-agent restart of the agent - self.start_agent() - - #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_fencing_host(self): - """ - Tests HA state turns Recovered when agent is stopped and host is reset, - then configure incorrect OOBM configuration, so that Recover command would fail - and host would transition into Fenced state. - """ - self.deploy_vm() - - # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available - self.get_host_in_available_state() - - # SSH into the KVM Host and executes kill -9 of the agent - self.kill_agent() - - # Checks if the host would turn into Degraded in the next 120 seconds - try: - self.check_host_transitioned_to_degraded() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Change OOBM Configuration to invalid so it would fail the recover operations. - cmd = self.getOobmConfigCmd() - cmd.address = "1.1.1.1" - self.apiClient.configureOutOfBandManagement(cmd) - - # Reset host so a shut down could be emulated. During the bootup host should transition into recovered state - self.reset_host() - self.kill_agent() - - # Waits until Recovering host turns into Fencing for 180 seconds, - # if it fails it tries to revert host back to Up - try: - self.wait_util_host_is_fencing() - except Exception as e: - self.ssh_and_restart_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Configure correct OOBM configuration so that the Fencing operation would succeed - self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd()) - - # Waits until Fencing host turns into Fenced for 180 seconds, - # if it fails it tries to revert host back to Up - try: - self.wait_util_host_is_fenced() - except Exception as e: - self.ssh_and_restart_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # SSH into the KVM Host and executes service cloudstack-agent restart of the agent - self.ssh_and_restart_agent() - - # Waits until state is Up so that cleanup would be successful - self.wait_util_host_is_up() - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_kill_agent_host_is_degraded(self): - """ - Tests HA state turns Suspect/Checking when some activity/health checks fail - Configures HA, Logs into to a host and restarts the service - Then it confirms the ha state jumps through Suspect -> Checking -> Available - """ - # Configure and Enable OOBM, Set HA Provider and Enable HA. 
At the end checks if HA State is Available - self.get_host_in_available_state() - - # SSH into the KVM Host and executes kill -9 of the agent - self.ssh_and_restart_agent() - - # Checks if the host would turn into Suspect in the next 120 seconds - try: - self.check_host_transitioned_to_suspect() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Checks if the host would turn into Degraded in the next 120 seconds - try: - self.check_host_transitioned_to_checking() - except Exception as e: - self.start_agent() - raise Exception("Warning: Exception during test execution : %s" % e) - - # Enable Host - self.check_host_transition_to_available() diff --git a/test/integration/smoke/test_ha_kvm.py b/test/integration/smoke/test_hostha_kvm.py similarity index 61% rename from test/integration/smoke/test_ha_kvm.py rename to test/integration/smoke/test_hostha_kvm.py index 7709adc4859..53d81e3a9e6 100644 --- a/test/integration/smoke/test_ha_kvm.py +++ b/test/integration/smoke/test_hostha_kvm.py @@ -39,25 +39,61 @@ class TestHAKVM(cloudstackTestCase): """ def setUp(self): + self.testClient = super(TestHAKVM, self).getClsTestClient() self.apiclient = self.testClient.getApiClient() - self.hypervisor = self.testClient.getHypervisorInfo() self.dbclient = self.testClient.getDbConnection() self.services = self.testClient.getParsedTestDataConfig() + self.logger = logging.getLogger('TestHAKVM') + + #Get Zone specifics + self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests()) + self.hypervisor = self.testClient.getHypervisorInfo() + self.host = self.getHost() self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ - self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + self.cluster_id = self.host.clusterid # Cleanup any existing configs self.dbclient.execute("delete from ha_config where resource_type='Host'") - self.host = self.getHost() # use random port for ipmisim + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) s = socket.socket() s.bind(('', 0)) self.serverPort = s.getsockname()[1] s.close() - self.cleanup = [] + # Set Cluster-level setting in order to run tests faster + self.updateConfiguration("kvm.ha.activity.check.failure.ratio", "0.6") + self.updateConfiguration("kvm.ha.activity.check.interval", "8") + self.updateConfiguration("kvm.ha.activity.check.max.attempts", "5") + self.updateConfiguration("kvm.ha.activity.check.timeout", "30") + self.updateConfiguration("kvm.ha.degraded.max.period", "30") + self.updateConfiguration("kvm.ha.fence.timeout", "30") + self.updateConfiguration("kvm.ha.health.check.timeout", "30") + self.updateConfiguration("kvm.ha.recover.failure.threshold", "2") + self.updateConfiguration("kvm.ha.recover.timeout", "30") + self.updateConfiguration("kvm.ha.recover.wait.period", "30") + + self.service_offering = ServiceOffering.create( + self.apiclient, + self.services["service_offerings"]["hasmall"] + ) + + self.template = get_template( + self.apiclient, + self.zone.id, + self.services["ostype"] + ) + + self.cleanup = [self.service_offering] + + def updateConfiguration(self, name, value): + cmd = updateConfiguration.updateConfigurationCmd() + cmd.name = name + cmd.value = value + cmd.clusterid = self.cluster_id + self.apiclient.updateConfiguration(cmd) def getFakeMsId(self): return self.fakeMsId @@ -66,6 +102,7 @@ class 
TestHAKVM(cloudstackTestCase): return self.fakeMsId * 1000 def tearDown(self): + self.host = None try: self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) @@ -96,28 +133,19 @@ class TestHAKVM(cloudstackTestCase): continue self.fail(self) - def getHost(self): - response = list_hosts( - self.apiclient, - type='Routing', - resourcestate='Enabled' - ) - if response and len(response) > 0: - self.host = response[0] - return self.host - raise self.skipTest("No KVM hosts found, skipping host-ha test") - def getHost(self, hostId=None): response = list_hosts( self.apiclient, type='Routing', + hypervisor='kvm', id=hostId ) + # Check if more than one kvm hosts are available in order to successfully configure host-ha if response and len(response) > 0: self.host = response[0] return self.host - raise self.skipTest("No KVM hosts found, skipping host-ha test") + raise self.skipTest("Not enough KVM hosts found, skipping host-ha test") def getHostHaConfigCmd(self, provider='kvmhaprovider'): cmd = configureHAForHost.configureHAForHostCmd() @@ -125,22 +153,17 @@ class TestHAKVM(cloudstackTestCase): cmd.hostid = self.getHost().id return cmd - def getHostHaEnableCmd(self): - cmd = enableHAForHost.enableHAForHostCmd() - cmd.hostid = self.getHost().id - return cmd - def getHostHaDisableCmd(self): cmd = disableHAForHost.disableHAForHostCmd() cmd.hostid = self.getHost().id return cmd def configureAndEnableHostHa(self, initialize=True): + #Adding sleep between configuring and enabling self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + time.sleep(1) response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) self.assertEqual(response.haenable, True) - if initialize: - self.configureKVMHAProviderState(True, True, True, False) def configureAndDisableHostHa(self, hostId): self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) @@ -158,205 +181,48 @@ class TestHAKVM(cloudstackTestCase): self.assertEqual(response.haenable, True) return response - def configureKVMHAProviderState(self, health, activity, recover, fence): - cmd = configureHAForHost.configureHAForHostCmd() - cmd.hostid = self.getHost().id - cmd.health = health - cmd.activity = activity - cmd.recover = recover - cmd.fence = fence - response = self.apiclient.configureKVMHAProviderState(cmd) - self.assertEqual(response.success, 'true') + def disableAgent(self): + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("systemctl disable cloudstack-agent || chkconfig cloudstack-agent off") - def checkSyncToState(self, state, interval=5000): - def checkForStateSync(expectedState): - response = self.getHost(hostId=self.getHost().id).hostha - return response.hastate == expectedState, None + def resetHost(self): + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("reboot") - sync_interval = 1 + int(interval) / 1000 - res, _ = wait_until(sync_interval, 10, checkForStateSync, state) + def enableAgent(self): + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("systemctl enable cloudstack-agent || chkconfig cloudstack-agent on") + + def waitUntilHostInState(self, state="Available", interval=3): + def checkForState(expectedState): + response = self.getHost() + print("checkForState:: expected=%s, actual=%s" % (state, 
response.hostha)) + return response.hostha.hastate == expectedState, None + + res, _ = wait_until(interval, 200, checkForState, state) if not res: - self.fail("Failed to get host.hastate synced to expected state:" + state) - response = self.getHost(hostId=self.getHost().id).hostha - self.assertEqual(response.hastate, state) + self.fail("Failed to see host ha state in :" + state) - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_configure_invalid_provider(self): - """ - Tests host-ha configuration with invalid driver - """ - cmd = self.getHostHaConfigCmd() - cmd.provider = 'randomDriverThatDoesNotExist' - try: - response = self.apiclient.configureHAForHost(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") + def deployVM(self): + vm = VirtualMachine.create( + self.apiclient, + services=self.services["virtual_machine"], + serviceofferingid=self.service_offering.id, + templateid=self.template.id, + zoneid=self.zone.id, + hostid = self.host.id, + method="POST" + ) - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_configure_default_driver(self): - """ - Tests host-ha configuration with valid data - """ - cmd = self.getHostHaConfigCmd() - response = self.apiclient.configureHAForHost(cmd) - self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haprovider, cmd.provider.lower()) - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_enable_feature_invalid(self): - """ - Tests ha feature enable command with invalid options - """ - cmd = self.getHostHaEnableCmd() - cmd.hostid = -1 - try: - response = self.apiclient.enableHAForHost(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - try: - cmd = enableHAForCluster.enableHAForClusterCmd() - response = self.apiclient.enableHAForCluster(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - try: - cmd = enableHAForZone.enableHAForZoneCmd() - response = self.apiclient.enableHAForZone(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_ha_disable_feature_invalid(self): - """ - Tests ha feature disable command with invalid options - """ - cmd = self.getHostHaDisableCmd() - cmd.hostid = -1 - try: - response = self.apiclient.disableHAForHost(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - try: - cmd = disableHAForCluster.disableHAForClusterCmd() - response = self.apiclient.disableHAForCluster(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - try: - cmd = disableHAForZone.disableHAForZoneCmd() - response = self.apiclient.disableHAForZone(cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_enable_feature_valid(self): - """ - Tests host-ha enable feature with valid options - """ - self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) - cmd = self.getHostHaEnableCmd() - response = self.apiclient.enableHAForHost(cmd) - self.assertEqual(response.hostid, 
cmd.hostid) - self.assertEqual(response.haenable, True) - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_disable_feature_valid(self): - """ - Tests host-ha disable feature with valid options - """ - self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) - cmd = self.getHostHaDisableCmd() - response = self.apiclient.disableHAForHost(cmd) - self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, False) - - response = self.getHost(hostId=cmd.hostid).hostha - self.assertEqual(response.hastate, 'Disabled') - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_configure_ha_provider_invalid(self): - """ - Tests configure HA Provider with invalid provider options - """ - - # Enable ha for host - self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) - cmd = self.getHostHaEnableCmd() - response = self.apiclient.enableHAForHost(cmd) - self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, True) - - host = self.getHost(response.hostid) - - # Setup wrong configuration for the host - conf_ha_cmd = configureHAForHost.configureHAForHostCmd() - if host.hypervisor.lower() in "simulator": - conf_ha_cmd.provider = "kvmhaprovider" - if host.hypervisor.lower() in "kvm": - conf_ha_cmd.provider = "simulatorhaprovider" - - conf_ha_cmd.hostid = cmd.hostid - - # Call the configure HA provider API with not supported provider for HA - try: - self.apiclient.configureHAForHost(conf_ha_cmd) - except Exception: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_configure_ha_provider_valid(self): - """ - Tests configure HA Provider with valid provider options - """ - - # Enable ha for host - self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) - cmd = self.getHostHaEnableCmd() - response = self.apiclient.enableHAForHost(cmd) - self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, True) - - host = self.getHost(response.hostid) - - # Setup configuration for the host - conf_ha_cmd = configureHAForHost.configureHAForHostCmd() - if host.hypervisor.lower() in "kvm": - conf_ha_cmd.provider = "kvmhaprovider" - if host.hypervisor.lower() in "simulator": - conf_ha_cmd.provider = "simulatorhaprovider" - - conf_ha_cmd.hostid = cmd.hostid - - # Call the configure HA provider API with not supported provider for HA - response = self.apiclient.configureHAForHost(conf_ha_cmd) - - # Check the response contains the set provider and hostID - self.assertEqual(response.haprovider, conf_ha_cmd.provider) - self.assertEqual(response.hostid, conf_ha_cmd.hostid) + self.cleanup.append(vm) @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") def test_disable_oobm_ha_state_ineligible(self): """ Tests that when HA is enabled for a host, if oobm is disabled HA State should turn into Ineligible """ + self.logger.debug("Starting test_disable_oobm_ha_state_ineligible") # Enable ha for host self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) @@ -385,59 +251,20 @@ class TestHAKVM(cloudstackTestCase): """ Tests host-ha configuration with valid data """ + self.logger.debug("Starting test_hostha_configure_default_driver") + cmd = self.getHostHaConfigCmd() response = self.apiclient.configureHAForHost(cmd) 
self.assertEqual(response.hostid, cmd.hostid) self.assertEqual(response.haprovider, cmd.provider.lower()) @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_enable_ha_when_host_powerstate_on(self): - """ - Tests that when HA is enabled for a host, if oobm state is on HA State should turn into Available - """ - - self.configureAndStartIpmiServer() - - self.assertIssueCommandState('ON', 'On') - - self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) - cmd = self.getHostHaEnableCmd() - response = self.apiclient.enableHAForHost(cmd) - self.assertEqual(response.hostid, cmd.hostid) - self.assertEqual(response.haenable, True) - - # Verify HA State is Available - self.check_host_transition_to_available() - - response = self.getHost() - if response.hostha.hastate is not "Available": - print response - - self.assertEqual(response.hostha.hastate, "Available") - - self.stopIpmiServer() - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_enable_feature_without_setting_provider(self): - """ - Tests Enable HA without setting the provider, Exception is thrown - """ - host = self.get_non_configured_ha_host() - cmd = self.getHostHaEnableCmd() - cmd.hostid = host.id - - try: - self.apiclient.enableHAForHost(cmd) - except Exception as e: - pass - else: - self.fail("Expected an exception to be thrown, failing") - - @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="treu") def test_hostha_enable_ha_when_host_disabled(self): """ Tests Enable HA when host is disconnected, should be Ineligible """ + self.logger.debug("Starting test_hostha_enable_ha_when_host_disabled") + # Enable HA self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) cmd = self.getHostHaEnableCmd() @@ -461,46 +288,45 @@ class TestHAKVM(cloudstackTestCase): self.enableHost(self.host.id) @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") - def test_hostha_enable_ha_when_host_inMaintenance(self): + def test_hostha_enable_ha_when_host_in_maintenance(self): """ Tests Enable HA when host is in Maintenance mode, should be Ineligible """ - - host = self.getHost() + self.logger.debug("Starting test_hostha_enable_ha_when_host_in_maintenance") # Enable HA self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) cmd = self.getHostHaEnableCmd() - cmd.hostid = host.id + cmd.hostid = self.host.id enable = self.apiclient.enableHAForHost(cmd) self.assertEqual(enable.hostid, cmd.hostid) self.assertEqual(enable.haenable, True) # Prepare for maintenance Host - self.setHostToMaintanance(host.id) + self.setHostToMaintanance(self.host.id) # Check HA State try: - response = self.getHost(host.id) + response = self.getHost(self.host.id) self.assertEqual(response.hostha.hastate, "Ineligible") except Exception as e: - self.cancelMaintenance(host.id) + self.cancelMaintenance() self.fail(e) # Enable Host - self.cancelMaintenance(host.id) + self.cancelMaintenance() @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") def test_hostha_enable_ha_when_host_disconected(self): """ Tests Enable HA when host is disconnected, should be Ineligible """ - host = self.getHost() + self.logger.debug("Starting test_hostha_enable_ha_when_host_disconected") # Enable HA self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) cmd = self.getHostHaEnableCmd() - cmd.hostid = host.id + cmd.hostid = 
self.host.id enable = self.apiclient.enableHAForHost(cmd) self.assertEqual(enable.hostid, cmd.hostid) self.assertEqual(enable.haenable, True) @@ -525,13 +351,13 @@ class TestHAKVM(cloudstackTestCase): """ Tests HA Provider should be possible to be removed when HA is enabled """ + self.logger.debug("Starting test_remove_ha_provider_not_possible") - host = self.getHost() # Enable HA self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) cmd = self.getHostHaEnableCmd() - cmd.hostid = host.id + cmd.hostid = self.host.id enable = self.apiclient.enableHAForHost(cmd) self.assertEqual(enable.hostid, cmd.hostid) self.assertEqual(enable.haenable, True) @@ -543,6 +369,134 @@ class TestHAKVM(cloudstackTestCase): else: self.fail("Expected an exception to be thrown, failing") + @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_kvm_host_degraded(self): + """ + Tests degraded HA state when agent is stopped/killed + """ + + self.configureAndStartIpmiServer() + self.assertIssueCommandState('ON', 'On') + self.configureAndEnableHostHa() + + self.deployVM() + + # Start with the available state + self.waitUntilHostInState("Available") + + # SSH into the KVM Host and executes kill -9 of the agent + self.stopAgent() + + # Check if host would go into Suspect state + try: + self.waitUntilHostInState("Suspect") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Checks if the host would turn into Degraded + try: + self.waitUntilHostInState("Degraded") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + self.startAgent() + self.waitUntilHostInState("Available") + + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_kvm_host_recovering(self): + """ + Tests recovery and fencing HA state transitions + """ + self.configureAndStartIpmiServer() + self.assertIssueCommandState('ON', 'On') + self.configureAndEnableHostHa() + + self.deployVM() + + # Start with the available state + self.waitUntilHostInState("Available") + + # Kill host by triggering a fault + self.killAgent() + self.disableAgent() + self.resetHost() + + # Check if host would go into Suspect state + try: + self.waitUntilHostInState("Suspect") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Checks if the host would turn into Recovered + try: + self.waitUntilHostInState("Recovered") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + self.enableAgent() + self.startAgent() + self.waitUntilHostInState("Available") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_kvm_host_fencing(self): + """ + Tests fencing/fenced HA state when host crashes + """ + self.logger.debug("Starting test_ha_kvm_host_fencing") + + self.configureAndStartIpmiServer() + self.assertIssueCommandState('ON', 'On') + self.configureAndEnableHostHa() + + self.deployVM() + + # Start with the available state + self.waitUntilHostInState("Available") + + # Fail oobm commands + cmd = self.getOobmConfigCmd() + cmd.address = "1.1.1.1" + self.apiclient.configureOutOfBandManagement(cmd) + + # Kill host by triggering a fault + self.killAgent() + self.disableAgent() + self.resetHost() + + # Check if host would go into Suspect 
state + try: + self.waitUntilHostInState("Suspect") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Checks if the host would turn into Fencing + try: + self.waitUntilHostInState("Fencing") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Allow oobm commands to work now + self.configureAndEnableOobm() + + # Checks if the host would turn into Fenced + try: + self.waitUntilHostInState("Fenced") + except Exception as e: + self.startAgent() + raise Exception("Warning: Exception during test execution : %s" % e) + + self.enableAgent() + self.startAgent() + self.cancelMaintenance() + self.waitUntilHostInState("Available") + def configureAndStartIpmiServer(self, power_state=None): """ Setup ipmisim and enable out-of-band management for host @@ -657,7 +611,18 @@ class TestHAKVM(cloudstackTestCase): host = self.getHost() SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute \ - ("service cloudstack-agent start") + ("systemctl start cloudstack-agent || service cloudstack-agent start") + + def stopAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("systemctl stop cloudstack-agent || service cloudstack-agent stop") + + def killAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("kill -9 $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") def disableHost(self, id): @@ -686,15 +651,9 @@ class TestHAKVM(cloudstackTestCase): self.assertEqual(response.resourcestate, "PrepareForMaintenance") - def cancelMaintenance(self, id): + def cancelMaintenance(self): cmd = cancelHostMaintenance.cancelHostMaintenanceCmd() - cmd.id = id - + cmd.id = self.host.id response = self.apiclient.cancelHostMaintenance(cmd) self.assertEqual(response.resourcestate, "Enabled") - - def killAgent(self): - host = self.getHost() - SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ - ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") diff --git a/test/integration/smoke/test_hostha_simulator.py b/test/integration/smoke/test_hostha_simulator.py index 82163b33fd5..bb5fcb97c37 100644 --- a/test/integration/smoke/test_hostha_simulator.py +++ b/test/integration/smoke/test_hostha_simulator.py @@ -23,8 +23,6 @@ from marvin.lib.base import * from marvin.lib.common import * from nose.plugins.attrib import attr -import random - from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer import random @@ -35,7 +33,7 @@ import time class TestHostHA(cloudstackTestCase): - """ Test cases for host HA using Simulator host(s) + """ Test host-ha business logic using Simulator """ def setUp(self): @@ -61,8 +59,12 @@ class TestHostHA(cloudstackTestCase): self.cleanup = [] + def tearDown(self): try: + host = self.getHost() + self.configureAndDisableHostHa(host.id) + self.host = None self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") @@ -73,12 +75,15 @@ class TestHostHA(cloudstackTestCase): except Exception as e: raise 
Exception("Warning: Exception during cleanup : %s" % e) + def getFakeMsId(self): return self.fakeMsId + def getFakeMsRunId(self): return self.fakeMsId * 1000 + def getHost(self, hostId=None): if self.host and hostId is None: return self.host @@ -90,10 +95,13 @@ class TestHostHA(cloudstackTestCase): resourcestate='Enabled', id=hostId ) + if response and len(response) > 0: + random.shuffle(response) self.host = response[0] return self.host - raise self.skipTest("No simulator hosts found, skipping host-ha test") + raise self.skipTest("No suitable hosts found, skipping host-ha test") + def getHostHaConfigCmd(self, provider='simulatorhaprovider'): cmd = configureHAForHost.configureHAForHostCmd() @@ -101,16 +109,25 @@ class TestHostHA(cloudstackTestCase): cmd.hostid = self.getHost().id return cmd + def getHostHaEnableCmd(self): cmd = enableHAForHost.enableHAForHostCmd() cmd.hostid = self.getHost().id return cmd + def getHostHaDisableCmd(self): cmd = disableHAForHost.disableHAForHostCmd() cmd.hostid = self.getHost().id return cmd + + def getListHostHAResources(self): + cmd = listHostHAResources.listHostHAResourcesCmd() + cmd.hostid = self.getHost().id + return cmd + + def configureAndEnableHostHa(self, initialize=True): self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) @@ -118,6 +135,7 @@ class TestHostHA(cloudstackTestCase): if initialize: self.configureSimulatorHAProviderState(True, True, True, False) + def configureAndDisableHostHa(self, hostId): self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) cmd = self.getHostHaDisableCmd() @@ -126,6 +144,7 @@ class TestHostHA(cloudstackTestCase): self.assertEqual(response.hostid, cmd.hostid) self.assertEqual(response.haenable, False) + def enableHostHa(self, hostId): cmd = self.getHostHaEnableCmd() cmd.hostid = hostId @@ -133,6 +152,7 @@ class TestHostHA(cloudstackTestCase): self.assertEqual(response.hostid, cmd.hostid) self.assertEqual(response.haenable, True) + def configureSimulatorHAProviderState(self, health, activity, recover, fence): cmd = configureSimulatorHAProviderState.configureSimulatorHAProviderStateCmd() cmd.hostid = self.getHost().id @@ -143,24 +163,28 @@ class TestHostHA(cloudstackTestCase): response = self.apiclient.configureSimulatorHAProviderState(cmd) self.assertEqual(response.success, 'true') + def getSimulatorHAStateTransitions(self, hostId): cmd = listSimulatorHAStateTransitions.listSimulatorHAStateTransitionsCmd() cmd.hostid = hostId return self.apiclient.listSimulatorHAStateTransitions(cmd) + def checkSyncToState(self, state, interval=5000): def checkForStateSync(expectedState): response = self.getHost(hostId=self.getHost().id).hostha + print("checkForStateSync:: response=%s, expected=%s" % (response, expectedState)) return response.hastate == expectedState, None sync_interval = 1 + int(interval) / 1000 - res, _ = wait_until(sync_interval, 50, checkForStateSync, state) + res, _ = wait_until(sync_interval, 100, checkForStateSync, state) if not res: self.fail("Failed to get host.hastate synced to expected state:" + state) response = self.getHost(hostId=self.getHost().id).hostha self.assertEqual(response.hastate, state) - def get_non_configured_ha_host(self): + + def getNonConfiguredHaHost(self): response = list_hosts( self.apiclient, type='Routing' @@ -171,12 +195,13 @@ class TestHostHA(cloudstackTestCase): else: return None + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") def 
test_hostha_enable_feature_without_setting_provider(self):
         """
         Tests that enabling HA without setting the provider throws an exception
         """
-        host = self.get_non_configured_ha_host()
+        host = self.getNonConfiguredHaHost()
 
         if host is None:
             cloudstackTestCase.skipTest(self, "There are no non-configured hosts. Skipping test.")
@@ -191,6 +216,7 @@
         else:
             self.fail("Expected an exception to be thrown, failing")
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_list_providers(self):
         """
@@ -206,6 +232,7 @@
         response = self.apiclient.listHostHAProviders(cmd)[0]
         self.assertEqual(response.haprovider, 'KVMHAProvider')
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_hostha_configure_invalid_provider(self):
         """
@@ -220,6 +247,7 @@
         else:
             self.fail("Expected an exception to be thrown, failing")
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_hostha_configure_default_driver(self):
         """
@@ -230,6 +258,7 @@
         self.assertEqual(response.hostid, cmd.hostid)
         self.assertEqual(response.haprovider, cmd.provider.lower())
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_enable_feature_invalid(self):
         """
@@ -258,6 +287,7 @@
         else:
             self.fail("Expected an exception to be thrown, failing")
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_disable_feature_invalid(self):
         """
@@ -287,6 +317,7 @@
         else:
             self.fail("Expected an exception to be thrown, failing")
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_hostha_enable_feature_valid(self):
         """
@@ -298,6 +329,7 @@
         self.assertEqual(response.hostid, cmd.hostid)
         self.assertEqual(response.haenable, True)
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_hostha_disable_feature_valid(self):
         """
@@ -312,15 +344,16 @@
         response = self.getHost(hostId=cmd.hostid).hostha
         self.assertEqual(response.hastate, 'Disabled')
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
-    def test_ha_enabledisable_across_clusterzones(self):
+    def test_ha_configure_enabledisable_across_clusterzones(self):
         """
         Tests the HA enable/disable feature at cluster and zone level
         Zone > Cluster > Host
         """
+        host = self.getHost()
         self.configureAndEnableHostHa()
-        host = self.getHost()
         self.checkSyncToState('Available')
         response = self.getHost(hostId=host.id).hostha
         self.assertTrue(response.hastate == 'Available')
@@ -366,12 +399,16 @@
         # Check state sync
         self.checkSyncToState('Available')
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_multiple_mgmt_server_ownership(self):
         """
         Tests HA resource ownership expiry across multiple management servers
         """
-        self.configureAndEnableHostHa()
+        host = self.getHost()
+        self.configureAndDisableHostHa(host.id)
+        self.configureSimulatorHAProviderState(True, True, True, False)
+
self.configureAndEnableHostHa(False) cloudstackVersion = Configurations.listCapabilities(self.apiclient).cloudstackversion @@ -435,23 +472,34 @@ class TestHostHA(cloudstackTestCase): newOwnerId = result[0][0] self.assertTrue(newOwnerId in currentMsHosts) + def checkFSMTransition(self, transition, event, haState, prevHaState, hasActiviyCounter, hasRecoveryCounter): + print("checkFSMTransition:: transition=%s, event=%s, state=%s" % (transition, event, haState)) self.assertEqual(transition.event, event) self.assertEqual(transition.hastate, haState) self.assertEqual(transition.prevhastate, prevHaState) - if hasActiviyCounter: + + if hasActiviyCounter is None: + pass + elif hasActiviyCounter: self.assertTrue(transition.activitycounter > 0) else: self.assertEqual(transition.activitycounter, 0) - if hasRecoveryCounter: + + if hasRecoveryCounter is None: + pass + elif hasRecoveryCounter: self.assertTrue(transition.recoverycounter > 0) else: self.assertEqual(transition.recoverycounter, 0) + def findFSMTransitionToState(self, state, host): transitions = self.getSimulatorHAStateTransitions(host.id) if not transitions: + print("findFSMTransition:: no transitions returned") return False, (None, None, None) + previousTransition = None stateTransition = None nextTransition = None @@ -463,10 +511,13 @@ class TestHostHA(cloudstackTestCase): stateTransition = transition if not stateTransition: previousTransition = transition + + print("findFSMTransition:: prev=%s, cur=%s, next=%s, find state=%s" % (previousTransition, stateTransition, nextTransition, state)) if stateTransition: return True, (previousTransition, stateTransition, nextTransition,) return False, (previousTransition, stateTransition, nextTransition,) + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") def test_ha_verify_fsm_available(self): """ @@ -479,12 +530,13 @@ class TestHostHA(cloudstackTestCase): self.configureSimulatorHAProviderState(True, True, True, False) self.configureAndEnableHostHa(False) - res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'available', host) + res, (_, T, _) = wait_until(3, 20, self.findFSMTransitionToState, 'available', host) if not res: self.fail("FSM did not transition to available state") self.checkFSMTransition(T, 'enabled', 'available', 'disabled', False, False) + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") def test_ha_verify_fsm_degraded(self): """ @@ -494,26 +546,26 @@ class TestHostHA(cloudstackTestCase): Available->Suspect<->Checking->Degraded->Available """ host = self.getHost() + self.configureAndDisableHostHa(host.id) self.configureSimulatorHAProviderState(False, True, True, False) self.configureAndEnableHostHa(False) # Initial health check failure - res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', host) + res, (_, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'suspect', host) if not res: self.fail("FSM did not transition to suspect state") self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False) # Check transition to Degraded - res, (prevT, T, nextT) = wait_until(2, 50, self.findFSMTransitionToState, 'degraded', host) + res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'degraded', host) if not res: self.fail("FSM did not transition to degraded state") if prevT: self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False) self.checkFSMTransition(T, 
'activitycheckfailureunderthresholdratio', 'degraded', 'checking', True, False)
-        if nextT:
-            self.checkFSMTransition(nextT, 'periodicrecheckresourceactivity', 'suspect', 'degraded', False, False)
+
 
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_verify_fsm_recovering(self):
         """
@@ -524,36 +576,33 @@
         Available->Suspect<->Checking->Recovering->Recovered (retry loop)->Fencing
         """
         host = self.getHost()
+        self.configureAndDisableHostHa(host.id)
         self.configureSimulatorHAProviderState(False, False, True, False)
         self.configureAndEnableHostHa(False)
 
         # Initial health check failure
-        res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', host)
+        res, (_, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'suspect', host)
         if not res:
             self.fail("FSM did not transition to suspect state")
         self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False)
 
         # Check transition to recovering
-        res, (prevT, T, nextT) = wait_until(2, 100, self.findFSMTransitionToState, 'recovering', host)
+        res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'recovering', host)
         if not res:
             self.fail("FSM did not transition to recovering state")
         if prevT:
             self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False)
         self.checkFSMTransition(T, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, False)
-        if nextT:
-            self.checkFSMTransition(nextT, 'recovered', 'recovered', 'recovering', False, True)
 
         # Check transition to fencing due to recovery attempts exceeded
-        res, (prevT, T, nextT) = wait_until(2, 100, self.findFSMTransitionToState, 'fencing', host)
+        res, (_, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'fencing', host)
         if not res:
             self.fail("FSM did not transition to fencing state")
-        if prevT:
-            self.checkFSMTransition(prevT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True,
-                                    True)
-        self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
+        self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', None, True)
+
 
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_ha_verify_fsm_fenced(self):
         """
@@ -568,15 +617,13 @@
         self.configureAndEnableHostHa(False)
 
         # Check for transition to fenced
-        res, (prevT, T, _) = wait_until(2, 100, self.findFSMTransitionToState, 'fenced', host)
+        res, (prevT, T, _) = wait_until(3, 100, self.findFSMTransitionToState, 'fenced', host)
         if not res:
             self.fail("FSM did not transition to fenced state")
         self.checkFSMTransition(prevT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
         self.checkFSMTransition(T, 'fenced', 'fenced', 'fencing', False, False)
 
-        # TODO: add test case for HA vm reboot checks
-
         # Simulate manual recovery of the host and cancel maintenance mode
         self.configureSimulatorHAProviderState(True, True, True, False)
         cancelCmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
@@ -584,13 +631,13 @@
         self.apiclient.cancelHostMaintenance(cancelCmd)
 
         # Check for transition to available after manual recovery
-        res, (prevT, T, _) = wait_until(2, 100, self.findFSMTransitionToState, 'available', host)
+        res, (prevT, T, _) = wait_until(3, 50, self.findFSMTransitionToState, 'available', host)
         if not res:
             self.fail("FSM did not transition to available state")
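+        # Expected manual-recovery path (a sketch, per the assertion kept
+        # below): Fenced -> Ineligible -> Available once the simulated health
+        # checks pass again and host maintenance is cancelled.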
-        self.checkFSMTransition(prevT, 'healthcheckpassed', 'ineligible', 'fenced', False, False)
         self.checkFSMTransition(T, 'eligible', 'available', 'ineligible', False, False)
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_configure_ha_provider_invalid(self):
         """
@@ -623,6 +670,7 @@
         else:
             self.fail("Expected an exception to be thrown, failing")
 
+
     @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
     def test_configure_ha_provider_valid(self):
         """
@@ -654,3 +702,75 @@
         # Check the response contains the set provider and hostID
         self.assertEqual(response.haprovider, conf_ha_cmd.provider)
         self.assertEqual(response.hostid, conf_ha_cmd.hostid)
+
+
+    def getHaProvider(self, host):
+        cmd = listHostHAProviders.listHostHAProvidersCmd()
+        cmd.hypervisor = host.hypervisor
+        response = self.apiclient.listHostHAProviders(cmd)
+        return response[0].haprovider
+
+
+    def configureHaProvider(self):
+        cmd = self.getHostHaConfigCmd(self.getHaProvider(self.getHost()))
+        return self.apiclient.configureHAForHost(cmd)
+
+
+    @attr(tags=["advanced",
+                "advancedns",
+                "smoke",
+                "basic",
+                "sg"],
+          required_hardware="false")
+    def test_list_ha_for_host(self):
+        """
+        Test that verifies the listHostHAResources API
+        """
+        self.configureHaProvider()
+        db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config")
+
+        cmd = self.getListHostHAResources()
+        del cmd.hostid
+        response = self.apiclient.listHostHAResources(cmd)
+
+        self.assertEqual(db_count[0][0], len(response))
+
+
+    @attr(tags=["advanced",
+                "advancedns",
+                "smoke",
+                "basic",
+                "sg"],
+          required_hardware="false")
+    def test_list_ha_for_host_valid(self):
+        """
+        Tests listing HA resources for a specific host
+        """
+
+        self.configureHaProvider()
+        cmd = self.getListHostHAResources()
+        response = self.apiclient.listHostHAResources(cmd)
+        self.assertEqual(response[0].hostid, cmd.hostid)
+
+
+    @attr(tags=["advanced",
+                "advancedns",
+                "smoke",
+                "basic",
+                "sg"],
+          required_hardware="false")
+    def test_list_ha_for_host_invalid(self):
+        """
+        Tests that listHostHAResources raises an exception when called with invalid data
+        """
+
+        self.configureHaProvider()
+        cmd = self.getListHostHAResources()
+        cmd.hostid = "someinvalidvalue"
+
+        try:
+            self.apiclient.listHostHAResources(cmd)
+        except Exception:
+            pass
+        else:
+            self.fail("Expected an exception to be thrown, failing")
diff --git a/tools/travis/install.sh b/tools/travis/install.sh
index 61cdec00849..d759528208f 100755
--- a/tools/travis/install.sh
+++ b/tools/travis/install.sh
@@ -29,7 +29,7 @@ if [ $TEST_SEQUENCE_NUMBER -eq 1 ]; then
     cd nonoss && bash -x install-non-oss.sh && cd ..
     git clean -fdx .
mvn -P developer,systemvm -Dsimulator -Dnoredist --projects='org.apache.cloudstack:cloudstack' org.apache.rat:apache-rat-plugin:0.12:check - mvn -P developer,systemvm -Dsimulator -Dnoredist clean install + mvn -q -B -P developer,systemvm -Dsimulator -Dnoredist clean install else mvn -Pdeveloper -Dsimulator clean install -DskipTests -T4 | egrep "Building|Tests|SUCCESS|FAILURE" fi diff --git a/tools/travis/script.sh b/tools/travis/script.sh index 48eb3f6b688..c370225bb56 100755 --- a/tools/travis/script.sh +++ b/tools/travis/script.sh @@ -27,6 +27,7 @@ TESTS=($@) echo "Running tests: " ${TESTS[@]} for suite in "${TESTS[@]}" ; do + echo "Currently running test: $suite" nosetests --with-xunit --xunit-file=integration-test-results/$suite.xml --with-marvin --marvin-config=setup/dev/advanced.cfg test/integration/$suite.py -s -a tags=advanced,required_hardware=false --zone=Sandbox-simulator --hypervisor=simulator || true ; done