mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Merge pull request #1496 from shapeblue/kvm-ha
CLOUDSTACK-9350: KVM-HA: Fix CheckOnHost for local storage; also skip HA on VMs that are using local storage. * pr/1496 Signed-off-by: Will Stevens <williamstevens@gmail.com>
This commit is contained in:
commit
fa3bce5a83
@ -27,7 +27,11 @@ import com.cloud.host.Status;
|
||||
import com.cloud.host.dao.HostDao;
|
||||
import com.cloud.hypervisor.Hypervisor;
|
||||
import com.cloud.resource.ResourceManager;
|
||||
import com.cloud.storage.Storage.StoragePoolType;
|
||||
import com.cloud.utils.component.AdapterBase;
|
||||
|
||||
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
|
||||
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
@ -41,6 +45,8 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
|
||||
AgentManager _agentMgr;
|
||||
@Inject
|
||||
ResourceManager _resourceMgr;
|
||||
@Inject
|
||||
PrimaryDataStoreDao _storagePoolDao;
|
||||
|
||||
@Override
|
||||
public boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) throws UnknownVM {
|
||||
@ -60,6 +66,21 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
|
||||
if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<StoragePoolVO> clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId());
|
||||
boolean hasNfs = false;
|
||||
for (StoragePoolVO pool : clusterPools) {
|
||||
if (pool.getPoolType() == StoragePoolType.NetworkFilesystem) {
|
||||
hasNfs = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!hasNfs) {
|
||||
s_logger.warn(
|
||||
"Agent investigation was requested on host " + agent + ", but host does not support investigation because it has no NFS storage. Skipping investigation.");
|
||||
return Status.Disconnected;
|
||||
}
|
||||
|
||||
Status hostStatus = null;
|
||||
Status neighbourStatus = null;
|
||||
CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
|
||||
@ -78,7 +99,8 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
|
||||
|
||||
List<HostVO> neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
|
||||
for (HostVO neighbor : neighbors) {
|
||||
if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) {
|
||||
if (neighbor.getId() == agent.getId()
|
||||
|| (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) {
|
||||
continue;
|
||||
}
|
||||
s_logger.debug("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId());
|
||||
|
||||
1
pom.xml
1
pom.xml
@ -921,6 +921,7 @@
|
||||
<exclude>**/.checkstyle</exclude>
|
||||
<exclude>scripts/installer/windows/acs_license.rtf</exclude>
|
||||
<exclude>**/*.md</exclude>
|
||||
<exclude>test/integration/component/test_host_ha.sh</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
@ -62,6 +62,7 @@ import com.cloud.host.dao.HostDao;
|
||||
import com.cloud.hypervisor.Hypervisor.HypervisorType;
|
||||
import com.cloud.resource.ResourceManager;
|
||||
import com.cloud.server.ManagementServer;
|
||||
import com.cloud.service.ServiceOfferingVO;
|
||||
import com.cloud.service.dao.ServiceOfferingDao;
|
||||
import com.cloud.storage.StorageManager;
|
||||
import com.cloud.storage.dao.GuestOSCategoryDao;
|
||||
@ -264,6 +265,13 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai
|
||||
"Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : ""));
|
||||
|
||||
for (VMInstanceVO vm : reorderedVMList) {
|
||||
ServiceOfferingVO vmOffering = _serviceOfferingDao.findById(vm.getServiceOfferingId());
|
||||
if (vmOffering.getUseLocalStorage()) {
|
||||
if (s_logger.isDebugEnabled()){
|
||||
s_logger.debug("Skipping HA on vm " + vm + ", because it uses local storage. Its fate is tied to the host.");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (s_logger.isDebugEnabled()) {
|
||||
s_logger.debug("Notifying HA Mgr of to restart vm " + vm.getId() + "-" + vm.getInstanceName());
|
||||
}
|
||||
|
||||
@ -59,6 +59,7 @@ import com.cloud.host.dao.HostDao;
|
||||
import com.cloud.hypervisor.Hypervisor.HypervisorType;
|
||||
import com.cloud.resource.ResourceManager;
|
||||
import com.cloud.server.ManagementServer;
|
||||
import com.cloud.service.ServiceOfferingVO;
|
||||
import com.cloud.service.dao.ServiceOfferingDao;
|
||||
import com.cloud.storage.StorageManager;
|
||||
import com.cloud.storage.dao.GuestOSCategoryDao;
|
||||
@ -195,6 +196,7 @@ public class HighAvailabilityManagerImplTest {
|
||||
Mockito.when(_dcDao.findById(Mockito.anyLong())).thenReturn(Mockito.mock(DataCenterVO.class));
|
||||
Mockito.when(_haDao.findPreviousHA(Mockito.anyLong())).thenReturn(Arrays.asList(Mockito.mock(HaWorkVO.class)));
|
||||
Mockito.when(_haDao.persist((HaWorkVO)Mockito.anyObject())).thenReturn(Mockito.mock(HaWorkVO.class));
|
||||
Mockito.when(_serviceOfferingDao.findById(vm1.getServiceOfferingId())).thenReturn(Mockito.mock(ServiceOfferingVO.class));
|
||||
|
||||
highAvailabilityManager.scheduleRestartForVmsOnHost(hostVO, true);
|
||||
}
|
||||
|
||||
516
test/integration/component/test_host_ha.py
Normal file
516
test/integration/component/test_host_ha.py
Normal file
@ -0,0 +1,516 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from __builtin__ import False
|
||||
""" BVT tests for Hosts Maintenance
|
||||
"""
|
||||
|
||||
# Import Local Modules
|
||||
from marvin.codes import FAILED
|
||||
from marvin.cloudstackTestCase import *
|
||||
from marvin.cloudstackAPI import *
|
||||
from marvin.lib.utils import *
|
||||
from marvin.lib.base import *
|
||||
from marvin.lib.common import *
|
||||
from nose.plugins.attrib import attr
|
||||
|
||||
from time import sleep
|
||||
|
||||
_multiprocess_shared_ = False
|
||||
|
||||
|
||||
class TestHostHA(cloudstackTestCase):
|
||||
|
||||
    def setUp(self):
        # Per-test setup: wire up a console logger, grab the marvin API/DB
        # clients, resolve the zone/pod under test, and define the service
        # offerings / VM parameters the HA tests deploy with.
        self.logger = logging.getLogger('TestHM')
        self.stream_handler = logging.StreamHandler()
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(self.stream_handler)
        self.apiclient = self.testClient.getApiClient()
        self.hypervisor = self.testClient.getHypervisorInfo()
        self.dbclient = self.testClient.getDbConnection()
        self.services = self.testClient.getParsedTestDataConfig()
        self.zone = get_zone(self.apiclient, self.testClient.getZoneForTests())
        self.pod = get_pod(self.apiclient, self.zone.id)
        # Resources appended here are destroyed in tearDown.
        self.cleanup = []
        # NOTE(review): the parsed test-data config loaded above is
        # immediately replaced by this hard-coded dict — presumably
        # intentional so the tests are self-contained; confirm.
        self.services = {
            # Shared-storage offering used by the NFS-based HA test.
            "service_offering": {
                "name": "Ultra Tiny Instance",
                "displaytext": "Ultra Tiny Instance",
                "cpunumber": 1,
                "cpuspeed": 100,
                "memory": 128,
            },
            # Local-storage offering used by the local/mixed-storage tests.
            "service_offering_local": {
                "name": "Ultra Tiny Local Instance",
                "displaytext": "Ultra Tiny Local Instance",
                "cpunumber": 1,
                "cpuspeed": 100,
                "memory": 128,
                "storagetype": "local"
            },
            "vm": {
                "username": "root",
                "password": "password",
                "ssh_port": 22,
                # Hypervisor type should be same as
                # hypervisor type of cluster
                "privateport": 22,
                "publicport": 22,
                "protocol": 'TCP',
            },
            "natrule": {
                "privateport": 22,
                "publicport": 22,
                "startport": 22,
                "endport": 22,
                "protocol": "TCP",
                "cidrlist": '0.0.0.0/0',
            },
            "ostype": 'CentOS 5.3 (64-bit)',
            "sleep": 60,
            "timeout": 10,
        }
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
try:
|
||||
# Clean up, terminate the created templates
|
||||
cleanup_resources(self.apiclient, self.cleanup)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception("Warning: Exception during cleanup : %s" % e)
|
||||
|
||||
return
|
||||
|
||||
def createVMs(self, hostId, number, local):
|
||||
|
||||
self.template = get_template(
|
||||
self.apiclient,
|
||||
self.zone.id,
|
||||
self.services["ostype"]
|
||||
)
|
||||
|
||||
if self.template == FAILED:
|
||||
assert False, "get_template() failed to return template with description %s" % self.services["ostype"]
|
||||
|
||||
self.logger.debug("Using template %s " % self.template.id)
|
||||
|
||||
if local:
|
||||
self.service_offering = ServiceOffering.create(
|
||||
self.apiclient,
|
||||
self.services["service_offering_local"]
|
||||
)
|
||||
else:
|
||||
self.service_offering = ServiceOffering.create(
|
||||
self.apiclient,
|
||||
self.services["service_offering"]
|
||||
)
|
||||
|
||||
|
||||
self.logger.debug("Using service offering %s " % self.service_offering.id)
|
||||
|
||||
vms = []
|
||||
for i in range(0, number):
|
||||
self.services["vm"]["zoneid"] = self.zone.id
|
||||
self.services["vm"]["template"] = self.template.id
|
||||
self.services["vm"]["displayname"] = 'vm' + str(i)
|
||||
self.services["vm"]["hypervisor"] = self.hypervisor
|
||||
vm = VirtualMachine.create(
|
||||
self.apiclient,
|
||||
self.services["vm"],
|
||||
serviceofferingid=self.service_offering.id,
|
||||
hostid=hostId
|
||||
)
|
||||
vms.append(vm)
|
||||
self.cleanup.append(vm)
|
||||
self.logger.debug("VM create = {}".format(vm.id))
|
||||
return vm
|
||||
|
||||
def noOfVMsOnHost(self, hostId):
|
||||
listVms = VirtualMachine.list(
|
||||
self.apiclient,
|
||||
hostid=hostId
|
||||
)
|
||||
vmnos = 0
|
||||
if (listVms is not None):
|
||||
for vm in listVms:
|
||||
self.logger.debug('VirtualMachine on Hyp 1 = {}'.format(vm.id))
|
||||
vmnos = vmnos + 1
|
||||
|
||||
return vmnos
|
||||
|
||||
def checkHostDown(self, fromHostIp, testHostIp):
|
||||
try:
|
||||
ssh = SshClient(fromHostIp, 22, "root", "password")
|
||||
res = ssh.execute("ping -c 1 %s" % testHostIp)
|
||||
result = str(res)
|
||||
if result.count("100% packet loss") == 1:
|
||||
return True, 1
|
||||
else:
|
||||
return False, 1
|
||||
except Exception as e:
|
||||
self.logger.debug("Got exception %s" % e)
|
||||
return False, 1
|
||||
|
||||
def checkHostUp(self, fromHostIp, testHostIp):
|
||||
try:
|
||||
ssh = SshClient(fromHostIp, 22, "root", "password")
|
||||
res = ssh.execute("ping -c 1 %s" % testHostIp)
|
||||
result = str(res)
|
||||
if result.count(" 0% packet loss") == 1:
|
||||
return True, 1
|
||||
else:
|
||||
return False, 1
|
||||
except Exception as e:
|
||||
self.logger.debug("Got exception %s" % e)
|
||||
return False, 1
|
||||
|
||||
|
||||
def isOnlyNFSStorageAvailable(self):
|
||||
if self.zone.localstorageenabled:
|
||||
return False
|
||||
storage_pools = StoragePool.list(
|
||||
self.apiclient,
|
||||
zoneid=self.zone.id,
|
||||
listall=True
|
||||
)
|
||||
self.assertEqual(
|
||||
isinstance(storage_pools, list),
|
||||
True,
|
||||
"Check if listStoragePools returns a valid response"
|
||||
)
|
||||
for storage_pool in storage_pools:
|
||||
if storage_pool.type == u'NetworkFilesystem':
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def isOnlyLocalStorageAvailable(self):
|
||||
if not(self.zone.localstorageenabled):
|
||||
return False
|
||||
|
||||
storage_pools = StoragePool.list(
|
||||
self.apiclient,
|
||||
zoneid=self.zone.id,
|
||||
listall=True
|
||||
)
|
||||
self.assertEqual(
|
||||
isinstance(storage_pools, list),
|
||||
True,
|
||||
"Check if listStoragePools returns a valid response"
|
||||
)
|
||||
for storage_pool in storage_pools:
|
||||
if storage_pool.type == u'NetworkFilesystem':
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def isLocalAndNFSStorageAvailable(self):
|
||||
if not(self.zone.localstorageenabled):
|
||||
return False
|
||||
|
||||
storage_pools = StoragePool.list(
|
||||
self.apiclient,
|
||||
zoneid=self.zone.id,
|
||||
listall=True
|
||||
)
|
||||
self.assertEqual(
|
||||
isinstance(storage_pools, list),
|
||||
True,
|
||||
"Check if listStoragePools returns a valid response"
|
||||
)
|
||||
for storage_pool in storage_pools:
|
||||
if storage_pool.type == u'NetworkFilesystem':
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def checkHostStateInCloudstack(self, state, hostId):
|
||||
try:
|
||||
listHost = Host.list(
|
||||
self.apiclient,
|
||||
type='Routing',
|
||||
zoneid=self.zone.id,
|
||||
podid=self.pod.id,
|
||||
id=hostId
|
||||
)
|
||||
self.assertEqual(
|
||||
isinstance(listHost, list),
|
||||
True,
|
||||
"Check if listHost returns a valid response"
|
||||
)
|
||||
|
||||
self.assertEqual(
|
||||
len(listHost),
|
||||
1,
|
||||
"Check if listHost returns a host"
|
||||
)
|
||||
self.logger.debug(" Host state is %s " % listHost[0].state)
|
||||
if listHost[0].state == state:
|
||||
return True, 1
|
||||
else:
|
||||
return False, 1
|
||||
except Exception as e:
|
||||
self.logger.debug("Got exception %s" % e)
|
||||
return False, 1
|
||||
|
||||
|
||||
def disconnectHostfromNetwork(self, hostIp, timeout):
|
||||
srcFile = os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh"
|
||||
if not(os.path.isfile(srcFile)):
|
||||
self.logger.debug("File %s not found" % srcFile)
|
||||
raise unittest.SkipTest("Script file %s required for HA not found" % srcFile);
|
||||
|
||||
ssh = SshClient(hostIp, 22, "root", "password")
|
||||
ssh.scp(srcFile, "/root/test_host_ha.sh")
|
||||
ssh.execute("nohup sh /root/test_host_ha.sh %s > /dev/null 2>&1 &\n" % timeout)
|
||||
return
|
||||
|
||||
|
||||
@attr(
|
||||
tags=[
|
||||
"advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"eip",
|
||||
"sg"],
|
||||
required_hardware="true")
|
||||
def test_01_host_ha_with_nfs_storagepool_with_vm(self):
|
||||
|
||||
if not(self.isOnlyNFSStorageAvailable()):
|
||||
raise unittest.SkipTest("Skipping this test as this is for NFS store only.");
|
||||
return
|
||||
|
||||
listHost = Host.list(
|
||||
self.apiclient,
|
||||
type='Routing',
|
||||
zoneid=self.zone.id,
|
||||
podid=self.pod.id,
|
||||
)
|
||||
for host in listHost:
|
||||
self.logger.debug('Hypervisor = {}'.format(host.id))
|
||||
|
||||
|
||||
if len(listHost) != 2:
|
||||
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
return
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
|
||||
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
|
||||
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
|
||||
|
||||
if no_of_vms < 5:
|
||||
self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha")
|
||||
no_vm_req = 5 - no_of_vms
|
||||
if (no_vm_req > 0):
|
||||
self.logger.debug("Creating vms = {}".format(no_vm_req))
|
||||
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False)
|
||||
|
||||
ha_host = listHost[1]
|
||||
other_host = listHost[0]
|
||||
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
|
||||
ha_host = listHost[0]
|
||||
other_host = listHost[1]
|
||||
|
||||
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
|
||||
|
||||
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostDown):
|
||||
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
|
||||
|
||||
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id)
|
||||
#the test could have failed here but we will try our best to get host back in consistent state
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(ha_host.id)
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
#
|
||||
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostUp):
|
||||
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
|
||||
|
||||
|
||||
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
|
||||
|
||||
if not(hostDownInCloudstack):
|
||||
raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
if not(hostUpInCloudstack):
|
||||
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
|
||||
return
|
||||
|
||||
|
||||
@attr(
|
||||
tags=[
|
||||
"advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"eip",
|
||||
"sg"],
|
||||
required_hardware="true")
|
||||
def test_02_host_ha_with_local_storage_and_nfs(self):
|
||||
|
||||
if not(self.isLocalAndNFSStorageAvailable()):
|
||||
raise unittest.SkipTest("Skipping this test as this is for Local storage and NFS storage only.");
|
||||
return
|
||||
|
||||
listHost = Host.list(
|
||||
self.apiclient,
|
||||
type='Routing',
|
||||
zoneid=self.zone.id,
|
||||
podid=self.pod.id,
|
||||
)
|
||||
for host in listHost:
|
||||
self.logger.debug('Hypervisor = {}'.format(host.id))
|
||||
|
||||
|
||||
if len(listHost) != 2:
|
||||
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
return
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
|
||||
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
|
||||
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
|
||||
|
||||
if no_of_vms < 5:
|
||||
self.logger.debug("test_02: Create VMs as there are not enough vms to check host ha")
|
||||
no_vm_req = 5 - no_of_vms
|
||||
if (no_vm_req > 0):
|
||||
self.logger.debug("Creating vms = {}".format(no_vm_req))
|
||||
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True)
|
||||
|
||||
ha_host = listHost[1]
|
||||
other_host = listHost[0]
|
||||
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
|
||||
ha_host = listHost[0]
|
||||
other_host = listHost[1]
|
||||
|
||||
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
|
||||
|
||||
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostDown):
|
||||
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
|
||||
|
||||
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id)
|
||||
#the test could have failed here but we will try our best to get host back in consistent state
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(ha_host.id)
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
#
|
||||
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostUp):
|
||||
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
|
||||
|
||||
|
||||
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
|
||||
|
||||
if not(hostDownInCloudstack):
|
||||
raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
if not(hostUpInCloudstack):
|
||||
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
|
||||
return
|
||||
|
||||
|
||||
|
||||
@attr(
|
||||
tags=[
|
||||
"advanced",
|
||||
"advancedns",
|
||||
"smoke",
|
||||
"basic",
|
||||
"eip",
|
||||
"sg"],
|
||||
required_hardware="true")
|
||||
def test_03_host_ha_with_only_local_storage(self):
|
||||
|
||||
if not(self.isOnlyLocalStorageAvailable()):
|
||||
raise unittest.SkipTest("Skipping this test as this is for Local storage only.");
|
||||
return
|
||||
|
||||
listHost = Host.list(
|
||||
self.apiclient,
|
||||
type='Routing',
|
||||
zoneid=self.zone.id,
|
||||
podid=self.pod.id,
|
||||
)
|
||||
for host in listHost:
|
||||
self.logger.debug('Hypervisor = {}'.format(host.id))
|
||||
|
||||
|
||||
if len(listHost) != 2:
|
||||
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
|
||||
return
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
|
||||
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
|
||||
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
|
||||
if no_of_vms < 5:
|
||||
self.logger.debug("test_03: Create VMs as there are not enough vms to check host ha")
|
||||
no_vm_req = 5 - no_of_vms
|
||||
if (no_vm_req > 0):
|
||||
self.logger.debug("Creating vms = {}".format(no_vm_req))
|
||||
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True)
|
||||
|
||||
ha_host = listHost[1]
|
||||
other_host = listHost[0]
|
||||
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
|
||||
ha_host = listHost[0]
|
||||
other_host = listHost[1]
|
||||
|
||||
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
|
||||
|
||||
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostDown):
|
||||
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
|
||||
|
||||
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Alert", ha_host.id)
|
||||
#the test could have failed here but we will try our best to get host back in consistent state
|
||||
|
||||
no_of_vms = self.noOfVMsOnHost(ha_host.id)
|
||||
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
|
||||
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
|
||||
#
|
||||
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
|
||||
if not(hostUp):
|
||||
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
|
||||
|
||||
|
||||
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
|
||||
|
||||
if not(hostDownInCloudstack):
|
||||
raise self.fail("Host is not in alert %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
if not(hostUpInCloudstack):
|
||||
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
|
||||
|
||||
return
|
||||
40
test/integration/component/test_host_ha.sh
Executable file
40
test/integration/component/test_host_ha.sh
Executable file
@ -0,0 +1,40 @@
|
||||
#!/bin/bash

# Take every eth* interface down for the requested number of seconds,
# stopping the cloudstack-agent (and removing it from boot) while the
# network is down, then bring the interfaces and the agent back up.
#
# Usage: test_host_ha.sh <duration-in-seconds>  (1..5000)

usage() { echo "Usage: $0 <duration in seconds for downing all network interfaces>"; exit 1; }

# BUG FIX: validate emptiness FIRST.  The original ran the case statement
# before the -z check, so an empty argument matched '' and hit a bare
# `exit` — exiting with status 0 and never printing the usage line.
if [ -z "$1" ]; then
    usage
fi

case "$1" in
    *[!0-9]*) echo "The parameter should be an integer"; exit 1 ;;
    *) echo "$1" ;;
esac

if [ "$1" -lt 1 ]; then
    echo "Down time should be at least 1 second"
    exit 1
elif [ "$1" -gt 5000 ]; then
    echo "Down time should be less than 5000 second"
    exit 1
fi

# Bring down every eth* interface (loopback excluded by the sed filter).
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth`
do
    ifconfig $i down
done

# Stop the agent and keep it from starting at boot while "down".
service cloudstack-agent stop
update-rc.d -f cloudstack-agent remove

sleep "$1"

# Restore networking.
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth`
do
    ifconfig $i up
done

# Re-enable and restart the agent.
update-rc.d -f cloudstack-agent defaults
service cloudstack-agent start
|
||||
@ -523,15 +523,14 @@ def verifyRouterState(apiclient, routerid, allowedstates):
|
||||
return [PASS, None]
|
||||
|
||||
|
||||
|
||||
def wait_until(retry_interval=2, no_of_times=2, callback=None, *callback_args):
|
||||
""" Utility method to try out the callback method at most no_of_times with a interval of retry_interval,
|
||||
Will return immediately if callback returns True. The callback method should be written to return a list of values first being a boolean """
|
||||
|
||||
if callback is None:
|
||||
raise ("Bad value for callback method !")
|
||||
|
||||
wait_result = False
|
||||
|
||||
wait_result = False
|
||||
for i in range(0,no_of_times):
|
||||
time.sleep(retry_interval)
|
||||
wait_result, return_val = callback(*callback_args)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user