CLOUDSTACK-8616 - Removing the Priority from KeepaliveD configuration

- We use nopreempt mode with the state set to EQUAL on both nodes, so there is no need to have priorities set up
   - Do not add IPs as comments to the configuration. If a new guest interface is added, the file will change anyway.
     - This was used in the past when keepalived would restart for each new interface added
   - Removed the long sleep from the tests: we now sleep 5 seconds per PF rule added

CLOUDSTACK-8616 - Fix keepalived.ts/2 files comparison

   - Add a call to set_fault() in case the router transitions to that state
   - Removing commented out code

CLOUDSTACK-8616 - Fixing check_heartbeat.sh.templ

CLOUDSTACK-8616 - Call set_fault from the check_heartbeat.sh script

Signed-off-by: wilderrodrigues <wrodrigues@schubergphilis.com>
This commit is contained in:
wilderrodrigues 2015-07-08 07:24:35 +02:00
parent f5ed824ea2
commit 20be22c698
8 changed files with 27 additions and 60 deletions

View File

@ -133,10 +133,6 @@ class CsInterface:
return self.get_attr("gateway")
else:
return self.config.cmdline().get_guest_gw()
# if self.config.cmdline().is_redundant():
# return self.config.cmdline().get_guest_gw()
# else:
# return self.get_ip()
def ip_in_subnet(self, ip):
ipo = IPAddress(ip)
@ -410,10 +406,6 @@ class CsIP:
])
if self.get_type() in ["public"]:
# self.fw.append(["nat", "front",
# "-A POSTROUTING -o %s -j SNAT --to-source %s" %
# (self.dev, self.address['public_ip'])
# ])
self.fw.append(["", "front",
"-A FORWARD -o %s -d %s -j ACL_INBOUND_%s" % (self.dev, self.address['network'], self.dev)
])
@ -457,7 +449,6 @@ class CsIP:
vpccidr = self.config.cmdline().get_vpccidr()
self.fw.append(["filter", "", "-A FORWARD -s %s ! -d %s -j ACCEPT" % (vpccidr, vpccidr)])
self.fw.append(["nat", "", "-A POSTROUTING -j SNAT -o %s --to-source %s" % (self.dev, self.address['public_ip'])])
# route.flush()
def list(self):
self.iplist = {}

View File

@ -57,11 +57,6 @@ class CsCmdLine(CsDataBag):
self.dbag['config'] = {}
return self.dbag['config']
def get_priority(self):
if "router_pr" in self.idata():
return self.idata()['router_pr']
return 99
def set_guest_gw(self, val):
self.idata()['guestgw'] = val
@ -70,9 +65,6 @@ class CsCmdLine(CsDataBag):
return self.idata()['guestgw']
return False
def set_priority(self, val):
self.idata()['router_pr'] = val
def is_redundant(self):
if "redundant_router" in self.idata():
return self.idata()['redundant_router'] == "true"

View File

@ -108,16 +108,9 @@ class CsRedundant(object):
# keepalived configuration
file = CsFile(self.KEEPALIVED_CONF)
ads = [o for o in self.address.get_ips() if o.is_public()]
# Add a comment for each public IP. If any change this will cause keepalived to restart
# As things stand keepalived will be configured before the IP is added or deleted
i = 0
for o in ads:
file.addeq("! %s=%s" % (i, o.get_cidr()))
i = i + 1
file.search(" router_id ", " router_id %s" % self.cl.get_name())
file.search(" priority ", " priority %s" % self.cl.get_priority())
file.search(" interface ", " interface %s" % guest.get_device())
file.search(" state ", " state %s" % "EQUAL")
file.search(" virtual_router_id ", " virtual_router_id %s" % self.cl.get_router_id())
file.greplace("[RROUTER_BIN_PATH]", self.CS_ROUTER_DIR)
file.section("authentication {", "}", [" auth_type AH \n", " auth_pass %s\n" % self.cl.get_router_password()])
@ -153,7 +146,7 @@ class CsRedundant(object):
proc = CsProcess(['/usr/sbin/keepalived', '--vrrp'])
if not proc.find():
CsHelper.service("keepalived", "restart")
CsHelper.service("keepalived", "start")
def release_lock(self):
try:
@ -185,6 +178,7 @@ class CsRedundant(object):
if not self.cl.is_redundant():
logging.error("Set fault called on non-redundant router")
return
self.set_lock()
logging.info("Router switched to fault mode")
ads = [o for o in self.address.get_ips() if o.is_public()]
@ -208,11 +202,7 @@ class CsRedundant(object):
if not self.cl.is_redundant():
logging.error("Set backup called on non-redundant router")
return
"""
if not self.cl.is_master():
logging.error("Set backup called on node that is already backup")
return
"""
self.set_lock()
logging.debug("Setting router to backup")
ads = [o for o in self.address.get_ips() if o.is_public()]
@ -232,7 +222,7 @@ class CsRedundant(object):
for o in ads:
CsPasswdSvc(o.get_gateway()).stop()
CsHelper.service("dnsmasq", "stop")
# self._set_priority(self.CS_PRIO_DOWN)
self.cl.set_master_state(False)
self.cl.save()
self.release_lock()
@ -243,11 +233,7 @@ class CsRedundant(object):
if not self.cl.is_redundant():
logging.error("Set master called on non-redundant router")
return
"""
if self.cl.is_master():
logging.error("Set master called on master node")
return
"""
self.set_lock()
logging.debug("Setting router to master")
ads = [o for o in self.address.get_ips() if o.is_public()]

View File

@ -51,3 +51,6 @@ if options.master:
if options.backup:
red.set_backup()
if options.fault:
red.set_fault()

View File

@ -22,11 +22,11 @@ STRIKE_FILE="$ROUTER_BIN_PATH/keepalived.strikes"
if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ]
then
lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts)
diff=$(($thistime - $lasttime))
lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
diff=$(($lasttime - $thistime))
s=0
if [ $diff -lt 30 ]
if [ $diff -ge 10 ]
then
if [ -e $STRIKE_FILE ]
then
@ -47,13 +47,14 @@ then
if [ $s -gt 2 ]
then
echo Keepalived process is dead! >> $ROUTER_LOG
$ROUTER_BIN_PATH/services.sh stop >> $ROUTER_LOG 2>&1
$ROUTER_BIN_PATH/disable_pubip.sh >> $ROUTER_LOG 2>&1
$ROUTER_BIN_PATH/primary-backup.sh fault >> $ROUTER_LOG 2>&1
service keepalived stop >> $ROUTER_LOG 2>&1
service conntrackd stop >> $ROUTER_LOG 2>&1
pkill -9 keepalived >> $ROUTER_LOG 2>&1
pkill -9 conntrackd >> $ROUTER_LOG 2>&1
#Set fault so we have the same effect as a KeepaliveD fault.
python /opt/cloud/bin/master.py --fault
pkill -9 keepalived >> $ROUTER_LOG 2>&1
pkill -9 conntrackd >> $ROUTER_LOG 2>&1
echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
exit
fi

View File

@ -21,15 +21,14 @@ global_defs {
vrrp_script heartbeat {
script "[RROUTER_BIN_PATH]/heartbeat.sh"
interval 10
interval 5
}
vrrp_instance inside_network {
state BACKUP
state EQUAL
interface eth0
virtual_router_id 51
priority [PRIORITY]
nopreempt
nopreempt
advert_int 1
authentication {
@ -46,7 +45,7 @@ vrrp_instance inside_network {
}
!That's the correct path of the master.py file.
notify_master "/opt/cloud/bin/master.py --master"
notify_backup "/opt/cloud/bin/master.py --backup"
notify_master "/opt/cloud/bin/master.py --master"
notify_fault "/opt/cloud/bin/master.py --fault"
}
}

View File

@ -32,13 +32,6 @@ class TestCsCmdLine(unittest.TestCase):
def test_idata(self):
self.assertTrue(self.cscmdline.idata() == {})
def test_get_priority(self):
self.assertTrue(self.cscmdline.get_priority() == 99)
def test_set_priority(self):
self.cscmdline.set_priority(100)
self.assertTrue(self.cscmdline.get_priority() == 100)
def test_is_redundant(self):
self.assertTrue(self.cscmdline.is_redundant() is False)
self.cscmdline.set_redundant()

View File

@ -283,7 +283,8 @@ class TestVPCRedundancy(cloudstackTestCase):
cnts = [0, 0, 0]
self.query_routers(count, showall)
for router in self.routers:
cnts[vals.index(router.redundantstate)] += 1
if router.state == "Running":
cnts[vals.index(router.redundantstate)] += 1
if cnts[vals.index('MASTER')] != 1:
self.fail("No Master or too many master routers found %s" % cnts[vals.index('MASTER')])
# if cnts[vals.index('UNKNOWN')] > 0:
@ -431,6 +432,7 @@ class TestVPCRedundancy(cloudstackTestCase):
self.do_vpc_test(False)
self.stop_router("MASTER")
# wait for the backup router to transit to master state
time.sleep(30)
self.check_master_status(1)
self.do_vpc_test(False)
@ -441,7 +443,6 @@ class TestVPCRedundancy(cloudstackTestCase):
self.start_router()
self.add_nat_rules()
time.sleep(60)
self.check_master_status(2)
self.do_vpc_test(False)
@ -459,6 +460,7 @@ class TestVPCRedundancy(cloudstackTestCase):
vm.set_ip(self.acquire_publicip(o.get_net()))
if vm.get_nat() is None:
vm.set_nat(self.create_natrule(vm.get_vm(), vm.get_ip(), o.get_net()))
time.sleep(5)
def do_vpc_test(self, expectFail):
retries = 20