mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
CLOUDSTACK-8616 - Removing the Priority from KeepaliveD configuration
- We use nopreempt mode with the state set to EQUAL on both nodes, so there is no need to have priorities set up
- Do not add IPs as comments to the configuration. If a new guest interface is added, the file will change anyway.
- This was used in the past when keepalived would restart for each new interface added
- Removed the long sleep from the tests: we now sleep 5 seconds per PF rule added
CLOUDSTACK-8616 - Fix the comparison of the keepalived.ts and keepalived.ts2 files
- Add a call to set_fault() in case the router transitions to that state
- Removing commented out code
CLOUDSTACK-8616 - Fixing check_heartbeat.sh.templ
CLOUDSTACK-8616 - Call set_fault from the check_heartbeat.sh script
Signed-off-by: wilderrodrigues <wrodrigues@schubergphilis.com>
This commit is contained in:
parent
f5ed824ea2
commit
20be22c698
@ -133,10 +133,6 @@ class CsInterface:
|
|||||||
return self.get_attr("gateway")
|
return self.get_attr("gateway")
|
||||||
else:
|
else:
|
||||||
return self.config.cmdline().get_guest_gw()
|
return self.config.cmdline().get_guest_gw()
|
||||||
# if self.config.cmdline().is_redundant():
|
|
||||||
# return self.config.cmdline().get_guest_gw()
|
|
||||||
# else:
|
|
||||||
# return self.get_ip()
|
|
||||||
|
|
||||||
def ip_in_subnet(self, ip):
|
def ip_in_subnet(self, ip):
|
||||||
ipo = IPAddress(ip)
|
ipo = IPAddress(ip)
|
||||||
@ -410,10 +406,6 @@ class CsIP:
|
|||||||
])
|
])
|
||||||
|
|
||||||
if self.get_type() in ["public"]:
|
if self.get_type() in ["public"]:
|
||||||
# self.fw.append(["nat", "front",
|
|
||||||
# "-A POSTROUTING -o %s -j SNAT --to-source %s" %
|
|
||||||
# (self.dev, self.address['public_ip'])
|
|
||||||
# ])
|
|
||||||
self.fw.append(["", "front",
|
self.fw.append(["", "front",
|
||||||
"-A FORWARD -o %s -d %s -j ACL_INBOUND_%s" % (self.dev, self.address['network'], self.dev)
|
"-A FORWARD -o %s -d %s -j ACL_INBOUND_%s" % (self.dev, self.address['network'], self.dev)
|
||||||
])
|
])
|
||||||
@ -457,7 +449,6 @@ class CsIP:
|
|||||||
vpccidr = self.config.cmdline().get_vpccidr()
|
vpccidr = self.config.cmdline().get_vpccidr()
|
||||||
self.fw.append(["filter", "", "-A FORWARD -s %s ! -d %s -j ACCEPT" % (vpccidr, vpccidr)])
|
self.fw.append(["filter", "", "-A FORWARD -s %s ! -d %s -j ACCEPT" % (vpccidr, vpccidr)])
|
||||||
self.fw.append(["nat", "", "-A POSTROUTING -j SNAT -o %s --to-source %s" % (self.dev, self.address['public_ip'])])
|
self.fw.append(["nat", "", "-A POSTROUTING -j SNAT -o %s --to-source %s" % (self.dev, self.address['public_ip'])])
|
||||||
# route.flush()
|
|
||||||
|
|
||||||
def list(self):
|
def list(self):
|
||||||
self.iplist = {}
|
self.iplist = {}
|
||||||
|
|||||||
@ -57,11 +57,6 @@ class CsCmdLine(CsDataBag):
|
|||||||
self.dbag['config'] = {}
|
self.dbag['config'] = {}
|
||||||
return self.dbag['config']
|
return self.dbag['config']
|
||||||
|
|
||||||
def get_priority(self):
|
|
||||||
if "router_pr" in self.idata():
|
|
||||||
return self.idata()['router_pr']
|
|
||||||
return 99
|
|
||||||
|
|
||||||
def set_guest_gw(self, val):
|
def set_guest_gw(self, val):
|
||||||
self.idata()['guestgw'] = val
|
self.idata()['guestgw'] = val
|
||||||
|
|
||||||
@ -70,9 +65,6 @@ class CsCmdLine(CsDataBag):
|
|||||||
return self.idata()['guestgw']
|
return self.idata()['guestgw']
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def set_priority(self, val):
|
|
||||||
self.idata()['router_pr'] = val
|
|
||||||
|
|
||||||
def is_redundant(self):
|
def is_redundant(self):
|
||||||
if "redundant_router" in self.idata():
|
if "redundant_router" in self.idata():
|
||||||
return self.idata()['redundant_router'] == "true"
|
return self.idata()['redundant_router'] == "true"
|
||||||
|
|||||||
@ -108,16 +108,9 @@ class CsRedundant(object):
|
|||||||
# keepalived configuration
|
# keepalived configuration
|
||||||
file = CsFile(self.KEEPALIVED_CONF)
|
file = CsFile(self.KEEPALIVED_CONF)
|
||||||
ads = [o for o in self.address.get_ips() if o.is_public()]
|
ads = [o for o in self.address.get_ips() if o.is_public()]
|
||||||
# Add a comment for each public IP. If any change this will cause keepalived to restart
|
|
||||||
# As things stand keepalived will be configured before the IP is added or deleted
|
|
||||||
i = 0
|
|
||||||
for o in ads:
|
|
||||||
file.addeq("! %s=%s" % (i, o.get_cidr()))
|
|
||||||
i = i + 1
|
|
||||||
file.search(" router_id ", " router_id %s" % self.cl.get_name())
|
file.search(" router_id ", " router_id %s" % self.cl.get_name())
|
||||||
file.search(" priority ", " priority %s" % self.cl.get_priority())
|
|
||||||
file.search(" interface ", " interface %s" % guest.get_device())
|
file.search(" interface ", " interface %s" % guest.get_device())
|
||||||
file.search(" state ", " state %s" % "EQUAL")
|
|
||||||
file.search(" virtual_router_id ", " virtual_router_id %s" % self.cl.get_router_id())
|
file.search(" virtual_router_id ", " virtual_router_id %s" % self.cl.get_router_id())
|
||||||
file.greplace("[RROUTER_BIN_PATH]", self.CS_ROUTER_DIR)
|
file.greplace("[RROUTER_BIN_PATH]", self.CS_ROUTER_DIR)
|
||||||
file.section("authentication {", "}", [" auth_type AH \n", " auth_pass %s\n" % self.cl.get_router_password()])
|
file.section("authentication {", "}", [" auth_type AH \n", " auth_pass %s\n" % self.cl.get_router_password()])
|
||||||
@ -153,7 +146,7 @@ class CsRedundant(object):
|
|||||||
|
|
||||||
proc = CsProcess(['/usr/sbin/keepalived', '--vrrp'])
|
proc = CsProcess(['/usr/sbin/keepalived', '--vrrp'])
|
||||||
if not proc.find():
|
if not proc.find():
|
||||||
CsHelper.service("keepalived", "restart")
|
CsHelper.service("keepalived", "start")
|
||||||
|
|
||||||
def release_lock(self):
|
def release_lock(self):
|
||||||
try:
|
try:
|
||||||
@ -185,6 +178,7 @@ class CsRedundant(object):
|
|||||||
if not self.cl.is_redundant():
|
if not self.cl.is_redundant():
|
||||||
logging.error("Set fault called on non-redundant router")
|
logging.error("Set fault called on non-redundant router")
|
||||||
return
|
return
|
||||||
|
|
||||||
self.set_lock()
|
self.set_lock()
|
||||||
logging.info("Router switched to fault mode")
|
logging.info("Router switched to fault mode")
|
||||||
ads = [o for o in self.address.get_ips() if o.is_public()]
|
ads = [o for o in self.address.get_ips() if o.is_public()]
|
||||||
@ -208,11 +202,7 @@ class CsRedundant(object):
|
|||||||
if not self.cl.is_redundant():
|
if not self.cl.is_redundant():
|
||||||
logging.error("Set backup called on non-redundant router")
|
logging.error("Set backup called on non-redundant router")
|
||||||
return
|
return
|
||||||
"""
|
|
||||||
if not self.cl.is_master():
|
|
||||||
logging.error("Set backup called on node that is already backup")
|
|
||||||
return
|
|
||||||
"""
|
|
||||||
self.set_lock()
|
self.set_lock()
|
||||||
logging.debug("Setting router to backup")
|
logging.debug("Setting router to backup")
|
||||||
ads = [o for o in self.address.get_ips() if o.is_public()]
|
ads = [o for o in self.address.get_ips() if o.is_public()]
|
||||||
@ -232,7 +222,7 @@ class CsRedundant(object):
|
|||||||
for o in ads:
|
for o in ads:
|
||||||
CsPasswdSvc(o.get_gateway()).stop()
|
CsPasswdSvc(o.get_gateway()).stop()
|
||||||
CsHelper.service("dnsmasq", "stop")
|
CsHelper.service("dnsmasq", "stop")
|
||||||
# self._set_priority(self.CS_PRIO_DOWN)
|
|
||||||
self.cl.set_master_state(False)
|
self.cl.set_master_state(False)
|
||||||
self.cl.save()
|
self.cl.save()
|
||||||
self.release_lock()
|
self.release_lock()
|
||||||
@ -243,11 +233,7 @@ class CsRedundant(object):
|
|||||||
if not self.cl.is_redundant():
|
if not self.cl.is_redundant():
|
||||||
logging.error("Set master called on non-redundant router")
|
logging.error("Set master called on non-redundant router")
|
||||||
return
|
return
|
||||||
"""
|
|
||||||
if self.cl.is_master():
|
|
||||||
logging.error("Set master called on master node")
|
|
||||||
return
|
|
||||||
"""
|
|
||||||
self.set_lock()
|
self.set_lock()
|
||||||
logging.debug("Setting router to master")
|
logging.debug("Setting router to master")
|
||||||
ads = [o for o in self.address.get_ips() if o.is_public()]
|
ads = [o for o in self.address.get_ips() if o.is_public()]
|
||||||
|
|||||||
@ -51,3 +51,6 @@ if options.master:
|
|||||||
|
|
||||||
if options.backup:
|
if options.backup:
|
||||||
red.set_backup()
|
red.set_backup()
|
||||||
|
|
||||||
|
if options.fault:
|
||||||
|
red.set_fault()
|
||||||
@ -22,11 +22,11 @@ STRIKE_FILE="$ROUTER_BIN_PATH/keepalived.strikes"
|
|||||||
|
|
||||||
if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ]
|
if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ]
|
||||||
then
|
then
|
||||||
lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
|
|
||||||
thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts)
|
thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts)
|
||||||
diff=$(($thistime - $lasttime))
|
lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
|
||||||
|
diff=$(($lasttime - $thistime))
|
||||||
s=0
|
s=0
|
||||||
if [ $diff -lt 30 ]
|
if [ $diff -ge 10 ]
|
||||||
then
|
then
|
||||||
if [ -e $STRIKE_FILE ]
|
if [ -e $STRIKE_FILE ]
|
||||||
then
|
then
|
||||||
@ -47,13 +47,14 @@ then
|
|||||||
if [ $s -gt 2 ]
|
if [ $s -gt 2 ]
|
||||||
then
|
then
|
||||||
echo Keepalived process is dead! >> $ROUTER_LOG
|
echo Keepalived process is dead! >> $ROUTER_LOG
|
||||||
$ROUTER_BIN_PATH/services.sh stop >> $ROUTER_LOG 2>&1
|
|
||||||
$ROUTER_BIN_PATH/disable_pubip.sh >> $ROUTER_LOG 2>&1
|
|
||||||
$ROUTER_BIN_PATH/primary-backup.sh fault >> $ROUTER_LOG 2>&1
|
|
||||||
service keepalived stop >> $ROUTER_LOG 2>&1
|
service keepalived stop >> $ROUTER_LOG 2>&1
|
||||||
service conntrackd stop >> $ROUTER_LOG 2>&1
|
service conntrackd stop >> $ROUTER_LOG 2>&1
|
||||||
pkill -9 keepalived >> $ROUTER_LOG 2>&1
|
|
||||||
pkill -9 conntrackd >> $ROUTER_LOG 2>&1
|
#Set fault so we have the same effect as a KeepaliveD fault.
|
||||||
|
python /opt/cloud/bin/master.py --fault
|
||||||
|
|
||||||
|
pkill -9 keepalived >> $ROUTER_LOG 2>&1
|
||||||
|
pkill -9 conntrackd >> $ROUTER_LOG 2>&1
|
||||||
echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
|
echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -21,15 +21,14 @@ global_defs {
|
|||||||
|
|
||||||
vrrp_script heartbeat {
|
vrrp_script heartbeat {
|
||||||
script "[RROUTER_BIN_PATH]/heartbeat.sh"
|
script "[RROUTER_BIN_PATH]/heartbeat.sh"
|
||||||
interval 10
|
interval 5
|
||||||
}
|
}
|
||||||
|
|
||||||
vrrp_instance inside_network {
|
vrrp_instance inside_network {
|
||||||
state BACKUP
|
state EQUAL
|
||||||
interface eth0
|
interface eth0
|
||||||
virtual_router_id 51
|
virtual_router_id 51
|
||||||
priority [PRIORITY]
|
nopreempt
|
||||||
nopreempt
|
|
||||||
|
|
||||||
advert_int 1
|
advert_int 1
|
||||||
authentication {
|
authentication {
|
||||||
@ -46,7 +45,7 @@ vrrp_instance inside_network {
|
|||||||
}
|
}
|
||||||
|
|
||||||
!That's the correct path of the master.py file.
|
!That's the correct path of the master.py file.
|
||||||
notify_master "/opt/cloud/bin/master.py --master"
|
|
||||||
notify_backup "/opt/cloud/bin/master.py --backup"
|
notify_backup "/opt/cloud/bin/master.py --backup"
|
||||||
|
notify_master "/opt/cloud/bin/master.py --master"
|
||||||
notify_fault "/opt/cloud/bin/master.py --fault"
|
notify_fault "/opt/cloud/bin/master.py --fault"
|
||||||
}
|
}
|
||||||
@ -32,13 +32,6 @@ class TestCsCmdLine(unittest.TestCase):
|
|||||||
def test_idata(self):
|
def test_idata(self):
|
||||||
self.assertTrue(self.cscmdline.idata() == {})
|
self.assertTrue(self.cscmdline.idata() == {})
|
||||||
|
|
||||||
def test_get_priority(self):
|
|
||||||
self.assertTrue(self.cscmdline.get_priority() == 99)
|
|
||||||
|
|
||||||
def test_set_priority(self):
|
|
||||||
self.cscmdline.set_priority(100)
|
|
||||||
self.assertTrue(self.cscmdline.get_priority() == 100)
|
|
||||||
|
|
||||||
def test_is_redundant(self):
|
def test_is_redundant(self):
|
||||||
self.assertTrue(self.cscmdline.is_redundant() is False)
|
self.assertTrue(self.cscmdline.is_redundant() is False)
|
||||||
self.cscmdline.set_redundant()
|
self.cscmdline.set_redundant()
|
||||||
|
|||||||
@ -283,7 +283,8 @@ class TestVPCRedundancy(cloudstackTestCase):
|
|||||||
cnts = [0, 0, 0]
|
cnts = [0, 0, 0]
|
||||||
self.query_routers(count, showall)
|
self.query_routers(count, showall)
|
||||||
for router in self.routers:
|
for router in self.routers:
|
||||||
cnts[vals.index(router.redundantstate)] += 1
|
if router.state == "Running":
|
||||||
|
cnts[vals.index(router.redundantstate)] += 1
|
||||||
if cnts[vals.index('MASTER')] != 1:
|
if cnts[vals.index('MASTER')] != 1:
|
||||||
self.fail("No Master or too many master routers found %s" % cnts[vals.index('MASTER')])
|
self.fail("No Master or too many master routers found %s" % cnts[vals.index('MASTER')])
|
||||||
# if cnts[vals.index('UNKNOWN')] > 0:
|
# if cnts[vals.index('UNKNOWN')] > 0:
|
||||||
@ -431,6 +432,7 @@ class TestVPCRedundancy(cloudstackTestCase):
|
|||||||
self.do_vpc_test(False)
|
self.do_vpc_test(False)
|
||||||
|
|
||||||
self.stop_router("MASTER")
|
self.stop_router("MASTER")
|
||||||
|
# wait for the backup router to transit to master state
|
||||||
time.sleep(30)
|
time.sleep(30)
|
||||||
self.check_master_status(1)
|
self.check_master_status(1)
|
||||||
self.do_vpc_test(False)
|
self.do_vpc_test(False)
|
||||||
@ -441,7 +443,6 @@ class TestVPCRedundancy(cloudstackTestCase):
|
|||||||
|
|
||||||
self.start_router()
|
self.start_router()
|
||||||
self.add_nat_rules()
|
self.add_nat_rules()
|
||||||
time.sleep(60)
|
|
||||||
self.check_master_status(2)
|
self.check_master_status(2)
|
||||||
self.do_vpc_test(False)
|
self.do_vpc_test(False)
|
||||||
|
|
||||||
@ -459,6 +460,7 @@ class TestVPCRedundancy(cloudstackTestCase):
|
|||||||
vm.set_ip(self.acquire_publicip(o.get_net()))
|
vm.set_ip(self.acquire_publicip(o.get_net()))
|
||||||
if vm.get_nat() is None:
|
if vm.get_nat() is None:
|
||||||
vm.set_nat(self.create_natrule(vm.get_vm(), vm.get_ip(), o.get_net()))
|
vm.set_nat(self.create_natrule(vm.get_vm(), vm.get_ip(), o.get_net()))
|
||||||
|
time.sleep(5)
|
||||||
|
|
||||||
def do_vpc_test(self, expectFail):
|
def do_vpc_test(self, expectFail):
|
||||||
retries = 20
|
retries = 20
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user