diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py index d95a2b452a2..62a4a8a9eb3 100755 --- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py +++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsAddress.py @@ -133,10 +133,6 @@ class CsInterface: return self.get_attr("gateway") else: return self.config.cmdline().get_guest_gw() -# if self.config.cmdline().is_redundant(): -# return self.config.cmdline().get_guest_gw() -# else: -# return self.get_ip() def ip_in_subnet(self, ip): ipo = IPAddress(ip) @@ -410,10 +406,6 @@ class CsIP: ]) if self.get_type() in ["public"]: - # self.fw.append(["nat", "front", - # "-A POSTROUTING -o %s -j SNAT --to-source %s" % - # (self.dev, self.address['public_ip']) - # ]) self.fw.append(["", "front", "-A FORWARD -o %s -d %s -j ACL_INBOUND_%s" % (self.dev, self.address['network'], self.dev) ]) @@ -457,7 +449,6 @@ class CsIP: vpccidr = self.config.cmdline().get_vpccidr() self.fw.append(["filter", "", "-A FORWARD -s %s ! -d %s -j ACCEPT" % (vpccidr, vpccidr)]) self.fw.append(["nat", "", "-A POSTROUTING -j SNAT -o %s --to-source %s" % (self.dev, self.address['public_ip'])]) - # route.flush() def list(self): self.iplist = {} diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsDatabag.py b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsDatabag.py index e8f15fed9ba..2a37b0a858f 100755 --- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsDatabag.py +++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsDatabag.py @@ -57,11 +57,6 @@ class CsCmdLine(CsDataBag): self.dbag['config'] = {} return self.dbag['config'] - def get_priority(self): - if "router_pr" in self.idata(): - return self.idata()['router_pr'] - return 99 - def set_guest_gw(self, val): self.idata()['guestgw'] = val @@ -70,9 +65,6 @@ class CsCmdLine(CsDataBag): return self.idata()['guestgw'] return False - def set_priority(self, val): - self.idata()['router_pr'] = val - def is_redundant(self): if "redundant_router" in self.idata(): return self.idata()['redundant_router'] == "true" diff --git a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py index 8a4cd26f94c..9003555f73b 100755 --- a/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py +++ b/systemvm/patches/debian/config/opt/cloud/bin/cs/CsRedundant.py @@ -108,16 +108,9 @@ class CsRedundant(object): # keepalived configuration file = CsFile(self.KEEPALIVED_CONF) ads = [o for o in self.address.get_ips() if o.is_public()] - # Add a comment for each public IP. If any change this will cause keepalived to restart - # As things stand keepalived will be configured before the IP is added or deleted - i = 0 - for o in ads: - file.addeq("! %s=%s" % (i, o.get_cidr())) - i = i + 1 + file.search(" router_id ", " router_id %s" % self.cl.get_name()) - file.search(" priority ", " priority %s" % self.cl.get_priority()) file.search(" interface ", " interface %s" % guest.get_device()) - file.search(" state ", " state %s" % "EQUAL") file.search(" virtual_router_id ", " virtual_router_id %s" % self.cl.get_router_id()) file.greplace("[RROUTER_BIN_PATH]", self.CS_ROUTER_DIR) file.section("authentication {", "}", [" auth_type AH \n", " auth_pass %s\n" % self.cl.get_router_password()]) @@ -153,7 +146,7 @@ class CsRedundant(object): proc = CsProcess(['/usr/sbin/keepalived', '--vrrp']) if not proc.find(): - CsHelper.service("keepalived", "restart") + CsHelper.service("keepalived", "start") def release_lock(self): try: @@ -185,6 +178,7 @@ class CsRedundant(object): if not self.cl.is_redundant(): logging.error("Set fault called on non-redundant router") return + self.set_lock() logging.info("Router switched to fault mode") ads = [o for o in self.address.get_ips() if o.is_public()] @@ -208,11 +202,7 @@ class CsRedundant(object): if not self.cl.is_redundant(): logging.error("Set backup called on non-redundant router") return - """ - if not self.cl.is_master(): - logging.error("Set backup called on node that is already backup") - return - """ + self.set_lock() logging.debug("Setting router to backup") ads = [o for o in self.address.get_ips() if o.is_public()] @@ -232,7 +222,7 @@ class CsRedundant(object): for o in ads: CsPasswdSvc(o.get_gateway()).stop() CsHelper.service("dnsmasq", "stop") - # self._set_priority(self.CS_PRIO_DOWN) + self.cl.set_master_state(False) self.cl.save() self.release_lock() @@ -243,11 +233,7 @@ class CsRedundant(object): if not self.cl.is_redundant(): logging.error("Set master called on non-redundant router") return - """ - if self.cl.is_master(): - logging.error("Set master called on master node") - return - """ + self.set_lock() logging.debug("Setting router to master") ads = [o for o in self.address.get_ips() if o.is_public()] diff --git a/systemvm/patches/debian/config/opt/cloud/bin/master.py b/systemvm/patches/debian/config/opt/cloud/bin/master.py index cea11425d9e..41d90f06ce3 100755 --- a/systemvm/patches/debian/config/opt/cloud/bin/master.py +++ b/systemvm/patches/debian/config/opt/cloud/bin/master.py @@ -51,3 +51,6 @@ if options.master: if options.backup: red.set_backup() + +if options.fault: + red.set_fault() \ No newline at end of file diff --git a/systemvm/patches/debian/config/opt/cloud/templates/check_heartbeat.sh.templ b/systemvm/patches/debian/config/opt/cloud/templates/check_heartbeat.sh.templ index 41311596c73..6a0d8a963fc 100755 --- a/systemvm/patches/debian/config/opt/cloud/templates/check_heartbeat.sh.templ +++ b/systemvm/patches/debian/config/opt/cloud/templates/check_heartbeat.sh.templ @@ -22,11 +22,11 @@ STRIKE_FILE="$ROUTER_BIN_PATH/keepalived.strikes" if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ] then - lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2) thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts) - diff=$(($thistime - $lasttime)) + lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2) + diff=$(($lasttime - $thistime)) s=0 - if [ $diff -lt 30 ] + if [ $diff -ge 10 ] then if [ -e $STRIKE_FILE ] then @@ -47,13 +47,14 @@ then if [ $s -gt 2 ] then echo Keepalived process is dead! >> $ROUTER_LOG - $ROUTER_BIN_PATH/services.sh stop >> $ROUTER_LOG 2>&1 - $ROUTER_BIN_PATH/disable_pubip.sh >> $ROUTER_LOG 2>&1 - $ROUTER_BIN_PATH/primary-backup.sh fault >> $ROUTER_LOG 2>&1 service keepalived stop >> $ROUTER_LOG 2>&1 service conntrackd stop >> $ROUTER_LOG 2>&1 - pkill -9 keepalived >> $ROUTER_LOG 2>&1 - pkill -9 conntrackd >> $ROUTER_LOG 2>&1 + + #Set fault so we have the same effect as a KeepaliveD fault. + python /opt/cloud/bin/master.py --fault + + pkill -9 keepalived >> $ROUTER_LOG 2>&1 + pkill -9 conntrackd >> $ROUTER_LOG 2>&1 echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG exit fi diff --git a/systemvm/patches/debian/config/opt/cloud/templates/keepalived.conf.templ b/systemvm/patches/debian/config/opt/cloud/templates/keepalived.conf.templ index 0e64a7ea6cc..1563b3936d5 100644 --- a/systemvm/patches/debian/config/opt/cloud/templates/keepalived.conf.templ +++ b/systemvm/patches/debian/config/opt/cloud/templates/keepalived.conf.templ @@ -21,15 +21,14 @@ global_defs { vrrp_script heartbeat { script "[RROUTER_BIN_PATH]/heartbeat.sh" - interval 10 + interval 5 } vrrp_instance inside_network { - state BACKUP + state EQUAL interface eth0 virtual_router_id 51 - priority [PRIORITY] - nopreempt + nopreempt advert_int 1 authentication { @@ -46,7 +45,7 @@ vrrp_instance inside_network { } !That's the correct path of the master.py file. - notify_master "/opt/cloud/bin/master.py --master" notify_backup "/opt/cloud/bin/master.py --backup" + notify_master "/opt/cloud/bin/master.py --master" notify_fault "/opt/cloud/bin/master.py --fault" -} +} \ No newline at end of file diff --git a/systemvm/test/python/TestCsCmdLine.py b/systemvm/test/python/TestCsCmdLine.py index ccd05853a9a..b89d65d37aa 100644 --- a/systemvm/test/python/TestCsCmdLine.py +++ b/systemvm/test/python/TestCsCmdLine.py @@ -32,13 +32,6 @@ class TestCsCmdLine(unittest.TestCase): def test_idata(self): self.assertTrue(self.cscmdline.idata() == {}) - def test_get_priority(self): - self.assertTrue(self.cscmdline.get_priority() == 99) - - def test_set_priority(self): - self.cscmdline.set_priority(100) - self.assertTrue(self.cscmdline.get_priority() == 100) - def test_is_redundant(self): self.assertTrue(self.cscmdline.is_redundant() is False) self.cscmdline.set_redundant() diff --git a/test/integration/component/test_vpc_redundant.py b/test/integration/component/test_vpc_redundant.py index 8fe44c28d8d..32e52ec90a3 100644 --- a/test/integration/component/test_vpc_redundant.py +++ b/test/integration/component/test_vpc_redundant.py @@ -283,7 +283,8 @@ class TestVPCRedundancy(cloudstackTestCase): cnts = [0, 0, 0] self.query_routers(count, showall) for router in self.routers: - cnts[vals.index(router.redundantstate)] += 1 + if router.state == "Running": + cnts[vals.index(router.redundantstate)] += 1 if cnts[vals.index('MASTER')] != 1: self.fail("No Master or too many master routers found %s" % cnts[vals.index('MASTER')]) # if cnts[vals.index('UNKNOWN')] > 0: @@ -431,6 +432,7 @@ class TestVPCRedundancy(cloudstackTestCase): self.do_vpc_test(False) self.stop_router("MASTER") + # wait for the backup router to transit to master state time.sleep(30) self.check_master_status(1) self.do_vpc_test(False) @@ -441,7 +443,6 @@ class TestVPCRedundancy(cloudstackTestCase): self.start_router() self.add_nat_rules() - time.sleep(60) self.check_master_status(2) self.do_vpc_test(False) @@ -459,6 +460,7 @@ class TestVPCRedundancy(cloudstackTestCase): vm.set_ip(self.acquire_publicip(o.get_net())) if vm.get_nat() is None: vm.set_nat(self.create_natrule(vm.get_vm(), vm.get_ip(), o.get_net())) + time.sleep(5) def do_vpc_test(self, expectFail): retries = 20