kvm: Aqcuire lock when running security group Python script

It could happen that when multiple instances are starting at the same
time on a KVM host the Agent spawns multiple instances of security_group.py
which both try to modify iptables/ebtables rules.

This fails with on of the two processes failing.

The instance is still started, but it doesn't have any IP connectivity due
to the failed programming of the security groups.

This modification lets the script aqcuire a exclusive lock on a file so that
only one instance of the scripts talks to iptables/ebtables at once.

Other instances of the script which start will poll every 500ms if they can
obtain the lock and otherwise execute anyway after 15 seconds.

The lock will be released as soon as the script exists, which is usually within
a few hundred ms.
This commit is contained in:
Wido den Hollander 2016-02-09 21:20:58 +01:00
parent 7017a829ea
commit 26becef722
No known key found for this signature in database
GPG Key ID: 019B582DDB3ECA42

View File

@ -26,8 +26,11 @@ import xml.dom.minidom
from optparse import OptionParser, OptionGroup, OptParseError, BadOptionError, OptionError, OptionConflictError, OptionValueError
import re
import libvirt
import fcntl
import time
logpath = "/var/run/cloud/" # FIXME: Logs should reside in /var/log/cloud
lock_file = "/var/lock/cloudstack_security_group.lock"
iptables = Command("iptables")
bash = Command("/bin/bash")
ebtables = Command("ebtables")
@ -36,6 +39,21 @@ cfo = configFileOps("/etc/cloudstack/agent/agent.properties")
hyper = cfo.getEntry("hypervisor.type")
if hyper == "lxc":
driver = "lxc:///"
lock_handle = None
def obtain_file_lock(path):
global lock_handle
try:
lock_handle = open(path, 'w')
fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
return True
except IOError:
pass
return False
def execute(cmd):
logging.debug(cmd)
return bash("-c", cmd).stdout
@ -303,7 +321,7 @@ def default_network_rules_systemvm(vm_name, localbrname):
for bridge in bridges:
if bridge != localbrname:
if not addFWFramework(bridge):
return False
return False
brfw = getBrfw(bridge)
vifs = getVifsForBridge(vm_name, bridge)
for vif in vifs:
@ -1029,6 +1047,14 @@ if __name__ == '__main__':
sys.exit(1)
cmd = args[0]
logging.debug("Executing command: " + str(cmd))
for i in range(0, 30):
if obtain_file_lock(lock_file) is False:
logging.warn("Lock on %s is being held by other process. Waiting for release." % lock_file)
time.sleep(0.5)
else:
break
if cmd == "can_bridge_firewall":
can_bridge_firewall(args[1])
elif cmd == "default_network_rules":