Merge pull request #1408 from wido/security-group-lock

kvm: Aqcuire lock when running security group Python scriptIt could happen that when multiple instances are starting at the same
time on a KVM host the Agent spawns multiple instances of security_group.py
which both try to modify iptables/ebtables rules.

This fails with on of the two processes failing.

The instance is still started, but it doesn't have any IP connectivity due
to the failed programming of the security groups.

This modification lets the script aqcuire a exclusive lock on a file so that
only one instance of the scripts talks to iptables/ebtables at once.

Other instances of the script which start will poll every 500ms if they can
obtain the lock and otherwise execute anyway after 15 seconds.

* pr/1408:
  kvm: Aqcuire lock when running security group Python script

Signed-off-by: Will Stevens <williamstevens@gmail.com>
This commit is contained in:
Will Stevens 2016-05-04 10:32:47 -04:00
commit 62d2954f51

View File

@ -26,8 +26,11 @@ import xml.dom.minidom
from optparse import OptionParser, OptionGroup, OptParseError, BadOptionError, OptionError, OptionConflictError, OptionValueError
import re
import libvirt
import fcntl
import time
logpath = "/var/run/cloud/" # FIXME: Logs should reside in /var/log/cloud
lock_file = "/var/lock/cloudstack_security_group.lock"
iptables = Command("iptables")
bash = Command("/bin/bash")
ebtables = Command("ebtables")
@ -36,6 +39,21 @@ cfo = configFileOps("/etc/cloudstack/agent/agent.properties")
hyper = cfo.getEntry("hypervisor.type")
if hyper == "lxc":
driver = "lxc:///"
lock_handle = None
def obtain_file_lock(path):
global lock_handle
try:
lock_handle = open(path, 'w')
fcntl.flock(lock_handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
return True
except IOError:
pass
return False
def execute(cmd):
logging.debug(cmd)
return bash("-c", cmd).stdout
@ -1029,6 +1047,14 @@ if __name__ == '__main__':
sys.exit(1)
cmd = args[0]
logging.debug("Executing command: " + str(cmd))
for i in range(0, 30):
if obtain_file_lock(lock_file) is False:
logging.warn("Lock on %s is being held by other process. Waiting for release." % lock_file)
time.sleep(0.5)
else:
break
if cmd == "can_bridge_firewall":
can_bridge_firewall(args[1])
elif cmd == "default_network_rules":