mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
kvm: Acquire lock when running security group Python script
It could happen that when multiple instances are starting at the same time on a KVM host, the Agent spawns multiple instances of security_group.py which all try to modify iptables/ebtables rules at once. This results in one of the processes failing. The instance is still started, but it doesn't have any IP connectivity due to the failed programming of the security groups. This modification lets the script acquire an exclusive lock on a file so that only one instance of the script talks to iptables/ebtables at a time. Other instances of the script which start will poll every 500ms to see if they can obtain the lock, and otherwise execute anyway after 15 seconds. The lock will be released as soon as the script exits, which is usually within a few hundred ms.
This commit is contained in:
parent
7017a829ea
commit
26becef722
@ -26,8 +26,11 @@ import xml.dom.minidom
|
||||
from optparse import OptionParser, OptionGroup, OptParseError, BadOptionError, OptionError, OptionConflictError, OptionValueError
|
||||
import re
|
||||
import libvirt
|
||||
import fcntl
|
||||
import time
|
||||
|
||||
logpath = "/var/run/cloud/" # FIXME: Logs should reside in /var/log/cloud
|
||||
# Path of the lock file; the __main__ section passes it to obtain_file_lock()
# before dispatching the requested command.
lock_file = "/var/lock/cloudstack_security_group.lock"
|
||||
iptables = Command("iptables")
|
||||
bash = Command("/bin/bash")
|
||||
ebtables = Command("ebtables")
|
||||
@ -36,6 +39,21 @@ cfo = configFileOps("/etc/cloudstack/agent/agent.properties")
|
||||
hyper = cfo.getEntry("hypervisor.type")
|
||||
if hyper == "lxc":
|
||||
driver = "lxc:///"
|
||||
|
||||
# Module-level reference to the opened lock file; obtain_file_lock() stores
# the file object here via `global lock_handle`.
lock_handle = None
|
||||
|
||||
def obtain_file_lock(path):
    """Try to take an exclusive, non-blocking flock() on the file at *path*.

    The file is opened for writing (created if missing) and, on success, the
    open file object is kept in the module-level ``lock_handle`` so the lock
    stays held for as long as that object stays open.

    Returns True when the lock was acquired. Returns False when the file
    could not be opened or the lock is currently held through another open
    file; in the latter case the freshly opened handle is closed again so
    repeated polling does not pile up open file descriptors.
    """
    global lock_handle

    try:
        handle = open(path, 'w')
    except IOError:
        # Could not open/create the lock file; leave lock_handle untouched.
        return False

    # Rebind the global before locking: this drops the reference to any
    # handle stored by an earlier call, exactly as the previous code did.
    lock_handle = handle

    try:
        fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        # Lock is held elsewhere: release the descriptor explicitly instead
        # of leaving an unlocked, open file behind in the global.
        handle.close()
        lock_handle = None
        return False

    return True
|
||||
|
||||
def execute(cmd):
    """Log *cmd* at debug level, run it via ``bash -c`` and return its stdout."""
    logging.debug(cmd)
    result = bash("-c", cmd)
    return result.stdout
|
||||
@ -303,7 +321,7 @@ def default_network_rules_systemvm(vm_name, localbrname):
|
||||
for bridge in bridges:
|
||||
if bridge != localbrname:
|
||||
if not addFWFramework(bridge):
|
||||
return False
|
||||
return False
|
||||
brfw = getBrfw(bridge)
|
||||
vifs = getVifsForBridge(vm_name, bridge)
|
||||
for vif in vifs:
|
||||
@ -1029,6 +1047,14 @@ if __name__ == '__main__':
|
||||
sys.exit(1)
|
||||
cmd = args[0]
|
||||
logging.debug("Executing command: " + str(cmd))
|
||||
|
||||
for i in range(0, 30):
|
||||
if obtain_file_lock(lock_file) is False:
|
||||
logging.warn("Lock on %s is being held by other process. Waiting for release." % lock_file)
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
break
|
||||
|
||||
if cmd == "can_bridge_firewall":
|
||||
can_bridge_firewall(args[1])
|
||||
elif cmd == "default_network_rules":
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user