cloudstack/python/bindir/mycloud-setup-agent
2011-05-06 14:53:53 -04:00

888 lines
29 KiB
Python
Executable File

#!/usr/bin/python
from subprocess import PIPE, Popen
from signal import alarm, signal, SIGALRM, SIGKILL
import tempfile
import shutil
import os
import logging
import sys
import re
import traceback
import socket
import uuid
from optparse import OptionParser
class CloudRuntimeException(Exception):
    """Error raised when a setup step or a shell command fails.

    The message is stored in ``errMsg`` and is also handed to ``Exception``
    so that ``args`` (and therefore ``repr``) carry it too.
    """

    def __init__(self, errMsg):
        Exception.__init__(self, errMsg)
        self.errMsg = errMsg

    def __str__(self):
        # __str__ must return a string; errMsg is not guaranteed to be one
        # (e.g. None), and returning it raw would raise TypeError.
        return str(self.errMsg)
def formatExceptionInfo(maxTBlevel=5):
    """Describe the exception that is currently being handled.

    The result is the exception's string form on its own line, followed by
    at most ``maxTBlevel`` formatted traceback entries.
    """
    _, error, tb = sys.exc_info()
    frames = traceback.format_tb(tb, maxTBlevel)
    return "".join([str(error) + "\n"] + frames)
class bash:
    """Run a shell command at construction time and keep its result.

    ``args`` is passed to the shell as a single command line.  ``timeout``
    is the number of seconds after which the command is killed (enforced
    with SIGALRM); ``-1`` disables the timeout.

    Raises CloudRuntimeException when the command cannot be run, times out
    or exits with a non-zero status (the message is then its stderr).
    """

    def __init__(self, args, timeout=600):
        self.args = args
        logging.debug("execute:%s"%args)
        self.timeout = timeout
        self.process = None
        self.success = False
        # Defined up front so the getters work even if run() fails early.
        self.stdout = ""
        self.stderr = ""
        self.run()

    def run(self):
        """Execute the command, enforcing the timeout when one is set."""
        class Alarm(Exception):
            pass

        def alarm_handler(signum, frame):
            raise Alarm

        useAlarm = self.timeout != -1
        handlerInstalled = False
        previousHandler = None
        try:
            try:
                # universal_newlines makes the pipes yield text, which the
                # strip("\n")/split("\n") getters below rely on.
                self.process = Popen(self.args, shell=True, stdout=PIPE,
                                     stderr=PIPE, universal_newlines=True)
                if useAlarm:
                    previousHandler = signal(SIGALRM, alarm_handler)
                    handlerInstalled = True
                    alarm(self.timeout)
                try:
                    self.stdout, self.stderr = self.process.communicate()
                except Alarm:
                    os.kill(self.process.pid, SIGKILL)
                    # Reap the killed child so it does not stay a zombie.
                    self.process.wait()
                    raise CloudRuntimeException("Timeout during command execution")
                self.success = self.process.returncode == 0
            finally:
                if useAlarm:
                    # Always cancel the pending alarm, also on errors, so it
                    # cannot fire later in unrelated code.
                    alarm(0)
                    # signal() returns None for handlers that were not
                    # installed from Python; those cannot be re-installed.
                    if handlerInstalled and previousHandler is not None:
                        signal(SIGALRM, previousHandler)
        except Exception:
            raise CloudRuntimeException(formatExceptionInfo())

        if not self.success:
            raise CloudRuntimeException(self.getStderr())

    def isSuccess(self):
        """Return True when the command exited with status 0."""
        return self.success

    def getStdout(self):
        """Return stdout with leading/trailing newlines removed."""
        return self.stdout.strip("\n")

    def getLines(self):
        """Return stdout split on newlines."""
        return self.stdout.split("\n")

    def getStderr(self):
        """Return stderr with leading/trailing newlines removed."""
        return self.stderr.strip("\n")
def initLoging(logFile=None):
    """Set up root logging at DEBUG level.

    Records go to ``logFile`` when one is given.  If configuring logging
    raises, it is configured again without a file.
    """
    options = {"level": logging.DEBUG}
    if logFile is not None:
        options["filename"] = logFile
    try:
        logging.basicConfig(**options)
    except:
        logging.basicConfig(level=logging.DEBUG)
class configFileOps():
    """Edit a line-oriented configuration file in place.

    Two styles of editing are offered:

    * ``addEntry`` + ``save`` set ``name=value`` pairs, replacing existing
      assignments of the same name and appending the missing ones;
    * ``replace_line`` / ``replace_or_add_line`` / ``add_lines`` /
      ``replace_lines`` edit arbitrary text and record what they changed in
      ``self.backups`` so that ``backup()`` can undo it.
    """

    class entry():
        """A pending ``name=value`` assignment and its write state."""

        def __init__(self, name, value):
            self.name = name
            self.value = value
            self.state = "new"

        def setState(self, state):
            self.state = state

        def getState(self):
            return self.state

    def __init__(self, fileName):
        self.fileName = fileName
        self.entries = []
        # List of [old_text_or_None, new_text] pairs, in the order applied.
        self.backups = []

    def _read(self):
        """Return the whole file as a string (the handle is closed)."""
        with open(self.fileName) as fp:
            return fp.read()

    def _write(self, text):
        """Overwrite the file with ``text`` (the handle is closed)."""
        with open(self.fileName, "w") as fp:
            fp.write(text)

    def addEntry(self, name, value):
        """Queue ``name=value`` to be written by the next ``save()``."""
        self.entries.append(self.entry(name, value))

    def getEntry(self, name):
        """Return the value assigned to ``name``, or "" if there is none.

        Only an exact key matches (``host`` does not match ``hostname=``)
        and everything after the first ``=`` is returned, so values may
        themselves contain ``=``.  An unreadable file yields "".
        """
        try:
            ctx = self._read()
        except (IOError, OSError):
            return ""
        pattern = r"^%s[ \t]*=(.*)$" % re.escape(name)
        match = re.search(pattern, ctx, re.MULTILINE)
        if match is None:
            return ""
        return match.group(1)

    def save(self):
        """Write the queued entries into the file.

        Lines assigning a queued name are rewritten as ``name=value``;
        entries that matched no line are appended.  The result is built in
        a temporary file which then replaces the original.
        """
        fd, tmp_path = tempfile.mkstemp()
        moved = False
        try:
            # Wrap the descriptor mkstemp opened so that it gets closed.
            with os.fdopen(fd, "w") as new_file:
                with open(self.fileName, "r") as fp:
                    for line in fp:
                        key = line.split("=", 1)[0].strip()
                        if "=" in line:
                            for entry in self.entries:
                                if key == entry.name:
                                    line = entry.name + "=" + entry.value + "\n"
                                    entry.setState("set")
                        new_file.write(line)
                for entry in self.entries:
                    if entry.getState() != "set":
                        new_file.write("\n" + entry.name + "=" + entry.value + "\n")
            # mkstemp creates the file with mode 0600; keep the original's.
            shutil.copymode(self.fileName, tmp_path)
            shutil.move(tmp_path, self.fileName)
            moved = True
        finally:
            if not moved and os.path.exists(tmp_path):
                os.remove(tmp_path)

    def replace_line(self, startswith, stanza, always_add=False):
        """Replace every line matching the regex ``startswith`` by ``stanza``.

        With ``stanza`` None the matching lines are dropped.  With
        ``always_add`` the stanza is appended when no line matched.  Lines
        keep their indentation; only the line terminator is normalised.
        """
        with open(self.fileName) as fp:
            lines = [s.rstrip("\r\n") for s in fp]
        newlines = []
        replaced = False
        for line in lines:
            if re.search(startswith, line):
                if stanza is not None:
                    newlines.append(stanza)
                    self.backups.append([line, stanza])
                replaced = True
            else:
                newlines.append(line)
        # Nothing to append when there is no stanza.
        if not replaced and always_add and stanza is not None:
            newlines.append(stanza)
            self.backups.append([None, stanza])
        self._write("".join([s + "\n" for s in newlines]))

    def replace_or_add_line(self, startswith, stanza):
        """Like ``replace_line`` but append ``stanza`` if nothing matched."""
        return self.replace_line(startswith, stanza, always_add=True)

    def add_lines(self, lines):
        """Append the text ``lines`` unless the file already contains it."""
        content = self._read()
        if lines in content:
            return
        # Do not glue the new text onto an unterminated last line.
        if content and not content.endswith("\n"):
            content += "\n"
        self._write(content + lines)
        self.backups.append([None, lines])

    def replace_lines(self, src, dst, addToBackup=True):
        """Replace every literal occurrence of ``src`` by ``dst``.

        ``dst`` None means "remove".  Both are plain text: no regex or
        backslash interpretation is applied.  Nothing is written when
        ``src`` is empty or absent from the file.
        """
        content = self._read()
        if dst is None:
            dst = ""
        if not src or src not in content:
            return
        self._write(content.replace(src, dst))
        if addToBackup:
            self.backups.append([src, dst])

    def backup(self):
        """Undo the recorded edits, most recent first."""
        for oldLine, newLine in reversed(self.backups):
            self.replace_lines(newLine, oldLine, False)
class networkConfig():
    """Static helpers to inspect network devices and bridges.

    Device state is read from ``/sys/class/net`` and from the output of the
    ``route``, ``ifconfig`` and ``brctl`` commands run through ``bash``.
    """

    class devInfo():
        """Plain record describing one network device."""

        def __init__(self, macAddr, ipAddr, netmask, gateway, type, name):
            self.name = name
            self.macAddr = macAddr
            # Misspelled attribute kept as an alias for existing readers.
            self.macAdrr = macAddr
            self.ipAddr = ipAddr
            self.netmask = netmask
            self.gateway = gateway
            # One of "dev", "brport" or "bridge" when built by getDevInfo.
            self.type = type
            # dhcp or static
            self.method = None

    @staticmethod
    def getDefaultNetwork():
        """Return the devInfo of the device carrying the default route.

        The first default route listed by ``route -n`` is used and its
        gateway is stored on the returned object.  Raises
        CloudRuntimeException when there is no usable default route.
        """
        cmd = bash("route -n|awk \'/^0.0.0.0/ {print $2,$8}\'")
        if not cmd.isSuccess():
            logging.debug("Failed to get default route")
            return None
        # There may be zero or several default routes; use the first one.
        routes = [r for r in cmd.getStdout().split("\n") if r.strip()]
        fields = routes[0].split() if routes else []
        if len(fields) < 2:
            raise CloudRuntimeException("Failed to get default route")
        gateway, dev = fields[0], fields[1]
        pdi = networkConfig.getDevInfo(dev)
        if pdi is None:
            raise CloudRuntimeException("Default route device %s is not a network device"%dev)
        logging.debug("Found default network device:%s"%pdi.name)
        pdi.gateway = gateway
        return pdi

    @staticmethod
    def createBridge(dev, brName):
        """Create bridge ``brName`` (if needed), bring it up and add ``dev``.

        Returns False when bridging is unsupported or the bridge already
        has devices attached.
        """
        if not networkConfig.isBridgeSupported():
            logging.debug("bridge is not supported")
            return False
        if networkConfig.isBridgeEnslavedWithDevices(brName):
            logging.debug("bridge: %s has devices enslaved"%brName)
            return False
        cmds = ""
        if not networkConfig.isBridge(brName):
            cmds = "brctl addbr %s ;"%brName
        cmds += "ifconfig %s up;"%brName
        cmds += "brctl addif %s %s"%(brName, dev)
        return bash(cmds).isSuccess()

    @staticmethod
    def isBridgeEnslavedWithDevices(brName):
        """Return True if ``brName`` is a bridge with at least one port."""
        if not networkConfig.isBridge(brName):
            return False
        if not os.listdir("/sys/class/net/%s/brif"%brName):
            return False
        return True

    @staticmethod
    def isBridgeSupported():
        """Return True if bridging is available, loading the module if needed."""
        if os.path.exists("/proc/sys/net/bridge"):
            return True
        return bash("modprobe bridge").isSuccess()

    @staticmethod
    def isNetworkDev(devName):
        return os.path.exists("/sys/class/net/%s"%devName)

    @staticmethod
    def isBridgePort(devName):
        return os.path.exists("/sys/class/net/%s/brport"%devName)

    @staticmethod
    def isBridge(devName):
        return os.path.exists("/sys/class/net/%s/bridge"%devName)

    @staticmethod
    def getBridge(devName):
        """Return the name of the bridge ``devName`` is a port of, or None."""
        bridgeName = None
        if os.path.exists("/sys/class/net/%s/brport/bridge"%devName):
            realPath = os.path.realpath("/sys/class/net/%s/brport/bridge"%devName)
            bridgeName = realPath.split("/")[-1]
        return bridgeName

    @staticmethod
    def getEnslavedDev(br, brPort):
        """Return the device attached to ``br`` as port number ``brPort``.

        Returns None if ``br`` has no ports or no port has that number.
        """
        if not networkConfig.isBridgeEnslavedWithDevices(br):
            return None
        for dev in os.listdir("/sys/class/net/%s/brif"%br):
            with open("/sys/class/net/%s/brif/%s/port_no"%(br,dev)) as fp:
                # The port number is parsed as hexadecimal.
                br_port = int(fp.readline().strip("\n"), 16)
            if br_port == brPort:
                return dev
        return None

    @staticmethod
    def getDevInfo(dev):
        """Build a devInfo for ``dev`` from ``ifconfig`` output and sysfs.

        Returns None if ``dev`` is not a network device.  Address fields
        that cannot be parsed from the ifconfig output stay None.
        """
        if not networkConfig.isNetworkDev(dev):
            logging.debug("dev: " + dev + " is not a network device")
            return None
        netmask = None
        ipAddr = None
        macAddr = None
        cmd = bash("ifconfig " + dev)
        if not cmd.isSuccess():
            logging.debug("Failed to get address from ifconfig")
            return None
        for line in cmd.getLines():
            if line.find("HWaddr") != -1:
                macAddr = line.split("HWaddr ")[1].strip(" ")
            elif line.find("inet ") != -1:
                m = re.search(r"addr:(.*)\ *Bcast:(.*)\ *Mask:(.*)", line)
                if m is not None:
                    ipAddr = m.group(1).rstrip(" ")
                    netmask = m.group(3).rstrip(" ")
        if networkConfig.isBridgePort(dev):
            devType = "brport"
        elif networkConfig.isBridge(dev):
            devType = "bridge"
        else:
            devType = "dev"
        return networkConfig.devInfo(macAddr, ipAddr, netmask, None, devType, dev)
class networkConfigUbuntu(networkConfig):
    """Put the chosen NIC behind a bridge via /etc/network/interfaces.

    ``config()`` rewrites the interfaces file, disables network-manager if
    it is running and brings the bridge up; ``restore()`` undoes the file
    edits and restarts networking.
    """

    def __init__(self, syscfg):
        self.syscfg = syscfg
        self.netCfgFile = "/etc/network/interfaces"
        self.brName = None
        self.dev = None
        self.status = None
        self.cfoHandlers = []
        # Set by config(); defined here so restore() can always read it.
        self.netMgrRunning = False

    @staticmethod
    def _ifacePattern(name):
        """Regex matching the ``iface <name> ...`` line of exactly ``name``."""
        return "^ *iface %s\\s.*"%re.escape(name)

    def getNetworkMethod(self, line):
        """Return "static" or "dhcp" depending on what ``line`` mentions."""
        if line.find("static") != -1:
            return "static"
        elif line.find("dhcp") != -1:
            return "dhcp"
        else:
            logging.debug("Failed to find the network method from:%s"%line)
            return None

    def matchEndOfStanzas(self, line):
        """Return True if ``line`` begins a new interfaces-file stanza."""
        return re.match(r" *(iface|mapping|auto |allow-)", line) is not None

    def addBridge(self, br, dev):
        """Turn ``dev``'s iface stanza into a bridge ``br`` over ``dev``.

        The addressing method (static/dhcp) of ``dev`` is moved to the
        bridge.  Raises CloudRuntimeException if that method cannot be
        determined, before anything is changed.
        """
        pattern = self._ifacePattern(dev.name)
        with open(self.netCfgFile) as fp:
            for line in fp:
                match = re.match(pattern, line)
                if match is not None:
                    method = self.getNetworkMethod(match.group(0))
                    if method is not None:
                        dev.method = method
                        break
        if dev.method is None:
            # Writing "inet None" would leave an unusable configuration.
            raise CloudRuntimeException("Failed to find the network method of %s in %s"%(dev.name, self.netCfgFile))
        bash("ifdown %s"%dev.name)
        bridgeCfg = ("\niface %s inet manual\n"
                     " auto %s\n"
                     " iface %s inet %s\n"
                     " bridge_ports %s\n")%(dev.name, br, br, dev.method, dev.name)
        cfo = configFileOps(self.netCfgFile)
        cfo.replace_line(pattern, bridgeCfg)
        self.cfoHandlers.append(cfo)

    def addDev(self, br, dev):
        logging.debug("Haven't implement yet")

    def addBridgeAndDev(self, br, dev):
        logging.debug("Haven't implement yet")

    def writeToCfgFile(self, br, dev):
        """Make the interfaces file describe bridge ``br`` over ``dev``.

        ``br`` is a bridge name, ``dev`` a devInfo.  Returns True when both
        are already present in the file; otherwise delegates to the
        matching add* method and returns None.
        """
        with open(self.netCfgFile) as fp:
            cfg = fp.read()
        ifaceDev = re.search(self._ifacePattern(dev.name), cfg, re.MULTILINE)
        ifaceBr = re.search(self._ifacePattern(br), cfg, re.MULTILINE)
        if ifaceDev is not None and ifaceBr is not None:
            logging.debug("%s:%s already configured"%(br, dev.name))
            return True
        elif ifaceDev is not None and ifaceBr is None:
            #reconfig bridge
            self.addBridge(br, dev)
        elif ifaceDev is None and ifaceBr is not None:
            #reconfig dev
            self.addDev(br, dev)
        else:
            #both need to be reconfigured
            self.addBridgeAndDev(br, dev)

    def cfgNetwork(self, dev=None, brName=None):
        """Work out the bridge/device pair for ``dev`` and configure it.

        ``dev`` defaults to the default-route device and ``brName`` to
        "cloudbr0" for a plain device.  Sets ``self.brName`` and
        ``self.dev``.  Raises CloudRuntimeException when a needed device
        cannot be found.
        """
        if dev is None:
            device = networkConfig.getDefaultNetwork()
        else:
            device = networkConfig.getDevInfo(dev)
        if device is None:
            raise CloudRuntimeException("Failed to find network device: %s"%dev)

        if device.type == "dev":
            #Need to create a bridge on it
            if brName is None:
                brName = "cloudbr0"
            self.writeToCfgFile(brName, device)
        elif device.type == "brport":
            # Use the resolved device name: ``dev`` may be None here.
            brName = networkConfig.getBridge(device.name)
            self.writeToCfgFile(brName, device)
        elif device.type == "bridge":
            #Fixme, assuming the outgoing physcial device is on port 1
            brName = device.name
            enslavedDev = networkConfig.getEnslavedDev(brName, 1)
            if enslavedDev is None:
                raise CloudRuntimeException("Failed to find the device on port 1 of bridge %s"%brName)
            device = networkConfig.getDevInfo(enslavedDev)
            if device is None:
                raise CloudRuntimeException("Failed to find network device: %s"%enslavedDev)
            self.writeToCfgFile(brName, device)
        self.brName = brName
        self.dev = device.name

    def config(self):
        """Configure the bridge; records the outcome in ``self.status``."""
        writeProgressBar("Configure Network...", None)
        try:
            self.cfgNetwork(self.syscfg.env.defaultNic)
            self.netMgrRunning = self.syscfg.isServiceRunning("network-manager")
            if self.netMgrRunning:
                self.syscfg.stopService("network-manager")
                self.syscfg.disableService("network-manager")
            bash("ifup %s"%self.brName)
            # The same bridge is appended three times; cloudKitConfig reads
            # nics[0], nics[1] and nics[2].
            self.syscfg.env.nics.append(self.brName)
            self.syscfg.env.nics.append(self.brName)
            self.syscfg.env.nics.append(self.brName)
            writeProgressBar(None, True)
            self.status = True
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
            self.status = False

    def restore(self):
        """Undo config(): restore the file, network-manager and networking."""
        if self.status is None:
            return
        #restore cfg file at first
        writeProgressBar("Restoring Network...", None)
        for cfo in self.cfoHandlers:
            cfo.backup()
        try:
            if self.netMgrRunning:
                self.syscfg.enableService("network-manager")
                self.syscfg.startService("network-manager")
            bash("/etc/init.d/networking stop")
            bash("/etc/init.d/networking start")
            writeProgressBar(None, True)
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
class cgroupConfigUbuntu():
    """Set up the "virt" cpu cgroup and the cgrules entry for libvirtd."""

    def __init__(self, syscfg):
        self.syscfg = syscfg
        self.backup = []
        # None until config() ran, then True/False for success/failure.
        self.status = None
        self.cfoHandlers = []

    def config(self):
        """Add the cgroup settings and (re)enable cgconfig and cgred.

        Raises CloudRuntimeException on any failure.
        """
        writeProgressBar("Configure cgroup...", None)
        try:
            cfo = configFileOps("/etc/cgconfig.conf")
            addConfig = ("group virt {\n"
                         " cpu {\n"
                         " cpu.shares = 9216;\n"
                         " }\n"
                         " }\n")
            cfo.add_lines(addConfig)
            self.cfoHandlers.append(cfo)
            self.syscfg.stopService("cgconfig")
            self.syscfg.enableService("cgconfig",forcestart=True)

            cfo = configFileOps("/etc/cgrules.conf")
            cfgline = "root:/usr/sbin/libvirtd cpu virt/\n"
            cfo.add_lines(cfgline)
            self.cfoHandlers.append(cfo)
            self.syscfg.stopService("cgred")
            self.syscfg.enableService("cgred")
            writeProgressBar(None, True)
            self.status = True
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
            self.status = False
            raise CloudRuntimeException("Failed to configure cgroup, please see the /var/log/cloud/setupAgent.log for detail")

    def restore(self):
        """Undo the file edits and restart the services; no-op before config()."""
        if self.status is None:
            return
        writeProgressBar("Restoring cgroup...", None)
        try:
            for cfo in self.cfoHandlers:
                cfo.backup()
            self.syscfg.stopService("cgconfig")
            self.syscfg.enableService("cgconfig",forcestart=True)
            self.syscfg.stopService("cgred")
            self.syscfg.enableService("cgred")
            writeProgressBar(None, True)
        except:
            # Record why the restore failed, as the other services do.
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
class securityPolicyConfigUbuntu():
    """Stop apparmor and remove it from the boot sequence."""

    def __init__(self, syscfg):
        self.syscfg = syscfg
        # None until apparmor was actually changed, then True/False.
        self.status = None

    def config(self):
        """Disable apparmor if ``service apparmor status`` prints anything.

        When the status output is empty nothing is changed and ``status``
        stays None, so restore() will not touch apparmor either.  Raises
        CloudRuntimeException on failure.
        """
        writeProgressBar("Configure Security Policy...", None)
        try:
            if bash("service apparmor status").getStdout() == "":
                self.spRunning = False
                # Terminate the progress line before leaving early.
                writeProgressBar(None, True)
                return
            bash("service apparmor stop")
            bash("update-rc.d -f apparmor remove")
            self.status = True
            writeProgressBar(None, True)
        except:
            logging.debug(formatExceptionInfo())
            self.status = False
            writeProgressBar(None, False)
            raise CloudRuntimeException("Failed to configure apparmor, please see the /var/log/cloud/setupAgent.log for detail, or you can manually disable it before starting cloudKit")

    def restore(self):
        """Re-enable and start apparmor; no-op if config() changed nothing."""
        if self.status is None:
            return
        writeProgressBar("Restoring Security Policy...", None)
        try:
            self.syscfg.enableService("apparmor")
            self.syscfg.startService("apparmor")
            writeProgressBar(None, True)
        except:
            # Record why the restore failed, as the other services do.
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
class libvirtConfigUbuntu():
    """Configure libvirt: cgroup daemon, TCP listening and qemu.conf."""

    def __init__(self, syscfg):
        self.cfoHandlers = []
        self.status = None
        self.syscfg = syscfg

    def _appendLines(self, fileName, text):
        """Add ``text`` to ``fileName`` and remember the editor for restore."""
        editor = configFileOps(fileName)
        editor.add_lines(text)
        self.cfoHandlers.append(editor)

    def setupLiveMigration(self):
        """Make libvirtd listen on TCP port 16509 without authentication.

        libvirtd.conf gets the listen settings; the daemon's start options
        are set in the upstart job if it exists, else in
        /etc/default/libvirt-bin.
        """
        libvirtdConf = configFileOps("/etc/libvirt/libvirtd.conf")
        for setting in ("listen_tcp=1",
                        'tcp_port="16509"',
                        'auth_tcp="none"',
                        "listen_tls=0"):
            key = setting.partition("=")[0] + '='
            libvirtdConf.replace_or_add_line(key, setting)
        self.cfoHandlers.append(libvirtdConf)

        upstartJob = "/etc/init/libvirt-bin.conf"
        if not os.path.exists(upstartJob):
            defaults = configFileOps("/etc/default/libvirt-bin")
            defaults.replace_or_add_line("libvirtd_opts=","libvirtd_opts='-l -d'")
            self.cfoHandlers.append(defaults)
            return
        job = configFileOps(upstartJob)
        job.replace_line("exec /usr/sbin/libvirtd","exec /usr/sbin/libvirtd -d -l")
        self.cfoHandlers.append(job)

    def config(self):
        """Apply all libvirt settings and restart libvirt-bin.

        Raises CloudRuntimeException on any failure.
        """
        writeProgressBar("Configure Libvirt...", None)
        try:
            self._appendLines("/etc/default/libvirt-bin",
                              "export CGROUP_DAEMON='cpu:/virt'")
            self.setupLiveMigration()
            qemuSettings = "cgroup_controllers = [ \"cpu\" ]\n" \
                           "security_driver = \"none\"\n"
            self._appendLines("/etc/libvirt/qemu.conf", qemuSettings)
            self.syscfg.stopService("libvirt-bin")
            self.syscfg.enableService("libvirt-bin")
            writeProgressBar(None, True)
            self.status = True
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
            self.status = False
            raise CloudRuntimeException("Failed to configure libvirt, please see the /var/log/cloud/setupAgent.log for detail")

    def restore(self):
        """Undo the file edits and restart libvirt-bin; no-op before config()."""
        if self.status is None:
            return
        writeProgressBar("Restoring Libvirt...", None)
        for editor in self.cfoHandlers:
            editor.backup()
        try:
            self.syscfg.stopService("libvirt-bin")
            self.syscfg.startService("libvirt-bin")
            writeProgressBar(None, True)
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
class firewawllConfigUbuntu():
    """Open the ports the agent needs in ufw."""

    def __init__(self, syscfg):
        self.status = None
        self.syscfg = syscfg

    def config(self):
        """Add the ufw allow rules and restart ufw.

        Raises CloudRuntimeException on any failure.
        """
        writeProgressBar("Configure Firewall...", None)
        try:
            rules = ["ufw allow %s"%port for port in ("22", "1798", "16509")]
            rules.append("ufw allow proto tcp from any to any port 5900:6100")
            rules.append("ufw allow proto tcp from any to any port 49152:49216")
            for rule in rules:
                bash(rule)
            self.syscfg.stopService("ufw")
            self.syscfg.startService("ufw")
            writeProgressBar(None, True)
            self.status = True
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
            self.status = False
            raise CloudRuntimeException("Failed to configure firewall, please see the /var/log/cloud/setupAgent.log for detail")

    def restore(self):
        """Nothing is undone yet; only prints a notice once config() ran."""
        if self.status is not None:
            print("not implemented yet")
        return
class cloudKitConfig():
    """Write the agent's properties file and restart cloud-agent."""

    def __init__(self, syscfg):
        self.status = None
        self.syscfg = syscfg

    def config(self):
        """Save the agent settings taken from ``syscfg.env`` and restart it.

        Raises CloudRuntimeException on any failure.
        """
        writeProgressBar("Configure CloudKit...", None)
        try:
            cfo = configFileOps("/etc/cloud/agent/agent.properties")
            env = self.syscfg.env
            settings = [
                ("host", env.mgtSvr),
                ("zone", env.zoneToken),
                ("port", "443"),
                ("private.network.device", env.nics[0]),
                ("public.network.device", env.nics[1]),
                ("guest.network.device", env.nics[2]),
                ("guid", str(env.uuid)),
                ("mount.path", "/mnt"),
                ("resource", "com.cloud.storage.resource.LocalSecondaryStorageResource|com.cloud.agent.resource.computing.CloudZonesComputingResource"),
            ]
            for key, value in settings:
                cfo.addEntry(key, value)
            cfo.save()
            self.syscfg.stopService("cloud-agent")
            self.syscfg.startService("cloud-agent")
            writeProgressBar(None, True)
            self.status = True
        except:
            logging.debug(formatExceptionInfo())
            writeProgressBar(None, False)
            self.status = False
            raise CloudRuntimeException("Failed to configure cloudKit, please see the /var/log/cloud/setupAgent.log for detail")

    def restore(self):
        """Nothing to undo for this step."""
        pass
# Distribution identifiers used by DistributionDetector.
Unknown, Fedora, CentOS, RHEL6, RHEL5, Ubuntu = range(6)
class DistributionDetector():
    """Detect the Linux distribution from well-known files under /etc.

    The checks run in this order: /etc/fedora-release, /etc/centos-release,
    /etc/redhat-release (whose first line tells RHEL6, CentOS and RHEL5
    apart) and /etc/legal containing "Ubuntu".  Anything else is Unknown.
    """

    def __init__(self):
        self.distro = Unknown
        if os.path.exists("/etc/fedora-release"):
            self.distro = Fedora
        elif os.path.exists("/etc/centos-release"):
            self.distro = CentOS
        elif os.path.exists("/etc/redhat-release"):
            with open("/etc/redhat-release") as fp:
                version = fp.readline()
            if version.find("Red Hat Enterprise Linux Server release 6") != -1:
                self.distro = RHEL6
            elif version.find("CentOS release") != -1:
                self.distro = CentOS
            else:
                self.distro = RHEL5
        elif os.path.exists("/etc/legal") and "Ubuntu" in self._readFile("/etc/legal"):
            self.distro = Ubuntu

    @staticmethod
    def _readFile(path):
        """Return the content of ``path``, closing the file afterwards."""
        with open(path) as fp:
            return fp.read()

    def getVersion(self):
        """Return the detected distribution identifier."""
        return self.distro
class sysConfig(object):
    """Do-nothing system configurator, plus the factory choosing a real one."""

    @staticmethod
    def getSysConfigFactory(glbEnv):
        """Return the configurator for the detected distribution.

        Ubuntu gets a sysConfigUbuntu built from ``glbEnv``; every other
        distribution gets this no-op base class.
        """
        if DistributionDetector().getVersion() != Ubuntu:
            return sysConfig()
        return sysConfigUbuntu(glbEnv)

    def config(self):
        """No-op."""
        return None

    def restore(self):
        """No-op."""
        return None
class sysConfigUbuntu(sysConfig):
    """Runs the Ubuntu configuration steps and offers service helpers."""

    def __init__(self, glbEnv):
        self.env = glbEnv
        # Steps are configured, and restored, in this order.
        steps = (cgroupConfigUbuntu,
                 securityPolicyConfigUbuntu,
                 networkConfigUbuntu,
                 libvirtConfigUbuntu,
                 firewawllConfigUbuntu,
                 cloudKitConfig)
        self.services = [step(self) for step in steps]

    def config(self):
        """Run every step's config(); returns False if the KVM check fails."""
        kvmReady = self.check()
        if not kvmReady:
            return False
        for step in self.services:
            step.config()

    def restore(self):
        """Run every step's restore()."""
        for step in self.services:
            step.restore()

    def check(self):
        """Return the KVM check result; any error becomes CloudRuntimeException."""
        try:
            return self.isKVMEnabled()
        except:
            raise CloudRuntimeException("Checking KVM...[Failed]\nPlease enable KVM on this machine\n")

    def isServiceRunning(self, servicename):
        """Return True if ``service <name> status`` reports "start/running"."""
        try:
            status = bash("service " + servicename + " status")
            return "start/running" in status.getStdout()
        except:
            return False

    def stopService(self, servicename,force=False):
        """Stop the service if it is running or ``force`` is set."""
        running = self.isServiceRunning(servicename)
        if not running and not force:
            return None
        return bash("service " + servicename +" stop").isSuccess()

    def disableService(self, servicename):
        """Stop the service and remove its rc.d links."""
        self.stopService(servicename)
        bash("update-rc.d -f " + servicename + " remove")

    def startService(self, servicename,force=False):
        """Start the service if it is not running or ``force`` is set."""
        stopped = not self.isServiceRunning(servicename)
        if stopped or force:
            bash("service " + servicename + " start")

    def enableService(self, servicename,forcestart=False):
        """Re-create the service's rc.d links, then start it."""
        for action in (" remove", " start 2 3 4 5 ."):
            bash("update-rc.d -f " + servicename + action)
        self.startService(servicename,force=forcestart)

    def isKVMEnabled(self):
        """Return whether the ``kvm-ok`` command succeeds."""
        return bash("kvm-ok").isSuccess()
def getUserInputs():
    """Interactively ask for the setup parameters.

    Prompts for the management server, the zone token and the network
    device, offering the values found in agent.properties and the
    default-route device as defaults (used when the answer is empty).

    Returns ``[mgtSvr, zoneToken, network]``.  Exits the process with
    status 1 if the server cannot be resolved or no device is available.
    """
    print("Welcome to CloudKit Setup:")

    cfo = configFileOps("/etc/cloud/agent/agent.properties")
    oldMgt = cfo.getEntry("host")
    mgtSvr = raw_input("Please input the Management Server Name/IP:[%s]"%oldMgt)
    if mgtSvr == "":
        mgtSvr = oldMgt
    try:
        socket.getaddrinfo(mgtSvr, 443)
    except Exception:
        print("Failed to resolve %s. Please input correct server name or IP."%mgtSvr)
        sys.exit(1)

    oldToken = cfo.getEntry("zone")
    zoneToken = raw_input("Please input the Zone Token:[%s]"%oldToken)
    if zoneToken == "":
        zoneToken = oldToken

    try:
        defaultNic = networkConfig.getDefaultNetwork()
        # Read the name inside the try: a missing device must end up in the
        # message below, not in an AttributeError traceback.
        defNic = defaultNic.name
    except Exception:
        print("Failed to get default route. Please configure your network to have a default route")
        sys.exit(1)

    network = raw_input("Please choose which network used to create VM:[%s]"%defNic)
    if network == "":
        if defNic == "":
            print("You need to specifiy one of Nic or bridge on your system")
            sys.exit(1)
        network = defNic

    return [mgtSvr,zoneToken, network]
def writeProgressBar(msg, result):
    """Write one piece of a progress line to stdout and flush it.

    A non-None ``msg`` is written left-justified in a 30 character column
    without a newline.  Otherwise ``result`` True writes "[OK]" and False
    writes "[Failed]", each followed by a newline.  Any other combination
    writes nothing.
    """
    if msg is not None:
        output = "%-30s"%msg
    elif result is True:
        output = "[%-2s]\n"%"OK"
    elif result is False:
        output = "[%-6s]\n"%"Failed"
    else:
        # Neither a message nor a boolean outcome: nothing to report.
        return
    sys.stdout.write(output)
    sys.stdout.flush()
class globalEnv:
    """Empty attribute holder; the script attaches its settings to an instance."""
if __name__ == '__main__':
    # Entry point: collect the settings (interactively, or from the command
    # line with -y), configure the system and roll back when a step fails.
    #todo: check executing permission
    initLoging("/var/log/cloud/setupAgent.log")
    glbEnv = globalEnv()

    parser = OptionParser()
    parser.add_option("-y", "--yes", action="store_true", dest="auto", help="answer yes for all questions")
    parser.add_option("-m", "--mgtSvr", dest="mgt", help="myCloud management server name or IP")
    parser.add_option("-z", "--zone-token", dest="zone", help="zone token")
    (options, args) = parser.parse_args()

    if options.auto is None:
        userInputs = getUserInputs()
        glbEnv.mgtSvr = userInputs[0]
        glbEnv.zoneToken = userInputs[1]
        glbEnv.defaultNic = userInputs[2]
    else:
        if options.mgt is None or options.zone is None:
            print("myCloud mgt server or zone token can not be NULL")
            sys.exit(1)
        glbEnv.mgtSvr = options.mgt
        glbEnv.zoneToken = options.zone
        try:
            defaultNic = networkConfig.getDefaultNetwork()
            glbEnv.defaultNic = defaultNic.name
        except Exception:
            print("Failed to get default route. Please configure your network to have a default route")
            sys.exit(2)

    glbEnv.nics = []
    #generate UUID
    # Reuse the guid already stored in agent.properties, if there is one.
    glbEnv.uuid = configFileOps("/etc/cloud/agent/agent.properties").getEntry("guid")
    if glbEnv.uuid == "":
        glbEnv.uuid = uuid.uuid1()

    print("Starting to configure your system:")
    syscfg = sysConfig.getSysConfigFactory(glbEnv)
    try:
        syscfg.config()
        print("Cloudkit setup is Done!")
    except CloudRuntimeException as e:
        print(e)
        print("Try to restore your system:")
        try:
            syscfg.restore()
        except Exception:
            # Best effort: keep going, but leave a trace in the log.
            logging.debug(formatExceptionInfo())
        # Report the failed setup to the caller through the exit status.
        sys.exit(1)