mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
260 lines
8.7 KiB
Python
Executable File
260 lines
8.7 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# TODO:
|
|
# add multipath -ll
|
|
# add iscsiadm output
|
|
#
|
|
import time
|
|
import socket
|
|
import getopt
|
|
import sys
|
|
import subprocess, threading
|
|
import logging
|
|
import logging.handlers
|
|
import re
|
|
import shutil
|
|
import os
|
|
|
|
""" a class to do checks with as a thread so we can have nice timeouts """
|
|
class Check(object):
    """Run heartbeat work in a worker thread so it can be bounded by a timeout.

    In write mode (``check=False``) the current epoch time is written to
    ``<mount>/<file>`` on every matching NFS mount.  In check mode
    (``check=True``) that timestamp is read back and its age is compared
    with the timeout.  ``cmd`` is executed after the heartbeat pass and
    ``failcmd`` is executed when a write-mode pass is still running once
    the timeout has expired.

    Attributes set by the checks:
        ok      -- None until a check ran, then True/False (sticky False).
        results -- dict of mount point -> [ok, delay] filled in check mode.
    """

    def __init__(self, cmd="", failcmd="", primary="",
                 file="", timeout="120", interval=1, logger="",
                 check=False):
        self.file = file
        self.cmd = cmd
        self.failcmd = failcmd
        self.primary = primary
        # The default (and some callers) supply the timeout as a string;
        # Thread.join() needs a number.
        self.timeout = float(timeout)
        self.interval = interval
        self.process = None
        # An empty logger argument would make every log call fail, so fall
        # back to the named logger.
        if logger:
            self.logger = logger
        else:
            self.logger = logging.getLogger('cs-heartbeat')
        self.check = check
        self.ok = None
        self.results = {}

    def readhb(self, file=""):
        """Return the first line of the heartbeat file, or 0 if it is absent."""
        if os.path.isfile(file):
            # try/finally instead of "with": closes the handle on errors too
            # and stays valid on very old interpreters.
            text_file = open("%s" % file, "r")
            try:
                return text_file.readline()
            finally:
                text_file.close()
        return 0

    def writehb(self, file=""):
        """Write the current epoch time to ``file`` via a ``.new`` temp file.

        The timestamp is written next to the target and then moved over it,
        so readers do not see a half-written heartbeat.  Does nothing when
        ``file`` is empty.
        """
        if file:
            nfile = "%s.new" % (file)
            epoch = time.time()
            text_file = open("%s" % nfile, "w")
            try:
                text_file.write("%s" % epoch)
            finally:
                text_file.close()
            shutil.move(nfile, file)
            self.logger.debug('Worked on file %s for %s' %
                              (file, (time.time() - epoch)))

    def nfsoutput(self):
        """Return the mounted NFS mount points that match the primary pattern.

        We only want mounted nfs filesystems.  The mount point is taken from
        the third whitespace-separated field of each ``mount -v -t nfs``
        line and kept when it matches ``^<primary>`` (a regular expression).
        """
        command = "mount -v -t nfs"
        p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
        # communicate() drains the pipe and reaps the child, so the
        # long-running loop does not accumulate zombie processes.
        output = p.communicate()[0]
        if not isinstance(output, str):
            # Python 3 hands back bytes.
            output = output.decode("utf-8", "replace")

        pattern = self.primary
        if not pattern:
            # Historically this method read the module-level ``primary``
            # set by the script entry point; keep honouring it when the
            # constructor was not given one.
            pattern = globals().get("primary", "")
        test = re.compile("^%s" % (pattern))

        mounts = []
        for line in output.splitlines():
            fields = line.split()
            # Skip blank/short lines instead of raising IndexError.
            if len(fields) > 2 and test.search(fields[2]):
                mounts.append(fields[2])
        return mounts

    def run(self, timeout):
        """Run one heartbeat pass in a thread, bounded by ``self.timeout``.

        The main run for all checks we do, everything is in here on purpose.
        The other FSs to heartbeat should be added to filesystems...!

        timeout -- maximum heartbeat age (seconds) accepted in check mode;
                   also reported in the log messages.
        """
        limit = float(timeout)

        def target():
            filesystems = []
            filesystems.extend(self.nfsoutput())
            for fs in filesystems:
                if not self.file:
                    continue
                hbfile = "%s/%s" % (fs, self.file)
                if self.check == False:
                    self.writehb(hbfile)
                    continue

                res = self.readhb(hbfile)
                try:
                    stamp = float(res)
                except (TypeError, ValueError):
                    # Empty or corrupt heartbeat: treat it as never written
                    # so the check fails instead of the worker dying and
                    # leaving ``ok`` unset.
                    stamp = 0.0
                delay = time.time() - stamp
                if delay < limit:
                    # A previous failure is sticky: only the first healthy
                    # result may set ok to True.
                    if self.ok is None:
                        self.logger.info("%s/%s is ok %s with %s" %
                                         (fs, self.file, timeout, delay))
                        self.ok = True
                else:
                    self.logger.warning("%s/%s exceeded timeout %s with %s" %
                                        (fs, self.file, timeout, delay))
                    self.ok = False
                # NOTE(review): this stores the aggregate ok state, not a
                # per-filesystem verdict -- confirm that is intended.
                self.results[fs] = [self.ok, delay]

            epoch = time.time()
            if self.cmd:
                self.logger.debug('Executing: %s' % (self.cmd))
                self.process = subprocess.Popen(self.cmd, shell=True)
                self.process.communicate()
                self.logger.info('Executed: %s in %s' %
                                 (self.cmd, (time.time() - epoch)))

        thread = threading.Thread(target=target)
        thread.start()
        thread.join(self.timeout)

        # isAlive() was removed in Python 3.9; is_alive() is missing on very
        # old interpreters, so pick whichever exists.
        still_running = getattr(thread, "is_alive", None)
        if still_running is None:
            still_running = thread.isAlive
        if still_running() and self.check == False:
            self.logger.critical('Critical: thread timeout; %s' % (timeout))
            if self.failcmd:
                self.logger.critical('Critical: executing; %s' %
                                     (self.failcmd))
                subprocess.Popen(self.failcmd, shell=True,
                                 stdout=subprocess.PIPE)
|
|
|
|
""" here we figure out what we're running on more or less """
|
|
def figureOutPrimary():
    """Return the primary-storage mount prefix for the detected hypervisor.

    Scans /etc/redhat-release: a line containing "XenServer" yields
    "/var/run/sr-mount", a line containing "Oracle VM server" yields
    "/OVS/Repositories/".  When the file is missing or names neither, a
    message is printed and the process exits with status 42.
    """
    redhat = "/etc/redhat-release"
    if os.path.isfile(redhat):
        release = open(redhat)
        # try/finally so the handle is closed on every return path.
        try:
            for line in release:
                if "XenServer" in line:
                    return "/var/run/sr-mount"
                if "Oracle VM server" in line:
                    return "/OVS/Repositories/"
        finally:
            release.close()
    # Single-argument call form prints identically on Python 2 and 3.
    print("Unknown hypervisor, consider adding it, exiting")
    sys.exit(42)
|
|
|
|
""" The logger is here """
|
|
def Logger(level=logging.DEBUG):
    """Return the 'cs-heartbeat' logger, set to ``level`` and sending to syslog.

    A SysLogHandler on /dev/log is attached the first time only; later
    calls just update the level, so repeated calls do not duplicate every
    syslog message.
    """
    logger = logging.getLogger('cs-heartbeat')
    logger.setLevel(level)
    for attached in logger.handlers:
        if isinstance(attached, logging.handlers.SysLogHandler):
            # Already wired to syslog by an earlier call.
            return logger
    handler = logging.handlers.SysLogHandler(address='/dev/log')
    logger.addHandler(handler)
    return logger
|
|
|
|
""" main for preso-dent """
|
|
if __name__ == '__main__':
    # Script entry point: parse the options, detach into the background
    # unless only the state is checked, then run the heartbeat pass in a
    # loop.  The option variables are deliberately kept as module-level
    # names (primary, file, cmd, failcmd).
    me = os.path.basename(__file__)
    timeout = 120
    interval = 1
    hostname = socket.gethostname()
    file = ".hb-%s" % (hostname)
    cmd = ""
    level = logging.DEBUG
    primary = ""
    checkstate = False
    # Default fencing action: enable sysrq and trigger a kernel crash.
    failcmd = ("echo 1 > /proc/sys/kernel/sysrq "
               "&& "
               "echo c > /proc/sysrq-trigger")

    # xenserver:
    if me == "heartbeat":
        # String result = callHostPluginPremium(conn, "heartbeat",
        #     "host", _host.uuid,
        #     "timeout", Integer.toString(_heartbeatTimeout),
        #     "interval", Integer.toString(_heartbeatInterval));
        # if (result == null || !result.contains("> DONE <")) {
        usage = """Usage:
 --host|-h: host guid.
 --timeout|-y: timeout to fail on
 --interval|-i: time between checks
 --state|-s: check the state"""
        try:
            # Long options need a trailing '=' to accept a value.
            opts, args = getopt.getopt(sys.argv[1:], "h:y:i:s",
                                       ['host=', 'timeout=', 'interval=',
                                        'state'])
            for o, a in opts:
                # getopt reports options with their dashes, e.g. '-h' or
                # '--host'; compare against tuples, not bare strings.
                if o in ('-h', '--host'):
                    file = "hb-%s" % (a)
                if o in ('-y', '--timeout'):
                    timeout = int(a)
                if o in ('-i', '--interval'):
                    interval = int(a)
                if o in ('-s', '--state'):
                    checkstate = True
        except (getopt.GetoptError, ValueError):
            print(usage)
            # Non-zero so a usage error is never mistaken for a healthy
            # state check.
            sys.exit(2)
    # OVM3:
    else:
        usage = """Usage:
 --guid|-g: guid of the host to check
 --primary|-p: match for primary storage to monitor.
 --failcmd|-f: executed on timeout.
 --cmd|-c: command to execute next to hb file(s) on primary.
 --timeout|-t: execute failcmd after timeout(s) is hit.
 --interval|-i: run the checks every interval(s).
 --state|-s: check state"""
        # get options
        try:
            opts, args = getopt.getopt(sys.argv[1:], "g:p:f:c:t:i:s",
                                       ['guid=', 'primary=', 'failcmd=',
                                        'cmd=', 'timeout=', 'interval=',
                                        'state'])
            for o, a in opts:
                if o in ('-g', '--guid'):
                    file = ".hb-%s" % (a)
                if o in ('-p', '--primary'):
                    primary = a
                if o in ('-f', '--failcmd'):
                    failcmd = a
                if o in ('-c', '--cmd'):
                    cmd = a
                if o in ('-t', '--timeout'):
                    timeout = int(a)
                if o in ('-i', '--interval'):
                    interval = int(a)
                if o in ('-s', '--state'):
                    checkstate = True
        except (getopt.GetoptError, ValueError):
            print(usage)
            sys.exit(2)

    if primary == "":
        primary = figureOutPrimary()

    logger = Logger(level=level)
    if checkstate == False:
        os.chdir("/")
        # os.setsid()
        os.umask(0)
        try:
            pid = os.fork()
            if pid > 0:
                # exit first parent
                if me == "heartbeat":
                    # The XenServer caller looks for this marker.
                    print("> DONE <")
                sys.exit(0)
        except OSError:
            # sys.exc_info() instead of "except OSError, e" / "as e" so the
            # line parses on every Python version.
            e = sys.exc_info()[1]
            sys.stderr.write("fork #1 failed: %d (%s)\n" %
                             (e.errno, e.strerror))
            sys.exit(1)

    checker = Check(cmd=cmd,
                    failcmd=failcmd,
                    primary=primary,
                    file=file,
                    timeout=timeout,
                    interval=interval,
                    logger=logger,
                    check=checkstate)

    while True:
        start = time.time()
        checker.run(timeout)
        runtime = time.time() - start
        logger.debug("cmd time: %s" % (runtime))
        if checkstate:
            # One-shot mode: report per filesystem and exit with the result.
            for fs in checker.results:
                print("%s: %s" % (fs, checker.results[fs]))
            if checker.ok == False:
                sys.exit(1)
            else:
                sys.exit(0)
        if runtime > interval:
            logger.warning('Warning: runtime %s bigger than interval %s' %
                           (runtime, interval))
        else:
            time.sleep(interval)
|