mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
261 lines
7.4 KiB
Python
Executable File
261 lines
7.4 KiB
Python
Executable File
#!/usr/bin/python
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
|
|
|
|
|
|
# Module metadata: author credited for this script.
__author__ = 'jayapalreddy'
|
|
|
|
from ConfigParser import SafeConfigParser
|
|
from subprocess import *
|
|
from os import path
|
|
import time
|
|
|
|
# Path of the debug log file referenced by printd().
monitor_log='/var/log/monitor.log'
|
|
class StatusCodes:
    """Integer result codes returned by the helpers in this script."""

    (
        SUCCESS,      # 0 - operation succeeded (e.g. pid file matched)
        FAILED,       # 1 - operation failed
        INVALID_INP,  # 2 - caller supplied unusable input
        RUNNING,      # 3 - monitored process is running
        STOPPED,      # 4 - monitored process could not be recovered
        STARTING,     # 5 - not referenced elsewhere in this script
    ) = range(6)
|
|
|
|
class log:
    """Severity labels placed in square brackets at the start of an alert."""

    INFO, ALERT, CRIT, NOTIF = 'INFO', 'ALERT', 'CRIT', 'NOTIF'
|
|
|
|
|
|
|
|
|
|
def getConfig( config_file_path = "/etc/monitor.conf" ):
    """Parse the monitor configuration file into nested dictionaries.

    config_file_path -- INI-style file to read.

    Returns a dict with one entry per section of the file; each value is a
    dict mapping that section's option names to their string values.
    """
    cfg = SafeConfigParser()
    cfg.read( config_file_path )

    # One inner dict per section, built straight from the (name, value) pairs.
    return dict(
        (section_name, dict(cfg.items(section_name)))
        for section_name in cfg.sections()
    )
|
|
|
|
def printd (msg, enabled=False):
    """Debug logger: append str(msg) plus a newline to the monitor log.

    msg     -- any object; it is converted with str() before being written.
    enabled -- logging is off unless this is true, so plain ``printd(msg)``
               calls remain no-ops that never touch the filesystem.

    Always returns 0.  When enabled, an error opening or writing the log
    file propagates to the caller.
    """
    if not enabled:
        return 0

    # Append mode creates the file on first use and always writes at the end,
    # and the with-block closes the handle even if the write fails.
    with open(monitor_log, 'a') as log_file:
        log_file.write(str(msg) + "\n")
    return 0
|
|
|
|
def raisealert(severity, msg, process_name=None):
    """Send one alert line to the system log through the ``logger`` utility.

    severity     -- label placed in square brackets at the start of the line
                    (the ``log`` class provides the values used by callers).
    msg          -- alert text.
    process_name -- optional process name, added as a second bracketed field.

    The logged text is ``[severity] [process_name] msg``, or
    ``[severity] msg`` when process_name is None, tagged ``monit``.

    Returns None.  Alerting is best-effort: if ``logger`` cannot be started
    the alert is dropped rather than aborting the monitoring run.
    """
    if process_name is not None:
        text = '[' + severity + ']' + " " + '[' + process_name + ']' + " " + msg
    else:
        text = '[' + severity + ']' + " " + msg

    # Hand logger an argument vector instead of a shell command line: left
    # unquoted, "[INFO]" is a glob pattern and any shell metacharacter in the
    # message would be interpreted instead of logged.
    try:
        # wait() reaps the child so no zombie outlives the call.
        Popen(['logger', '-t', 'monit', text]).wait()
    except OSError:
        # logger is missing or could not be executed; keep monitoring.
        pass
|
|
|
|
|
|
def isPidMatchPidFile(pidfile, pids):
    """Check whether the pid stored in a pid file is one of the given pids.

    pidfile -- path of the pid file to read.
    pids    -- non-empty list of pid strings; whitespace around each entry
               is ignored.

    Returns StatusCodes.SUCCESS when the stripped content of the pid file
    equals one of the stripped entries of pids.  Returns StatusCodes.FAILED
    for invalid arguments, a missing, unreadable or empty pid file, or when
    no entry matches.
    """
    if not pidfile or not isinstance(pids, list) or len(pids) == 0:
        print("Invalid Arguments")
        return StatusCodes.FAILED

    if not path.isfile(pidfile):
        # It seems there is no pid file for this service
        printd("The pid file "+pidfile+" is not there for this process")
        return StatusCodes.FAILED

    try:
        # The with-block closes the file on every path, including the
        # early return taken when a match is found.
        with open(pidfile, 'r') as fd:
            inp = fd.read()
    except (IOError, OSError):
        printd("pid file: "+ pidfile +" open failed")
        return StatusCodes.FAILED

    printd("file content "+str(inp))
    printd(pids)

    tocheck_pid = inp.strip()
    if not tocheck_pid:
        # An empty pid file must never match an empty entry in pids.
        return StatusCodes.FAILED

    if any(tocheck_pid == item.strip() for item in pids):
        printd("pid file matched")
        return StatusCodes.SUCCESS

    return StatusCodes.FAILED
|
|
|
|
|
|
|
|
def _pidofSnapshot(cmd):
    """Run cmd through the shell; return (exit status, list of output words)."""
    # universal_newlines keeps the captured output a text string.
    proc = Popen(cmd, shell=True, stdout=PIPE, universal_newlines=True)
    # communicate() drains the pipe before reaping the child, so the child
    # can never block on a full pipe the way wait()-then-read can.
    output = proc.communicate()[0]
    return proc.returncode, output.split()


def _processHealthy(pidof_cmd, pidfile):
    """Return True when pidof succeeds and the pid file names one of its pids."""
    exit_status, pids = _pidofSnapshot(pidof_cmd)
    printd('exit status:' + str(exit_status))
    if exit_status != 0:
        return False

    printd("pids: " + " ".join(pids))
    # There may be more than one process, so the pid file decides the match.
    printd("Checking pid file")
    return isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS


def _restartService(service_name, pidof_cmd):
    """Run one 'service <service_name> restart'; return True on exit status 0."""
    if service_name == 'apache2':
        # Original note: "Killing apache2 process with this the main service
        # will not start".  Kill whatever pidof reports right now.
        for pid in _pidofSnapshot(pidof_cmd)[1]:
            kill_cmd = 'kill -9 ' + pid
            printd(kill_cmd)
            Popen(kill_cmd, shell=True, stdout=PIPE, stderr=STDOUT).communicate()

    time.sleep(1)
    restart = Popen('service ' + service_name + ' restart',
                    shell=True, stdout=PIPE, stderr=STDOUT)
    restart.communicate()
    return restart.returncode == 0


def checkProcessStatus( process ):
    """Check one monitored process and try to recover it when it is down.

    process -- dict of settings for one process; the keys read here are
               'processname', 'servicename' and 'pidfile'.

    A process counts as running when ``pidof <processname>`` succeeds and
    isPidMatchPidFile() matches the pid file against the reported pids.
    If it is not running, the state is re-checked up to four times, one
    second apart; after that ``service <servicename> restart`` is tried up
    to five times, one second apart.

    Returns StatusCodes.INVALID_INP when 'processname' is missing,
    StatusCodes.RUNNING when the process is running or was recovered, and
    StatusCodes.STOPPED when every restart attempt failed.
    """
    recheck_attempts = 4
    restart_attempts = 5

    process_name = process.get('processname')
    service_name = process.get('servicename')
    pidfile = process.get('pidfile')

    if process_name is None:
        print("\n Invalid Process Name")
        return StatusCodes.INVALID_INP

    printd("checking the process " + process_name)
    # Kept in its own variable: the pidof command must not be overwritten by
    # the restart command, or later status checks would restart the service.
    pidof_cmd = 'pidof ' + process_name
    printd(pidof_cmd)

    if _processHealthy(pidof_cmd, pidfile):
        printd("The process is running ....")
        return StatusCodes.RUNNING

    printd("The process " + process_name + " is not running trying recover ")

    # Phase 1: give the process a few seconds to show up by itself.
    for _ in range(recheck_attempts):
        if _processHealthy(pidof_cmd, pidfile):
            printd("pid file is matched ...")
            raisealert(log.ALERT, "The process detected as running", process_name)
            return StatusCodes.RUNNING
        printd("pid file is not matched ...")
        time.sleep(1)

    # Phase 2: restart the service.  Returning on the first success means an
    # earlier failed attempt can no longer turn a recovery into STOPPED.
    for _ in range(restart_attempts):
        msg = "The process " + process_name + " is not running trying recover "
        raisealert(log.INFO, msg, process_name)

        if _restartService(service_name, pidof_cmd):
            printd("The process" + process_name + " recovered successfully ")
            msg = "The process " + process_name + " is recovered successfully "
            raisealert(log.INFO, msg, process_name)
            return StatusCodes.RUNNING

        printd("process restart failing trying again ....")
        time.sleep(1)

    msg = "The process %s recover failed " % process_name
    raisealert(log.ALERT, msg, process_name)
    printd("Restart failed after number of retries")
    return StatusCodes.STOPPED
|
|
|
|
def raiseAlert( process_name ):
    """Print a one-line notice to standard output naming process_name."""
    notice = "process name %s is raised " % process_name
    print(notice)
|
|
|
|
def monitProcess( processes_info ):
    """Run checkProcessStatus() for every configured process.

    processes_info -- dict mapping a name to that process's settings dict.

    Returns StatusCodes.INVALID_INP when processes_info is empty, otherwise
    None.  A line is printed for each process whose check does not return
    StatusCodes.RUNNING.
    """
    if len( processes_info ) == 0:
        print("Invalid Input")
        return StatusCodes.INVALID_INP

    for section_name, settings in processes_info.items():
        state = checkProcessStatus( settings )
        if state == StatusCodes.RUNNING:
            continue
        print("\n Process %s is not Running" % section_name)
|
|
|
|
|
|
def main():
    """Script entry point: read the configuration and monitor every process.

    Step 1: get the configuration with getConfig().
    Step 2: get the previous run log (only a placeholder, nothing is done).
    Step 3: monitor the processes and raise alerts via monitProcess().
    """
    printd("monitoring started")
    monitProcess( getConfig() )
|
|
|
|
|
|
# Start monitoring only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|