mirror of
				https://github.com/apache/cloudstack.git
				synced 2025-10-26 08:42:29 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			261 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			261 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python
 | |
| # Licensed to the Apache Software Foundation (ASF) under one
 | |
| # or more contributor license agreements.  See the NOTICE file
 | |
| # distributed with this work for additional information
 | |
| # regarding copyright ownership.  The ASF licenses this file
 | |
| # to you under the Apache License, Version 2.0 (the
 | |
| # "License"); you may not use this file except in compliance
 | |
| # with the License.  You may obtain a copy of the License at
 | |
| #
 | |
| #   http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing,
 | |
| # software distributed under the License is distributed on an
 | |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 | |
| # KIND, either express or implied.  See the License for the
 | |
| # specific language governing permissions and limitations
 | |
| # under the License.
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| __author__ = 'jayapalreddy'
 | |
| 
 | |
| from ConfigParser import SafeConfigParser
 | |
| from subprocess import *
 | |
| from os import path
 | |
| import time
 | |
| 
 | |
| monitor_log='/var/log/monitor.log'
 | |
class StatusCodes:
    """Integer result codes returned by the monitoring helpers in this file."""
    # Outcome of a helper call / input validation.
    SUCCESS, FAILED, INVALID_INP = 0, 1, 2
    # State of a monitored process.
    RUNNING, STOPPED, STARTING = 3, 4, 5
 | |
| 
 | |
class log:
    """Severity labels passed as the first argument of raisealert()."""
    INFO, ALERT, CRIT, NOTIF = 'INFO', 'ALERT', 'CRIT', 'NOTIF'
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
def getConfig( config_file_path = "/etc/monitor.conf" ):
    """Load the monitor configuration into a nested dictionary.

    config_file_path -- path of the INI-style configuration file.

    Returns a dict mapping every section name to a dict of that section's
    option names and values, as reported by the parser.  A file the parser
    cannot read contributes no sections, so the result may be empty.
    """
    cfg = SafeConfigParser()
    cfg.read( config_file_path )

    sections = {}
    for section_name in cfg.sections():
        # items() yields (option, value) pairs; dict() collects them as-is.
        sections[section_name] = dict(cfg.items(section_name))
    return sections
 | |
| 
 | |
def printd (msg, enabled=False):
    """Optionally append a debug message to the monitor log.

    Debug logging is switched off by default: with ``enabled`` left false the
    function returns 0 immediately and touches no file, which is what every
    existing ``printd(msg)`` call gets.

    msg     -- any object; it is written as ``str(msg)`` plus a newline.
    enabled -- when true, append the message to the file named by the
               module-level ``monitor_log``.

    Returns 0 when logging is disabled or the write succeeded, 1 when the
    log file could not be written.
    """
    if not enabled:
        return 0

    try:
        # Append mode creates the file when it is missing and always writes
        # at the end; the with-block closes the handle even on a write error.
        with open(monitor_log, 'a') as logfile:
            logfile.write(str(msg) + "\n")
    except (IOError, OSError):
        # Debug output is best-effort: report failure through the return
        # value instead of interrupting the monitoring run.
        return 1
    return 0
 | |
| 
 | |
def raisealert(severity, msg, process_name=None):
    """Send one alert line to syslog through the ``logger`` utility.

    The line is ``[severity] [process_name] msg`` when process_name is given,
    otherwise ``[severity] msg``; it is logged with the tag ``monit``.

    severity     -- severity label string (e.g. one of the ``log`` constants).
    msg          -- alert text.
    process_name -- optional name of the process the alert is about.

    Returns None.  A failure to start ``logger`` is reported on stderr and
    does not raise.
    """
    import sys

    if process_name is not None:
        text = '[' + severity + '] [' + process_name + '] ' + msg
    else:
        text = '[' + severity + '] ' + msg

    # Collapse whitespace runs the way the shell's word splitting used to,
    # so the logged text stays the same for ordinary messages.
    text = ' '.join(text.split())

    try:
        # An argument list (no shell) keeps "[INFO]" from being treated as a
        # glob pattern and keeps message text from being run as shell syntax.
        proc = Popen(['logger', '-t', 'monit', text], stdout=PIPE, stderr=PIPE)
        # communicate() drains the pipes and reaps the child process.
        proc.communicate()
    except OSError as err:
        sys.stderr.write("raisealert: could not run logger: %s\n" % err)
 | |
| 
 | |
| 
 | |
def isPidMatchPidFile(pidfile, pids):
    """Check whether the pid stored in a pid file is one of the given pids.

    pidfile -- path of the pid file to read.
    pids    -- non-empty list of pid strings; surrounding whitespace on each
               entry is ignored.

    Returns StatusCodes.SUCCESS when the stripped file content equals one of
    the stripped entries of ``pids``.  Returns StatusCodes.FAILED for invalid
    arguments, a missing, unreadable or empty pid file, or when nothing
    matches.
    """
    if not isinstance(pids, list) or len(pids) == 0:
        print("Invalid Arguments")
        return StatusCodes.FAILED

    # A missing 'pidfile' setting (None) is treated like a missing file.
    if not pidfile or not path.isfile(pidfile):
        #It seems there is no pid file for this service
        printd("The pid file "+str(pidfile)+" is not there for this process")
        return StatusCodes.FAILED

    try:
        # The with-block closes the file on every path, including the
        # early return taken when a match is found below.
        with open(pidfile, 'r') as fd:
            inp = fd.read()
    except (IOError, OSError):
        printd("pid file: "+ pidfile +" open failed")
        return StatusCodes.FAILED

    printd("file content "+str(inp))
    printd(pids)

    tocheck_pid = inp.strip()
    if not tocheck_pid:
        # An empty pid file must never match an empty/blank pid token.
        return StatusCodes.FAILED

    for item in pids:
        if tocheck_pid == item.strip():
            printd("pid file matched")
            return StatusCodes.SUCCESS

    return StatusCodes.FAILED
 | |
| 
 | |
| 
 | |
| 
 | |
def _runCommand(cmd, merge_stderr=False):
    """Run cmd through the shell and return (exit status, captured output text)."""
    if merge_stderr:
        proc = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
    else:
        proc = Popen(cmd, shell=True, stdout=PIPE)
    # communicate() reads the pipe to EOF and then reaps the child; calling
    # wait() first can block forever once the pipe buffer fills up.
    output = proc.communicate()[0]
    if not isinstance(output, str):
        output = output.decode('utf-8', 'replace')
    return proc.returncode, output


def checkProcessStatus( process ):
    """Check one configured process and try to recover it when it is down.

    process -- dict of settings for one process; the keys read here are
               'processname', 'servicename' and 'pidfile'.

    The process counts as running when ``pidof <processname>`` succeeds and
    one of the reported pids matches the pid file.  Otherwise the check is
    repeated up to four times, one second apart, and after that the service
    is restarted with ``service <servicename> restart`` for up to five
    attempts.  For the 'apache2' service the pids found by pidof are killed
    before each restart attempt.

    Returns StatusCodes.INVALID_INP when 'processname' is missing,
    StatusCodes.RUNNING when the process is running, was detected as running
    on a retry, or a restart command succeeded, and StatusCodes.STOPPED when
    it could not be restarted.
    """
    process_name = process.get('processname')
    service_name = process.get('servicename')
    pidfile = process.get('pidfile')

    if process_name is None:
        print("\n Invalid Process Name")
        return StatusCodes.INVALID_INP

    printd("checking the process " + process_name)
    # NOTE: the commands stay shell strings so multi-word values from the
    # configuration file keep working exactly as before.
    pidof_cmd = 'pidof ' + process_name
    printd(pidof_cmd)
    exitStatus, temp_out = _runCommand(pidof_cmd)

    pids = []
    pidFileMatched = False
    if exitStatus == 0:
        printd("pids: " + temp_out)
        pids = temp_out.split()

        #there may be more than one process so match the pid file
        printd("Checking pid file")
        pidFileMatched = isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS

    printd(pidFileMatched)
    if exitStatus == 0 and pidFileMatched:
        printd("The process is running ....")
        return StatusCodes.RUNNING

    printd('exit status:' + str(exitStatus))
    printd("The process " + process_name + " is not running trying recover ")

    restartFailed = False
    for attempt in range(1, 10):
        # Always probe with the pidof command; it is kept in its own variable
        # so the restart/kill commands below can never replace it.
        exitStatus, temp_out = _runCommand(pidof_cmd)
        # Use only the pids from this probe (empty when pidof found nothing).
        if exitStatus == 0:
            pids = temp_out.split()
        else:
            pids = []

        if attempt < 5: # this is just for trying few more times
            if exitStatus == 0:
                if isPidMatchPidFile(pidfile, pids) == StatusCodes.SUCCESS:
                    printd("pid file is matched ...")
                    raisealert(log.ALERT, "The process detected as running", process_name)
                    break
                printd("pid file is not matched ...")
            # Give the process a moment before probing again.
            time.sleep(1)
            continue

        msg = "The process " + process_name + " is not running trying recover "
        raisealert(log.INFO, msg, process_name)

        if service_name is None:
            # Without a service name there is no restart command to run.
            printd("no servicename configured, cannot restart " + process_name)
            restartFailed = True
            break

        if service_name == 'apache2':
            # Killing apache2 process with this the main service will not start
            for pid in pids:
                kill_cmd = 'kill -9 ' + pid
                printd(kill_cmd)
                _runCommand(kill_cmd, merge_stderr=True)

        restart_cmd = 'service ' + service_name + ' restart'
        time.sleep(1)
        return_val, _restart_out = _runCommand(restart_cmd, merge_stderr=True)

        if return_val == 0:
            printd("The process" + process_name + " recovered successfully ")
            msg = "The process " + process_name + " is recovered successfully "
            raisealert(log.INFO, msg, process_name)
            # A successful restart cancels any failure seen on earlier tries.
            restartFailed = False
            break

        #retry restarting the process for few tries
        printd("process restart failing trying again ....")
        restartFailed = True
        time.sleep(1)

    if restartFailed:
        msg = "The process %s recover failed " % process_name
        raisealert(log.ALERT, msg, process_name)

        printd("Restart failed after number of retries")
        return StatusCodes.STOPPED

    return StatusCodes.RUNNING
 | |
| 
 | |
def raiseAlert( process_name ):
    """Print a notice on stdout that an alert was raised for process_name."""
    template = "process name %s is raised "
    print(template % process_name)
 | |
| 
 | |
def monitProcess( processes_info ):
    """Check every configured process and report the ones not running.

    processes_info -- dict mapping a name to the settings dict that is
                      handed to checkProcessStatus().

    Returns StatusCodes.INVALID_INP when processes_info is empty; otherwise
    returns None after all entries have been checked.
    """
    if not len( processes_info ):
        print("Invalid Input")
        return  StatusCodes.INVALID_INP

    for name, settings in processes_info.items():
        status = checkProcessStatus( settings )
        if status == StatusCodes.RUNNING:
            continue
        print("\n Process %s is not Running" % name)
 | |
| 
 | |
| 
 | |
def main():
    """Run one monitoring pass over the configured processes."""
    printd("monitoring started")

    # Step 1: read the configuration.
    configured = getConfig()

    # Step 2: previous run log -- placeholder, nothing is done here.

    # Step 3: check each process and raise alerts as needed.
    #raisealert(log.INFO, 'Monit started')
    monitProcess( configured )
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
|     main()
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 |