mirror of
				https://github.com/apache/cloudstack.git
				synced 2025-10-26 08:42:29 +01:00 
			
		
		
		
	- Refactors and simplifies systemvm codebase file structures keeping the same resultant systemvm.iso packaging - Password server systemd script and new postinit script that runs before sshd starts - Fixes to keepalived and conntrackd config to make rVRs work again - New /etc/issue featuring ascii based cloudmonkey logo/message and systemvmtemplate version - SystemVM python codebase linted and tested. Added pylint/pep to Travis. - iptables re-application fixes for non-VR systemvms. - SystemVM template build fixes. - Default secondary storage vm service offering boosted to have 2vCPUs and RAM equal to console proxy. - Fixes to several marvin based smoke tests, especially rVR related tests. rVR tests to consider 3*advert_int+skew timeout before status is checked. Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
		
			
				
	
	
		
			381 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			381 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/python
 | |
| # Licensed to the Apache Software Foundation (ASF) under one
 | |
| # or more contributor license agreements.  See the NOTICE file
 | |
| # distributed with this work for additional information
 | |
| # regarding copyright ownership.  The ASF licenses this file
 | |
| # to you under the Apache License, Version 2.0 (the
 | |
| # "License"); you may not use this file except in compliance
 | |
| # with the License.  You may obtain a copy of the License at
 | |
| #
 | |
| #   http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing,
 | |
| # software distributed under the License is distributed on an
 | |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 | |
| # KIND, either express or implied.  See the License for the
 | |
| # specific language governing permissions and limitations
 | |
| # under the License.
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| from ConfigParser import SafeConfigParser
 | |
| from subprocess import *
 | |
| from os import path
 | |
| import time
 | |
| import os
 | |
| import logging
 | |
| 
 | |
class StatusCodes:
    """Integer result codes returned by the monitoring helpers in this file."""

    # SUCCESS / FAILED report the outcome of a single helper call,
    # INVALID_INP flags missing input, and RUNNING / STOPPED describe the
    # state of a monitored process.  STARTING is defined for completeness.
    (
        SUCCESS,
        FAILED,
        INVALID_INP,
        RUNNING,
        STOPPED,
        STARTING,
    ) = range(6)
 | |
| 
 | |
class Log:
    """Severity tags that are written in square brackets in front of an alert."""

    INFO, ALERT, CRIT, NOTIF = 'INFO', 'ALERT', 'CRIT', 'NOTIF'
 | |
| 
 | |
class Config:
    """Tunables and file locations used by the process monitor."""

    # --- files -----------------------------------------------------------
    # Debug log file name.
    MONITOR_LOG = '/var/log/monitor.log'
    # Holds "name:timestamp," entries for processes that failed to recover.
    UNMONIT_PS_FILE = '/etc/unmonit_psList.txt'

    # --- timing / retries ------------------------------------------------
    # Minutes a failed process is left alone before it is monitored again.
    MONIT_AFTER_MINS = 30
    # Seconds slept at the start of every retry iteration.
    SLEEP_SEC = 1
    # Exclusive upper bound of the retry loop counter (which starts at 1).
    RETRY_ITERATIONS = 10
    # Iterations below this number only re-check the status; from this
    # iteration on a service restart is attempted.
    RETRY_FOR_RESTART = 5
 | |
| 
 | |
| 
 | |
def getConfig(config_file_path="/etc/monitor.conf"):
    """
    Load the monitor configuration file.

    Every section of the file becomes one entry of the returned dictionary,
    mapping the section name to a dictionary of that section's
    option name -> value pairs.  A file that cannot be read yields an
    empty dictionary.
    """
    cfg = SafeConfigParser()
    cfg.read(config_file_path)

    return dict(
        (section_name, dict(cfg.items(section_name)))
        for section_name in cfg.sections()
    )
 | |
| 
 | |
def printd(msg):
    """
    Debug-message hook; currently a no-op.

    Debug output is switched off: the function ignores ``msg`` and always
    returns 0.  The file-append code that used to follow the ``return``
    could never execute and has been removed.  To trace the monitor while
    developing, temporarily print ``msg`` or append it to a log file here.
    """
    return 0
 | |
| 
 | |
def raisealert(severity, msg, process_name=None):
    """
    Write an alert to the router service monitor log and to syslog.

    severity     -- tag placed in square brackets at the start of the line
                    (see the Log class)
    msg          -- alert text
    process_name -- optional process the alert is about; when given it is
                    added as a second bracketed tag

    The line is logged through the logging module into
    /var/log/routerServiceMonitor.log and handed to the ``logger`` utility
    with the tag ``monit``.  Nothing is returned.
    """
    if process_name is not None:
        log = '[' + severity + '] [' + process_name + '] ' + msg + "\n"
    else:
        log = '[' + severity + '] ' + msg + "\n"

    logging.basicConfig(level=logging.INFO,
                        filename='/var/log/routerServiceMonitor.log',
                        format='%(asctime)s %(message)s')
    logging.info(log)

    # Pass the message as a single argument instead of building a shell
    # command: with a shell the "[INFO]" tag is a glob pattern and quotes or
    # other metacharacters in the message would break or alter the command.
    try:
        Popen(['logger', '-t', 'monit', log.rstrip("\n")]).wait()
    except OSError:
        # Forwarding to syslog is best effort; the alert is already in the
        # log file, so a missing logger binary must not abort monitoring.
        pass
 | |
| 
 | |
| 
 | |
def isPidMatchPidFile(pidfile, pids):
    """
    Check whether the pid stored in a pid file is one of the given pids.

    pidfile -- path of the pid file of the service
    pids    -- non-empty list of pids (strings) of the running process

    Returns StatusCodes.SUCCESS when the stripped file content equals one of
    the stripped pids.  Returns StatusCodes.FAILED when the arguments are
    invalid, the file is missing, unreadable or blank, or no pid matches.
    """
    if not isinstance(pids, list) or not pids:
        printd("Invalid Arguments")
        return StatusCodes.FAILED

    if not pidfile:
        # No pid file configured for this process; nothing to compare with.
        printd("Invalid Arguments")
        return StatusCodes.FAILED

    if not path.isfile(pidfile):
        # It seems there is no pid file for this service
        printd("The pid file " + pidfile + " is not there for this process")
        return StatusCodes.FAILED

    try:
        # The with-statement closes the file on every path, including a
        # failing read.
        with open(pidfile, 'r') as fd:
            inp = fd.read()
    except (IOError, OSError):
        printd("pid file: " + pidfile + " open failed")
        return StatusCodes.FAILED

    printd("file content " + str(inp))
    printd(pids)

    tocheck_pid = inp.strip()
    if not tocheck_pid:
        # An empty or whitespace-only pid file must never match, not even an
        # empty entry in pids.
        return StatusCodes.FAILED

    for item in pids:
        if str(item).strip() == tocheck_pid:
            printd("pid file matched")
            return StatusCodes.SUCCESS

    return StatusCodes.FAILED
 | |
| 
 | |
def checkProcessRunningStatus(process_name, pidFile):
    """
    Find out whether a process is running and matches its pid file.

    process_name -- name handed to ``pidof``
    pidFile      -- path of the pid file the running pids are compared with

    Returns a tuple ``(running, pids)``.  ``running`` is True only when
    ``pidof`` succeeds and isPidMatchPidFile() reports a match.  ``pids`` is
    the list of pids printed by ``pidof`` (empty when ``pidof`` fails).
    """
    printd("checking the process " + process_name)
    pids = []

    # NOTE: process_name comes from the monitor configuration and is
    # interpolated into a shell command, so the configuration must be trusted.
    cmd = 'pidof ' + process_name
    printd(cmd)

    pout = Popen(cmd, shell=True, stdout=PIPE, universal_newlines=True)
    # Read the output before looking at the exit status: waiting on a child
    # whose stdout is a pipe can block once the pipe buffer is full.
    temp_out = pout.communicate()[0]
    exitStatus = pout.returncode

    if exitStatus == 0:
        # split() without an argument also drops the trailing newline, so
        # every returned pid is a clean token.
        pids = temp_out.split()
        printd("pid(s) of process %s are %s " % (process_name, pids))

        # there may be more than one process, so match against the pid file
        printd("Checking pid file")
        if isPidMatchPidFile(pidFile, pids) == StatusCodes.SUCCESS:
            return True, pids

    printd("pid of exit status %s" % exitStatus)

    return False, pids
 | |
| 
 | |
def restartService(service_name):
    """
    Restart a service through ``service <name> restart``.

    service_name -- name of the service to restart

    Returns True when the command exits with status 0 (an INFO alert is
    raised in that case) and False otherwise, including when no service
    name is given.
    """
    if not service_name:
        # Without a configured service name there is nothing to restart.
        printd("process restart failed ....")
        return False

    cmd = 'service ' + service_name + ' restart'
    cout = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
    # Drain the combined output while waiting; a plain wait() can block
    # forever if the child fills the pipe.
    cout.communicate()

    if cout.returncode == 0:
        printd("The service " + service_name + " recovered successfully ")
        msg = "The process " + service_name + " is recovered successfully "
        raisealert(Log.INFO, msg, service_name)
        return True

    printd("process restart failed ....")
    return False
 | |
| 
 | |
| 
 | |
| 
 | |
def checkProcessStatus(process):
    """
    Check the process running status, if not running tries to restart

    process -- dictionary of one config section; the keys 'processname',
               'servicename' and 'pidfile' are read

    Returns StatusCodes.INVALID_INP when no process name is configured,
    StatusCodes.RUNNING when the process is running or was recovered, and
    StatusCodes.STOPPED when it could not be recovered.
    """
    process_name = process.get('processname')
    service_name = process.get('servicename')
    pidfile = process.get('pidfile')

    if process_name is None:
        printd("\n Invalid Process Name")
        return StatusCodes.INVALID_INP

    status, pids = checkProcessRunningStatus(process_name, pidfile)
    if status:
        printd("The process is running ....")
        return StatusCodes.RUNNING

    printd("Process %s is not running trying to recover" % process_name)

    # Tracks the final outcome.  It is only set once the process is seen
    # running or a restart succeeds, so an earlier failed restart attempt no
    # longer makes a later successful one look like a failure.
    recovered = False

    for i in range(1, Config.RETRY_ITERATIONS):
        time.sleep(Config.SLEEP_SEC)

        if i < Config.RETRY_FOR_RESTART:
            # First phase: only re-check, the process may still be starting.
            status, pids = checkProcessRunningStatus(process_name, pidfile)
            if status:
                raisealert(Log.ALERT, "The process detected as running", process_name)
                recovered = True
                break
            printd("Process %s is not running checking the status again..." % process_name)
            continue

        # Second phase: try to restart the service.
        msg = "The process " + process_name + " is not running trying recover "
        raisealert(Log.INFO, msg, process_name)

        if service_name == 'apache2':
            # Left-over apache2 processes keep the main service from
            # starting, so kill them and wait for the kill to finish before
            # the restart is attempted.
            for pid in pids:
                pid = pid.strip()
                if not pid:
                    continue
                cmd = 'kill -9 ' + pid
                printd(cmd)
                Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT).communicate()

        if restartService(service_name):
            recovered = True
            break

    if not recovered:
        msg = "The process %s recover failed " % process_name
        raisealert(Log.ALERT, msg, process_name)

        printd("Restart failed after number of retries")
        return StatusCodes.STOPPED

    return StatusCodes.RUNNING
 | |
| 
 | |
| 
 | |
def monitProcess(processes_info):
    """
    Monitors the processes which got from the config file

    processes_info -- dictionary mapping a config section name to the
                      dictionary of that section's settings

    Processes listed in the unmonit file whose entry is younger than
    Config.MONIT_AFTER_MINS are skipped.  Every other process is checked
    with checkProcessStatus(); processes that are not running afterwards are
    recorded in the unmonit file with the current time.  When nothing is
    left to record, the unmonit file is removed.

    Returns StatusCodes.INVALID_INP for empty input, otherwise None.
    """
    if not processes_info:
        printd("Invalid Input")
        return StatusCodes.INVALID_INP

    dict_unmonit = {}
    umonit_update = {}

    if not path.isfile(Config.UNMONIT_PS_FILE):
        printd('Unmonit File not exist')
    else:
        # load the dictionary with unmonit process list; the loader returns
        # a status code instead of a dictionary when the file is unusable
        loaded = loadPsFromUnMonitFile()
        if isinstance(loaded, dict):
            dict_unmonit = loaded

    # time for noting process down time (whole seconds, kept as a string)
    csec = str(int(time.time()))

    for process, properties in processes_info.items():
        printd("checking the service %s \n" % process)

        if process in dict_unmonit:
            ts = dict_unmonit[process]

            # skip the process if its time stamp is younger than
            # Config.MONIT_AFTER_MINS
            if not checkPsTimeStampForMonitor(csec, ts, process):
                # Keep the original time stamp so the entry survives when
                # the file is rewritten below.
                umonit_update[process] = ts
                continue

        if checkProcessStatus(properties) != StatusCodes.RUNNING:
            printd("\n Service %s is not Running" % process)
            # add this process into unmonit list
            printd("updating the service for unmonit %s\n" % process)
            umonit_update[process] = csec

    # if dict is not empty write to file else delete it
    if umonit_update:
        writePsListToUnmonitFile(umonit_update)
    else:
        removeFile(Config.UNMONIT_PS_FILE)
 | |
| 
 | |
| 
 | |
def checkPsTimeStampForMonitor(csec, ts, process):
    """
    Decide whether a previously failed process is due for monitoring again.

    csec    -- current time in seconds
    ts      -- time in seconds at which the process was recorded as down
    process -- value inserted into the alert text

    Returns True once Config.MONIT_AFTER_MINS minutes have passed since
    ``ts``; otherwise raises an ALERT and returns False.
    """
    elapsed_sec = int(csec) - int(ts)
    printd("Time difference=%s" % str(elapsed_sec))
    elapsed_min = elapsed_sec / 60

    if not elapsed_min < Config.MONIT_AFTER_MINS:
        return True

    raisealert(Log.ALERT, "The %s get monitor after %s minutes " % (process, Config.MONIT_AFTER_MINS))
    remaining_min = int(Config.MONIT_AFTER_MINS) - elapsed_min
    printd('process will be monitored after %s min' % (str(remaining_min)))
    return False
 | |
| 
 | |
def removeFile(fileName):
    """Delete ``fileName`` if it is an existing regular file; otherwise do nothing."""
    if not path.isfile(fileName):
        return

    printd("Removing the file %s" % fileName)
    os.remove(fileName)
 | |
| 
 | |
def loadPsFromUnMonitFile():
    """
    Read the unmonit file into a dictionary.

    The file (Config.UNMONIT_PS_FILE) holds comma-separated
    ``name:timestamp`` entries.  Returns a dictionary mapping each name to
    its time stamp string.  Returns StatusCodes.FAILED when the file cannot
    be opened or is empty.  Blank or malformed entries are skipped.
    """
    try:
        fd = open(Config.UNMONIT_PS_FILE)
    except (IOError, OSError):
        printd("Failed to open file %s " % (Config.UNMONIT_PS_FILE))
        return StatusCodes.FAILED

    # Close the handle on every path, including the empty-file return below.
    with fd:
        ps = fd.read()

    if not ps:
        printd("File %s content is empty " % Config.UNMONIT_PS_FILE)
        return StatusCodes.FAILED

    printd(ps)

    dict_unmonit = {}
    for entry in ps.split(','):
        fields = entry.strip().split(':')
        # The trailing comma written by the monitor produces an empty entry;
        # skip it and anything else that is not "name:timestamp".
        if len(fields) < 2 or not fields[0]:
            continue
        dict_unmonit[fields[0]] = fields[1]

    return dict_unmonit
 | |
| 
 | |
| 
 | |
def writePsListToUnmonitFile(umonit_update):
    """
    Overwrite the unmonit file with the given process list.

    umonit_update -- dictionary mapping a process name to its time stamp

    Each pair is written as ``name:timestamp,`` (every entry is followed by
    a comma).  Returns StatusCodes.FAILED when the file cannot be opened for
    writing, otherwise None.
    """
    printd("Write updated unmonit list to file")
    line = ''.join('%s:%s,' % (name, ts) for name, ts in umonit_update.items())
    printd(line)

    try:
        fd = open(Config.UNMONIT_PS_FILE, 'w')
    except (IOError, OSError):
        printd("Failed to open file %s " % Config.UNMONIT_PS_FILE)
        return StatusCodes.FAILED

    # The with-statement closes the file even if the write fails.
    with fd:
        fd.write(line)
 | |
| 
 | |
| 
 | |
def is_emtpy(struct):
    """
    Return True when ``struct`` is falsy (empty container, empty string,
    None, 0, ...) and False otherwise.
    """
    return not struct
 | |
| 
 | |
def main():
    """Run one monitoring pass over the configured processes."""
    printd("monitoring started")

    # Step 1: read the process settings from the config file.
    # Step 2: check every process, trying to recover it and raising alerts.
    monitProcess(getConfig())


if __name__ == "__main__":
    main()
 |