#!/bin/bash # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. . /etc/rc.d/init.d/functions #set -x usage() { printf "\nThe tool stopping/starting running system vms and domain routers \n\nUsage: %s: [-d] [-u] [-p] [-m] [-s] [-r] [-a] [-t] [-n] [-z] [-v]\n\n -d - cloud DB server ip address, defaulted to localhost if not specified \n -u - user name to access cloud DB, defaulted to "root" if not specified \n -p - cloud DB user password, defaulted to no password if not specified \n\n -m - the ip address of management server, defaulted to localhost if not specified\n\n -s - stop then start all running SSVMs and Console Proxies \n -r - stop then start all running Virtual Routers\n -a - stop then start all running SSVMs, Console Proxies, and Virtual Routers \n -n - restart all Guest networks \n -t - number of parallel threads used for stopping Domain Routers. Default is 10.\n -l - log file location. Default is cloud.log under current directory.\n -z - do restart only for the instances in the specific zone. If not specified, restart will apply to instances in all zones\n -v - do restart all VPCs in the entire system\n\n" $(basename $0) >&2 } system= router= all= vpc= db=localhost ms=localhost user=root password= help= maxthreads=10 LOGFILE=cloud.log zone="" inzone="" while getopts 'sarhnvd:m:u:p:t:l:z:' OPTION do case $OPTION in s) system=1 ;; r) router=1 ;; n) redundant=1 ;; a) all=1 ;; v) vpc=1 ;; d) db="$OPTARG" ;; u) user="$OPTARG" ;; p) password="$OPTARG" ;; h) help=1 ;; m) ms="$OPTARG" ;; t) maxthreads="$OPTARG" ;; l) LOGFILE="$OPTARG" ;; z) zone=" AND data_center_id=""$OPTARG" inzone=" in zone id=""$OPTARG" esac done stop_start_system() { secondary=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select id from vm_instance where state=\"Running\" and type=\"SecondaryStorageVm\"$zone"`) console=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select id from vm_instance where state=\"Running\" and type=\"ConsoleProxy\"$zone"`) length_secondary=(${#secondary[@]}) length_console=(${#console[@]}) echo -e "\nStopping and starting $length_secondary secondary storage vm(s)$inzone..." echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Stopping and starting $length_secondary secondary storage vm(s)$inzone..." >>$LOGFILE for d in "${secondary[@]}"; do echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Stopping secondary storage vm with id $d" >>$LOGFILE jobresult=$(send_request stopSystemVm $d) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to stop secondary storage vm with id $d \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to stop secondary storage vm with id $d" >>$LOGFILE else echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Starting secondary storage vm with id $d" >>$LOGFILE jobresult=$(send_request startSystemVm $d SSVM) if [ "$jobresult" != "1" ]; then echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to start secondary storage vm with id $d" >>$LOGFILE echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to start secondary storage vm with id $d" >>$LOGFILE fi fi done if [ "$length_secondary" == "0" ];then echo -e "No running secondary storage vms found \n" else echo -e "Done stopping and starting secondary storage vm(s)$inzone" echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done stopping and starting secondary storage vm(s)$inzone." >>$LOGFILE fi echo -e "\nStopping and starting $length_console console proxy vm(s)$inzone..." echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Stopping and starting $length_console console proxy vm(s)$inzone..." >>$LOGFILE for d in "${console[@]}"; do echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Stopping console proxy with id $d" >>$LOGFILE jobresult=$(send_request stopSystemVm $d) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to stop console proxy vm with id $d \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to stop console proxy vm with id $d" >>$LOGFILE else echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Starting console proxy vm with id $d" >>$LOGFILE jobresult=$(send_request startSystemVm $d consoleProxy) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to start console proxy vm with id $d \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to start console proxy vm with id $d" >>$LOGFILE fi fi done if [ "$length_console" == "0" ];then echo -e "No running console proxy vms found \n" else echo "Done stopping and starting console proxy vm(s) $inzone." echo "[$(date "+%Y.%m.%d-%H.%M.%S")] Done stopping and starting console proxy vm(s) $inzone." >>$LOGFILE fi } stop_start_router() { router=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select id from vm_instance where state=\"Running\" and type=\"DomainRouter\"$zone"`) length_router=(${#router[@]}) echo -e "\nStopping and starting $length_router running routing vm(s)$inzone... " echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Stopping and starting $length_router running routing vm(s)$inzone... " >>$LOGFILE #Spawn reboot router in parallel - run commands in chunks - number of threads is configurable pids=() for d in "${router[@]}"; do reboot_router $d & pids=( "${pids[@]}" $! ) length_pids=(${#pids[@]}) unfinishedPids=(${#pids[@]}) if [ $maxthreads -gt $length_router ]; then maxthreads=$length_router fi if [ $length_pids -ge $maxthreads ]; then while [ $unfinishedPids -gt 0 ]; do sleep 10 count=0 for (( i = 0 ; i < $length_pids; i++ )); do if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then count=`expr $count + 1` fi done if [ $count -eq $unfinishedPids ]; then unfinishedPids=0 fi done #remove all elements from pids if [ $unfinishedPids -eq 0 ]; then pids=() length_pids=(${#pids[@]}) fi fi done if [ "$length_router" == "0" ];then echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No running router vms found \n" >>$LOGFILE else while [ $unfinishedPids -gt 0 ]; do sleep 10 done echo -e "Done restarting router(s)$inzone. \n" echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting router(s)$inzone. \n" >>$LOGFILE fi } stop_start_all() { stop_start_system stop_start_router } send_request(){ jobid=`curl -sS "http://$ms:8096/?command=$1&id=$2&response=json" | sed 's/\"//g' | sed 's/ //g' | sed 's/{//g' | sed 's/}//g' | awk -F: {'print $3'}` if [ "$jobid" == "" ]; then echo 2 return fi jobresult=$(query_async_job_result $jobid) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to $1 id=$2; jobId is $jobid \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to $1 id=$2; jobId is $jobid" >>$LOGFILE fi echo $jobresult } reboot_router(){ echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Restarting router with id $1" >>$LOGFILE jobid=`curl -sS "http://$ms:8096/?command=rebootRouter&id=$1&response=json" | sed 's/\"//g' | sed 's/ //g' | sed 's/{//g' | sed 's/}//g' | awk -F: {'print $3'}` if [ "$jobid" == "" ]; then echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart domainRouter with id $1; unable to submit the job" >>$LOGFILE echo 2 return fi jobresult=$(query_async_job_result $jobid) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to restart domainRouter with id $1 \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart domainRouter with id $1; jobId $jobid" >>$LOGFILE exit 0 else echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Successfully restarted domainRouter with id $1; jobId $jobid" >>$LOGFILE exit 0 fi } restart_networks(){ networks=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select n.id from networks n, network_offerings no where n.network_offering_id = no.id and no.system_only = 0 and n.removed is null$zone"`) length_networks=(${#networks[@]}) echo -e "\nRestarting $length_networks networks$inzone... " echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_networks networks$inzone... " >>$LOGFILE #Spawn restart network in parallel - run commands in chunks - number of threads is configurable pids=() for d in "${networks[@]}"; do restart_network $d & pids=( "${pids[@]}" $! ) length_pids=(${#pids[@]}) unfinishedPids=(${#pids[@]}) if [ $maxthreads -gt $length_networks ]; then maxthreads=$length_networks fi if [ $length_pids -ge $maxthreads ]; then while [ $unfinishedPids -gt 0 ]; do sleep 10 count=0 for (( i = 0 ; i < $length_pids; i++ )); do if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then count=`expr $count + 1` fi done if [ $count -eq $unfinishedPids ]; then unfinishedPids=0 fi done #remove all elements from pids if [ $unfinishedPids -eq 0 ]; then pids=() length_pids=(${#pids[@]}) fi fi done if [ "$length_networks" == "0" ];then echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No networks found \n" >>$LOGFILE else while [ $unfinishedPids -gt 0 ]; do sleep 10 done echo -e "Done restarting networks$inzone. \n" echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting networks$inzone. \n" >>$LOGFILE fi } restart_network(){ jobid=`curl -sS "http://$ms:8096/?command=restartNetwork&id=$1&response=json" | sed 's/\"//g' | sed 's/ //g' | sed 's/{//g' | sed 's/}//g' | awk -F: {'print $3'}` if [ "$jobid" == "" ]; then echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart network with id $1; unable to submit the job" >>$LOGFILE echo 2 return fi jobresult=$(query_async_job_result $jobid) if [ "$jobresult" != "1" ]; then echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart network with id $1; jobId $jobid" >>$LOGFILE else echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Successfully restarted network with id $1; jobId $jobid" >>$LOGFILE fi } restart_vpc(){ echo -e "INFO: Restarting vpc with id $1" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Restarting vpc with id $1" >>$LOGFILE jobid=`curl -sS "http://$ms:8096/?command=restartVPC&id=$1&response=json" | sed 's/\"//g' | sed 's/ //g' | sed 's/{//g' | sed 's/}//g' | awk -F: {'print $3'}` if [ "$jobid" == "" ]; then echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart vpc with id $1; unable to submit the job" >>$LOGFILE echo 2 return fi jobresult=$(query_async_job_result $jobid) if [ "$jobresult" != "1" ]; then echo -e "ERROR: Failed to restart vpc with id $1 \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] ERROR: Failed to restart vpc with id $1; jobId $jobid" >>$LOGFILE else echo -e "INFO: Successfully restarted vpc with id $1 \n" echo "[$(date "+%Y.%m.%d-%H.%M.%S")] INFO: Successfully restarted vpc with id $1; jobId $jobid" >>$LOGFILE fi } restart_vpcs(){ vpcs=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select id from vpc WHERE removed is null$zone"`) length_vpcs=(${#vpcs[@]}) echo -e "\nRestarting $length_vpcs vpcs... " echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_vpcs vpcs... " >>$LOGFILE #Spawn restart vpcs in parallel - run commands in chunks - number of threads is configurable pids=() for d in "${vpcs[@]}"; do restart_vpc $d & pids=( "${pids[@]}" $! ) length_pids=(${#pids[@]}) unfinishedPids=(${#pids[@]}) if [ $maxthreads -gt $length_vpcs ]; then maxthreads=$length_vpcs fi if [ $length_pids -ge $maxthreads ]; then while [ $unfinishedPids -gt 0 ]; do sleep 10 count=0 for (( i = 0 ; i < $length_pids; i++ )); do if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then count=`expr $count + 1` fi done if [ $count -eq $unfinishedPids ]; then unfinishedPids=0 fi done #remove all elements from pids if [ $unfinishedPids -eq 0 ]; then pids=() length_pids=(${#pids[@]}) fi fi done if [ "$length_vpcs" == "0" ];then echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No vpcs found \n" >>$LOGFILE else while [ $unfinishedPids -gt 0 ]; do sleep 10 done echo -e "Done restarting vpcs$inzone. \n" echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting vpcs$inzone. \n" >>$LOGFILE fi } query_async_job_result() { while [ 1 ] do jobstatus=`curl -sS "http://$ms:8096/?command=queryAsyncJobResult&jobId=$1&response=json" | sed 's/\"//g' | sed 's/ //g' | sed 's/{//g' | sed 's/}//g' | awk -F, {'print $4'} | awk -F: {'print $2'}` if [ "$jobstatus" != "0" ]; then echo $jobstatus break fi sleep 5 done } if [ "$system$router$all$help$redundant$vpc" == "" ] then usage exit fi if [ "$help" == "1" ] then usage exit fi if [ "$all" == "1" ] then stop_start_all exit fi if [ "$system" == "1" ] then stop_start_system fi if [ "$router" == "1" ] then stop_start_router fi if [ "$redundant" == "1" ] then restart_networks fi if [ "$vpc" == "1" ] then restart_vpcs fi