CLOUDSTACK-10296: Find time difference from last timestamp (#2458)

This fixes the time-difference issue in the rVR heartbeat check script that
was raised recently on dev@.
Reduce logging to avoid filling the ramdisk.
Make checkrouter return the fault state when keepalived is not running.

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Rohit Yadav 2018-03-15 16:32:18 +05:30 committed by GitHub
parent 74db647dbb
commit ab0bce2a1b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 49 additions and 38 deletions

View File

@ -17,6 +17,13 @@
# under the License.
# Default status value; it is not read again within the lines visible in this hunk.
STATUS=UNKNOWN
# Early exit: when systemctl does not report the keepalived unit as "active",
# print the FAULT status line and stop before any of the later checks run.
if [ "$(systemctl is-active keepalived)" != "active" ]
then
echo "Status: FAULT"
# Bare exit: the script's exit status is that of the preceding echo.
exit
fi
# Extract the router type from cmdline.json: keep lines containing "type", take the
# second whitespace-separated field, then strip commas and double quotes from it.
# NOTE(review): grep matches any line containing "type", not only a "type" key —
# confirm the file has a single such line.
ROUTER_TYPE=$(cat /etc/cloudstack/cmdline.json | grep type | awk '{print $2;}' | sed -e 's/[,\"]//g')
if [ "$ROUTER_TYPE" = "router" ]
then

View File

@ -16,48 +16,52 @@
# specific language governing permissions and limitations
# under the License.
# NOTE(review): this hunk is rendered without +/- markers, so lines from before and
# after the change appear interleaved (e.g. ROUTER_BIN_PATH and ROUTER_LOG are each
# assigned twice, and some if/then/fi lines do not pair up as shown). Read it as
# diff content rather than as a runnable script.
#
# Working directory and log file (unquoted and quoted forms of the same assignments).
ROUTER_BIN_PATH=/ramdisk/rrouter
ROUTER_LOG=${ROUTER_BIN_PATH}/keepalived.log
ROUTER_BIN_PATH="/ramdisk/rrouter"
ROUTER_LOG="${ROUTER_BIN_PATH}/keepalived.log"
# Files kept under ROUTER_BIN_PATH: the strike counter, the timestamp read below as
# the last heartbeat time (TS_FILE), and the time of the last check (CT_FILE).
STRIKE_FILE="$ROUTER_BIN_PATH/keepalived.strikes"
TS_FILE="$ROUTER_BIN_PATH/keepalived.ts"
CT_FILE="$ROUTER_BIN_PATH/keepalived.ct"
if [ -e $ROUTER_BIN_PATH/keepalived.ts2 ]
# Current time in seconds since the epoch, the value stored in TS_FILE, and the
# number of seconds between the two.
checktime=$(date +%s)
hbtime=$(cat $TS_FILE)
diff=$(($checktime - $hbtime))
# lastcheck stays 0 unless CT_FILE exists; the read of CT_FILE appears further down.
lastcheck=0
if [ -e $CT_FILE ]
then
# Variant that computes diff from keepalived.ts and its copy keepalived.ts2
# (lasttime minus thistime) instead of from the current time.
thistime=$(cat $ROUTER_BIN_PATH/keepalived.ts)
lasttime=$(cat $ROUTER_BIN_PATH/keepalived.ts2)
diff=$(($lasttime - $thistime))
s=0
# Strike when diff is 10 or more: load the stored count if the file exists,
# increment it and write it back.
if [ $diff -ge 10 ]
then
if [ -e $STRIKE_FILE ]
then
s=`cat $STRIKE_FILE 2>/dev/null`
fi
s=$(($s+1))
echo $s > $STRIKE_FILE
else
# Otherwise remove the strike file if present; both outcomes are logged.
if [ -e $STRIKE_FILE ]
then
rm $STRIKE_FILE
echo keepalived.strikes file was removed! >> $ROUTER_LOG
else
echo keepalived.strikes file does not exist! >> $ROUTER_LOG
fi
fi
#3 strikes rule
if [ $s -gt 2 ]
then
echo Keepalived process is dead! >> $ROUTER_LOG
systemctl stop keepalived >> $ROUTER_LOG 2>&1
systemctl stop conntrackd >> $ROUTER_LOG 2>&1
# Time of the previous check as stored in CT_FILE; read errors are discarded.
lastcheck=$(cat $CT_FILE 2>/dev/null)
fi
# Exit without doing anything when the previous check was recorded between 0 and
# 29 seconds before this one.
checkdiff=$(($checktime - $lastcheck))
if [ $checkdiff -ge 0 ] && [ $checkdiff -lt 30 ]
then
exit
fi
# Record the time of this check for the next run.
echo $checktime > $CT_FILE
#Set fault so we have the same effect as a KeepaliveD fault.
python /opt/cloud/bin/master.py --fault
# Force-kill both daemons, appending any output to the log.
pkill -9 keepalived >> $ROUTER_LOG 2>&1
pkill -9 conntrackd >> $ROUTER_LOG 2>&1
echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
exit
s=0
# Strike when diff is strictly greater than 10: bump the stored count and log the
# check time, heartbeat time, difference and resulting strike count.
if [ $diff -gt 10 ]
then
if [ -e $STRIKE_FILE ]
then
s=$(cat $STRIKE_FILE 2>/dev/null)
fi
s=$(($s+1))
echo $s > $STRIKE_FILE
echo "Check time: $checktime, last heartbeat time: $hbtime, time diff: $diff, strike count: $s" >> $ROUTER_LOG
else
# No strike this run: drop the counter file (-f: no error if it is absent).
rm -f $STRIKE_FILE
fi
# Copy the current timestamp file to keepalived.ts2.
cp $ROUTER_BIN_PATH/keepalived.ts $ROUTER_BIN_PATH/keepalived.ts2
# Act once the strike count exceeds 3: stop both services, flag the fault via
# master.py, force-kill any remaining processes, log the FAULT status and exit.
if [ $s -gt 3 ]
then
systemctl stop --now keepalived >> $ROUTER_LOG 2>&1
systemctl stop --now conntrackd >> $ROUTER_LOG 2>&1
#Set fault so we have the same effect as a KeepaliveD fault.
python /opt/cloud/bin/master.py --fault
# "|| true" discards pkill's exit status (e.g. when no process matched).
pkill -9 keepalived >> $ROUTER_LOG 2>&1 || true
pkill -9 conntrackd >> $ROUTER_LOG 2>&1 || true
echo Status: FAULT \(keepalived process is dead\) >> $ROUTER_LOG
exit
fi