From c44db2557db8b39425c5015d98c2915d8506faf6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 1 Nov 2011 15:04:49 -0700 Subject: [PATCH] bug 11772: Add improved version of locking for the scripts in the systemvm Currently the lock in the script is retried only once every 1 second, which is long enough for some other active script to run and take the lock again. So it's possible that the first process to request the lock is always preempted by others and finally times out. To fix this issue, the retry interval is reduced to 0.1 seconds, which provides more retries. Each process that wants the lock creates a file named lockname-PID.lock, and only the first one (judged by timestamp) gets the lock. The remaining ones retry every 0.1 seconds to see whether they can get the lock. The timeout is also extended to 30 seconds. A test case is added as well. status 11772: resolved fixed --- patches/systemvm/debian/config/root/func.sh | 34 +++++++++++++++------ test/scripts/script_lock_test/test.sh | 18 +++++++++++ test/scripts/script_lock_test/test_task.sh | 19 ++++++++++++ 3 files changed, 62 insertions(+), 9 deletions(-) create mode 100755 test/scripts/script_lock_test/test.sh create mode 100755 test/scripts/script_lock_test/test_task.sh diff --git a/patches/systemvm/debian/config/root/func.sh b/patches/systemvm/debian/config/root/func.sh index c2db06b6948..c1a73441708 100644 --- a/patches/systemvm/debian/config/root/func.sh +++ b/patches/systemvm/debian/config/root/func.sh @@ -8,28 +8,44 @@ # $2 timeout seconds getLockFile() { __locked=0 - __LOCKFILE="/tmp/$1.lock" + __LOCKFILE="/tmp/$1-$$.lock" if [ $2 ] then __TIMEOUT=$2 else - __TIMEOUT=10 + __TIMEOUT=30 fi - for i in `seq 1 $__TIMEOUT` + if [ -e $__LOCKFILE ] + then + logger -t cloud "Process $0 pid $$ want to get ECLUSIVE LOCK $1 RECURSIVELY!" 
+ psline=`ps u $$` + logger -t cloud "Failed job detail: $psline" + echo 0 + return + fi + + touch $__LOCKFILE + + for i in `seq 1 $(($__TIMEOUT * 10))` do - if [ ! -e $__LOCKFILE ] + currlock=`ls -tr /tmp/$1-*.lock | head -n1` + if [ $currlock -ef $__LOCKFILE ] then - touch $__LOCKFILE __locked=1 break fi - sleep 1 - logger -t cloud "sleep 1 second wait for the lock file " $__LOCKFILE + sleep 0.1 + if [ $((i % 10)) -eq 0 ] + then + logger -t cloud "Process $0 pid $$ waiting for the lock $1 for another 1 second" + fi done if [ $__locked -ne 1 ] then - logger -t cloud "fail to acquire the lock file $__LOCKFILE after $__TIMEOUT seconds time out!" + logger -t cloud "fail to acquire the lock $1 for process $0 pid $$ after $__TIMEOUT seconds time out!" + psline=`ps u $$` + logger -t cloud "Failed job detail: $psline" fi echo $__locked } @@ -38,7 +54,7 @@ getLockFile() { # $1 lock filename # $2 locked(1) or not(0) releaseLockFile() { - __LOCKFILE="/tmp/$1.lock" + __LOCKFILE="/tmp/$1-$$.lock" __locked=$2 if [ "$__locked" == "1" ] then diff --git a/test/scripts/script_lock_test/test.sh b/test/scripts/script_lock_test/test.sh new file mode 100755 index 00000000000..120792fe3f7 --- /dev/null +++ b/test/scripts/script_lock_test/test.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +rm /tmp/biglock* +echo + +#Test task A acquires the lock again and again at short intervals +./test_task.sh A 0.3 & + +sleep 1 +#At the same time, task B would try to acquire the lock as well. +./test_task.sh B 0.5 & + +#For the original version, task B would essentially fail, because task A does it +# quicker than task B, so task B may not have time to execute. But for the new +# version, since it's ordered by time, nobody should fail. 
+ +read end +pkill test_task diff --git a/test/scripts/script_lock_test/test_task.sh b/test/scripts/script_lock_test/test_task.sh new file mode 100755 index 00000000000..b68ca2764d9 --- /dev/null +++ b/test/scripts/script_lock_test/test_task.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +source ../../../patches/systemvm/debian/config/root/func.sh + +lock="biglock" + +for i in `seq 1 100` +do + locked=$(getLockFile $lock) + if [ "$locked" != "1" ] + then + echo WRONG, Task $1 can''t get the lock + exit 1 + fi + echo `date +%H:%M:%S.%N` TASK $1 get the lock + sleep $2 + releaseLockFile $lock $locked + echo `date +%H:%M:%S.%N` TASK $1 release the lock +done