bug 11772: Add improved version of locking for the scripts in the systemvm

Currently a script retries taking the lock only once per second. That interval
is long enough for some other active script to run and grab the lock again, so
the first script that requested the lock can be preempted by others over and
over until it finally times out.

To fix this issue, the retry interval is reduced to 0.1 seconds, which allows
more retries. In addition, each process that wants the lock creates a file
named lockname-PID.lock, and only the process whose file is the oldest (judged
by timestamp) gets the lock. The remaining processes retry every 0.1 seconds to
see whether they can get the lock.

The default timeout is also extended to 30 seconds.

Also add a test case for it.

status 11772: resolved fixed
This commit is contained in:
Sheng Yang 2011-11-01 15:04:49 -07:00
parent 086e7cf7b8
commit c44db2557d
3 changed files with 62 additions and 9 deletions

View File

@ -8,28 +8,44 @@
# $2 timeout seconds
getLockFile() {
    # Try to take the lock named $1 for the calling process and print the
    # result on stdout: 1 when the lock was acquired, 0 otherwise.
    #   $1 lock name; this process's request file is /tmp/$1-<pid>.lock
    #   $2 timeout in seconds (optional, defaults to 30)
    # Every requester creates its own request file. The requester whose file
    # `ls -tr` lists first (oldest modification time) owns the lock; the
    # others poll every 0.1 seconds until they are first or the timeout ends.
    __locked=0
    __LOCKFILE="/tmp/$1-$$.lock"
    if [ -n "$2" ]
    then
        __TIMEOUT=$2
    else
        __TIMEOUT=30
    fi

    # A request file for this pid already exists: this process is asking for
    # a lock it has already requested. Report failure and leave that file
    # alone, because it belongs to the earlier request.
    if [ -e "$__LOCKFILE" ]
    then
        logger -t cloud "Process $0 pid $$ want to get ECLUSIVE LOCK $1 RECURSIVELY!"
        psline=$(ps u $$)
        logger -t cloud "Failed job detail: $psline"
        echo 0
        return
    fi

    # Queue up: the modification time of this file is our place in line.
    touch -- "$__LOCKFILE"
    for i in $(seq 1 $((__TIMEOUT * 10)))
    do
        # Oldest request file for this lock name, i.e. the current owner.
        currlock=$(ls -tr -- "/tmp/$1-"*.lock 2>/dev/null | head -n1)
        if [ "$currlock" -ef "$__LOCKFILE" ]
        then
            __locked=1
            break
        fi
        sleep 0.1
        # Ten polls of 0.1 seconds each: log once per second of waiting.
        if [ $((i % 10)) -eq 0 ]
        then
            logger -t cloud "Process $0 pid $$ waiting for the lock $1 for another 1 second"
        fi
    done

    if [ "$__locked" -ne 1 ]
    then
        logger -t cloud "fail to acquire the lock $1 for process $0 pid $$ after $__TIMEOUT seconds time out!"
        psline=$(ps u $$)
        logger -t cloud "Failed job detail: $psline"
        # Leave the queue. A request file left behind would eventually be the
        # oldest one and block every other waiter, and would make the next
        # call from this pid look recursive.
        # NOTE(review): assumes releaseLockFile does nothing when passed 0,
        # so the file has to be removed here - confirm against its body.
        rm -f -- "$__LOCKFILE"
    fi
    echo $__locked
}
@ -38,7 +54,7 @@ getLockFile() {
# $1 lock filename
# $2 locked(1) or not(0)
releaseLockFile() {
__LOCKFILE="/tmp/$1.lock"
__LOCKFILE="/tmp/$1-$$.lock"
__locked=$2
if [ "$__locked" == "1" ]
then

View File

@ -0,0 +1,18 @@
#!/bin/bash
# Starvation test for the lock helpers: runs two tasks that compete for the
# same lock until the operator presses Enter.

# Remove lock files left over from an earlier run; -f keeps a clean first run
# from printing an error when there is nothing to delete.
rm -f /tmp/biglock*
echo
# Task A acquires the lock again and again, at short intervals.
./test_task.sh A 0.3 &
sleep 1
# Meanwhile task B tries to acquire the same lock as well.
./test_task.sh B 0.5 &
# With the original locking, task B would essentially always fail: task A
# re-acquires the lock faster than task B can, so task B may never get a
# turn. With the new version requests are ordered by time, so nobody should
# fail.
read -r end
pkill test_task

View File

@ -0,0 +1,19 @@
#!/bin/bash
# Worker for the lock test: takes and releases the shared lock 100 times,
# printing a timestamped line for each acquire and release.
#   $1 task name shown in the output
#   $2 seconds to keep the lock in each round
source ../../../patches/systemvm/debian/config/root/func.sh
lock="biglock"
for i in $(seq 1 100)
do
    locked=$(getLockFile "$lock")
    if [ "$locked" != "1" ]
    then
        # Quoted so the apostrophe is printed; the unquoted can''t came out
        # as "cant".
        echo "WRONG, Task $1 can't get the lock"
        exit 1
    fi
    echo "$(date +%H:%M:%S.%N) TASK $1 get the lock"
    sleep "$2"
    releaseLockFile "$lock" "$locked"
    echo "$(date +%H:%M:%S.%N) TASK $1 release the lock"
done