mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
bug 11772: Add improved version of locking for the scripts in the systemvm
Previously, the lock in the script was retried only once every second, which is long enough for some other active script to run and take the lock again. As a result, the process that requested the lock first could be preempted by others again and again until it finally timed out. To fix this issue, the retry interval is reduced to 0.1 seconds, which provides more retries. In addition, each process that wants the lock creates a file named lockname-PID.lock, and only the earliest one (judged by timestamp) gets the lock. The remaining processes retry every 0.1 seconds to see whether they can get the lock. The default timeout is also extended to 30 seconds, and a test case is added. status 11772: resolved fixed
This commit is contained in:
parent
086e7cf7b8
commit
c44db2557d
@ -8,28 +8,44 @@
|
|||||||
# $2 timeout seconds
|
# $2 timeout seconds
|
||||||
getLockFile() {
|
getLockFile() {
|
||||||
__locked=0
|
__locked=0
|
||||||
__LOCKFILE="/tmp/$1.lock"
|
__LOCKFILE="/tmp/$1-$$.lock"
|
||||||
if [ $2 ]
|
if [ $2 ]
|
||||||
then
|
then
|
||||||
__TIMEOUT=$2
|
__TIMEOUT=$2
|
||||||
else
|
else
|
||||||
__TIMEOUT=10
|
__TIMEOUT=30
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for i in `seq 1 $__TIMEOUT`
|
if [ -e $__LOCKFILE ]
|
||||||
|
then
|
||||||
|
logger -t cloud "Process $0 pid $$ want to get ECLUSIVE LOCK $1 RECURSIVELY!"
|
||||||
|
psline=`ps u $$`
|
||||||
|
logger -t cloud "Failed job detail: $psline"
|
||||||
|
echo 0
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
|
touch $__LOCKFILE
|
||||||
|
|
||||||
|
for i in `seq 1 $(($__TIMEOUT * 10))`
|
||||||
do
|
do
|
||||||
if [ ! -e $__LOCKFILE ]
|
currlock=`ls -tr /tmp/$1-*.lock | head -n1`
|
||||||
|
if [ $currlock -ef $__LOCKFILE ]
|
||||||
then
|
then
|
||||||
touch $__LOCKFILE
|
|
||||||
__locked=1
|
__locked=1
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
sleep 1
|
sleep 0.1
|
||||||
logger -t cloud "sleep 1 second wait for the lock file " $__LOCKFILE
|
if [ $((i % 10)) -eq 0 ]
|
||||||
|
then
|
||||||
|
logger -t cloud "Process $0 pid $$ waiting for the lock $1 for another 1 second"
|
||||||
|
fi
|
||||||
done
|
done
|
||||||
if [ $__locked -ne 1 ]
|
if [ $__locked -ne 1 ]
|
||||||
then
|
then
|
||||||
logger -t cloud "fail to acquire the lock file $__LOCKFILE after $__TIMEOUT seconds time out!"
|
logger -t cloud "fail to acquire the lock $1 for process $0 pid $$ after $__TIMEOUT seconds time out!"
|
||||||
|
psline=`ps u $$`
|
||||||
|
logger -t cloud "Failed job detail: $psline"
|
||||||
fi
|
fi
|
||||||
echo $__locked
|
echo $__locked
|
||||||
}
|
}
|
||||||
@ -38,7 +54,7 @@ getLockFile() {
|
|||||||
# $1 lock filename
|
# $1 lock filename
|
||||||
# $2 locked(1) or not(0)
|
# $2 locked(1) or not(0)
|
||||||
releaseLockFile() {
|
releaseLockFile() {
|
||||||
__LOCKFILE="/tmp/$1.lock"
|
__LOCKFILE="/tmp/$1-$$.lock"
|
||||||
__locked=$2
|
__locked=$2
|
||||||
if [ "$__locked" == "1" ]
|
if [ "$__locked" == "1" ]
|
||||||
then
|
then
|
||||||
|
|||||||
18
test/scripts/script_lock_test/test.sh
Executable file
18
test/scripts/script_lock_test/test.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
rm /tmp/biglock*
|
||||||
|
echo
|
||||||
|
|
||||||
|
#Test task A acquires the same lock again and again at short intervals
|
||||||
|
./test_task.sh A 0.3 &
|
||||||
|
|
||||||
|
sleep 1
|
||||||
|
#At the same time, task B would try to acquire the lock as well.
|
||||||
|
./test_task.sh B 0.5 &
|
||||||
|
|
||||||
|
#For the original version, task B would essentially fail, because task A does it
|
||||||
|
# quicker than task B, so task B may not have time to execute. But for the new
|
||||||
|
# version, since lock requests are ordered by time, nobody should fail.
|
||||||
|
|
||||||
|
read end
|
||||||
|
pkill test_task
|
||||||
19
test/scripts/script_lock_test/test_task.sh
Executable file
19
test/scripts/script_lock_test/test_task.sh
Executable file
@ -0,0 +1,19 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
source ../../../patches/systemvm/debian/config/root/func.sh
|
||||||
|
|
||||||
|
lock="biglock"
|
||||||
|
|
||||||
|
for i in `seq 1 100`
|
||||||
|
do
|
||||||
|
locked=$(getLockFile $lock)
|
||||||
|
if [ "$locked" != "1" ]
|
||||||
|
then
|
||||||
|
echo WRONG, Task $1 can''t get the lock
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo `date +%H:%M:%S.%N` TASK $1 get the lock
|
||||||
|
sleep $2
|
||||||
|
releaseLockFile $lock $locked
|
||||||
|
echo `date +%H:%M:%S.%N` TASK $1 release the lock
|
||||||
|
done
|
||||||
Loading…
x
Reference in New Issue
Block a user