Merge /u/hjasak/foam-extend-3.1/ branch HrvojeJasak into nextRelease
[foam-extend-3.2.git] / bin / foamCheckJobs
blob8eec0c8e8150ea1b27bc91f166eec3717fdcb182
1 #!/bin/sh
2 #------------------------------------------------------------------------------
3 # ========= |
4 # \\ / F ield | foam-extend: Open Source CFD
5 # \\ / O peration |
6 # \\ / A nd | For copyright notice see file Copyright
7 # \\/ M anipulation |
8 #------------------------------------------------------------------------------
9 # License
10 # This file is part of foam-extend.
12 # foam-extend is free software: you can redistribute it and/or modify it
13 # under the terms of the GNU General Public License as published by the
14 # Free Software Foundation, either version 3 of the License, or (at your
15 # option) any later version.
17 # foam-extend is distributed in the hope that it will be useful, but
18 # WITHOUT ANY WARRANTY; without even the implied warranty of
19 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 # General Public License for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with foam-extend. If not, see <http://www.gnu.org/licenses/>.
25 # Script
26 # foamCheckJobs
28 # Description
29 # Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
30 # stateFile contains per pid information on state of process. Format:
31 # pid state command
33 # where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
34 # (first three are from foamProcessInfo, others from jobInfo files)
35 # (PEND is special state from when user has submitted but no jobInfo
36 # file yet. Not supported by this script yet)
38 #------------------------------------------------------------------------------
PROGNAME=${0##*/}

#------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#------------------------------------------------------------------------------

#- work file. Created with mktemp so that its name cannot be predicted;
#  falls back to the pid-based name where mktemp is not available.
TMPFILE=$(mktemp "/tmp/${PROGNAME}.XXXXXX" 2>/dev/null) \
    || TMPFILE=/tmp/${PROGNAME}$$.tmp
#- the work file may already exist from here on: remove it on any exit
trap 'rm -f "$TMPFILE"' 0

#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${PROGNAME}
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out

#- Pick the echo form that honours '\c' (used to keep the cursor on the
#  prompt line). Probing the shell itself is more reliable than looking at
#  the OS name: some /bin/sh on Linux print '-e' literally.
if [ "$(echo -e 'x\c')" = 'x' ]
then
    ECHO='echo -e'
else
    ECHO='echo'
fi
64 #------------------------------------------------------------------------------
66 # Functions
68 #------------------------------------------------------------------------------
# getRawEntry dictionary entry
#    Prints the value of dictionary entry 'entry' found in file 'dictionary':
#    everything that follows the keyword and its separating whitespace.
#    Lines starting with '//' are ignored. The keyword may be indented and
#    separated from its value with spaces or tabs.
#    Note: 'entry' is inserted into a regular expression unescaped.
getRawEntry() {
    # [[:space:]] instead of '[ \t]': inside a bracket expression '\t' is
    # not a tab for POSIX grep/sed.
    grep -v '^//' "$1" \
        | grep "^[[:space:]]*$2[[:space:]]" \
        | sed -e "s/^[[:space:]]*$2[[:space:]]*//"
}
# getEntry dictionary entry
#    Like getRawEntry but strips a leading '"', a trailing ';' and then a
#    trailing '"' from the value.
getEntry() {
    # arguments are quoted so that dictionary paths with blanks survive
    getRawEntry "$1" "$2" | sed -e 's/^"//' -e 's/;$//' -e 's/"$//'
}
# notEmpty directory
#    Returns 0 if 'ls' lists anything for the directory, 1 otherwise
#    (hidden entries are not counted, as 'ls' does not list them).
notEmpty() {
    # the status of the test is the status of the function
    [ -n "$(ls -- "$1")" ]
}
# dayDiff <date string 1> <date string 2>
#    Prints the number of days from date 2 to date 1 (negative when date 1 is
#    the earlier one).
#    Eg. dayDiff "Jan 10 2002" "Dec 28 2001"
#    ==> 13
#    Prints 0 when 'date -d' is not supported or a date cannot be parsed.
dayDiff() {
    # Both dates are converted to seconds since the epoch in UTC, so leap
    # years are accounted for and daylight saving cannot shift the result.
    if secs1=$(date -u -d "$1" '+%s' 2>/dev/null) \
        && secs2=$(date -u -d "$2" '+%s' 2>/dev/null)
    then
        echo $(( (secs1 - secs2) / 86400 ))
    else
        #- option '-d' on date not supported or unreadable date. Give up.
        echo "0"
    fi
}
113 #dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"
# getAllJobs jobInfoDirectory
#    Prints, on one line and separated by blanks, all jobs in the directory
#    (e.g. runningJobs/): first the path of every entry in the directory,
#    then the names found in 'slaves' entries of those files, e.g.
#        slaves 1 ( penfold.23766 );
#    Prints an empty line if the directory has no entries.
getAllJobs() {
    lockFiles=''
    slaveList=''
    for f in "$1"/*
    do
        #- an unmatched glob is left as the literal pattern; skip it
        [ -e "$f" ] || continue
        lockFiles="${lockFiles:+$lockFiles }$f"

        line=`grep '^[ ]*slaves' "$f" 2>/dev/null`
        if [ $? -eq 0 ]; then
            #- keep what is between the parentheses
            slaveJobs=`echo "$line" | sed -e 's/.*(\(.*\)).*/\1/'`
            slaveList="$slaveList $slaveJobs"
        fi
    done
    echo "$lockFiles$slaveList"
}
# releaseLock jobId lockFile
#    Releases the lock on jobId: if lockFile is a regular file it is moved to
#    $FOAM_JOB_DIR/finishedJobs/. Prints a confirmation and returns 0; if the
#    move fails a message is written to stderr and 1 is returned.
#    When lockFile does not exist nothing is moved and the lock is reported
#    as released.
releaseLock () {
    if [ -f "$2" ]; then
        #- move lock to finishedJobs
        if ! mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/"; then
            printf '%s\n' "Failed to release lock on job $1." >&2
            return 1
        fi
    fi
    printf '%s\n' "Lock on job $1 released."
}
# printUsage
#    Writes the help text to stdout. $PROGNAME is expanded in the text;
#    \$FOAM_JOB_DIR is escaped so that the variable name itself is shown.
printUsage() {
    cat <<USAGE
Usage: $PROGNAME [stateFile]

This program checks all the locks in the license directory to see if
their processes are still running. Processes will not release their
lock if they exit abnormally. This program will try to obtain process
information on the machine the process ran on and release the lock
if the program is no longer running.

Requirements: the environment variable FOAM_JOB_DIR needs to point to the
license directory and all machines have to be reachable using ssh.

The output from checking all running jobs is collected in an optional
file.

FILES:
    \$FOAM_JOB_DIR/runningJobs   locks for running processes
                  /finishedJobs  ,,        finished processes
USAGE
}
171 #------------------------------------------------------------------------------
173 # Main
175 #------------------------------------------------------------------------------
#- Check a few things

#- FOAM_JOB_DIR must be set ...
if [ ! "$FOAM_JOB_DIR" ]; then
    $ECHO "$PROGNAME : FOAM_JOB_DIR environment variable not set."
    $ECHO "This should point to your central license directory."
    exit 1
fi

#- ... and must hold both the runningJobs and the finishedJobs directory
if [ ! -d "$FOAM_JOB_DIR" ] \
    || [ ! -d "$FOAM_JOB_DIR/runningJobs" ] \
    || [ ! -d "$FOAM_JOB_DIR/finishedJobs" ]
then
    $ECHO "$PROGNAME : The license directory according to FOAM_JOB_DIR is not valid."
    $ECHO "FOAM_JOB_DIR: $FOAM_JOB_DIR"
    exit 1
fi

#- State file: the single argument if given, else a STATEFILE already set
#  in the environment, else the default. More than one argument is an error.
case $# in
0)
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
    ;;
1)
    STATEFILE=$1
    ;;
*)
    printUsage
    exit 1
    ;;
esac
#- obtain rsh method
RSH='ssh'
echo "Using remote shell type : $RSH"

echo ""
echo "Collecting information on jobs in"
echo "    $FOAM_JOB_DIR"
echo ""

#- Collect machine names into $TMPFILE, one name per line, no duplicates.
#  Also handles 'slaves' entry in jobInfo (getAllJobs adds those names).

rm -f "$TMPFILE"; touch "$TMPFILE"
RUNJOBS=`getAllJobs "$FOAM_JOB_DIR/runningJobs"`
#- $RUNJOBS is deliberately unquoted: it is a blank separated list
for f in $RUNJOBS
do
    #- lock names have the form <machine>.<pid>
    machinePid=`basename "$f"`
    machine=`echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//'`

    #- whole-line fixed-string match, so that e.g. 'node1' is not taken to
    #  be present already because 'node10' is
    if ! grep -F -x -- "$machine" "$TMPFILE" >/dev/null 2>&1; then
        $ECHO "$machine" >> "$TMPFILE"
    fi
done
$ECHO "Found machines:"
cat "$TMPFILE"
$ECHO ""
#- Collect process info on all machines, one file per machine
#  ($MACHDIR/<machine>). foamProcessInfo is run directly for the local host
#  and through $RSH for all others.

mkdir -p "$MACHDIR"
thisHost=`hostname`
cnt=1
while true
do
    #- machine names are read by line number so that stdin stays untouched
    machine=`sed -n -e "${cnt}p" "$TMPFILE"`
    if [ ! "$machine" ]; then
        break
    fi

    machFile=$MACHDIR/$machine
    rm -f "$machFile"
    $ECHO "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]; then
        $ECHO "    foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
    else
        $ECHO "    $RSH $machine foamProcessInfo $machFile"
        $RSH "$machine" foamProcessInfo "$machFile" >/dev/null 2>&1
    fi
    #- $? is the status of whichever foamProcessInfo call ran above
    if [ $? -ne 0 ] || [ ! -s "$machFile" ]; then
        $ECHO "** Failed collecting process information on $machine."
        $ECHO "Check $machFile and run foamProcessInfo by hand"
        rm -f "$machFile"
    else
        $ECHO "Successfully collected information in $machFile ..."
    fi

    cnt=`expr $cnt + 1`
done
$ECHO ""
#- Construct state for runningJobs; move non running jobs to finishedJobs.
#  Jobs of machines without a readable process file are skipped.

releaseAll=''
rm -f "$STATEFILE"
#- $RUNJOBS is deliberately unquoted: it is a blank separated list
for f in $RUNJOBS
do
    #- lock names have the form <machine>.<pid>
    machinePid=`basename "$f"`
    machine=`echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//'`
    pid=`echo "$machinePid" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/'`

    machFile=$MACHDIR/$machine
    if [ -r "$machFile" ]; then
        entry=`grep "^$pid " "$machFile" 2>/dev/null`
        if [ $? -ne 0 ] || [ ! "$entry" ]; then
            #- pid not listed for that machine
            if [ "$releaseAll" ]; then
                releaseLock "$machinePid" "$f"
            else
                $ECHO "Job $machinePid seems to be no longer running. Release lock? (y/a)\c"
                read answ
                #- an empty answer counts as 'y'
                case "${answ:-y}" in
                y)
                    releaseLock "$machinePid" "$f"
                    ;;
                a)
                    #- release this and all following locks without asking
                    releaseAll='yes'
                    releaseLock "$machinePid" "$f"
                    ;;
                *)
                    #- lock is kept
                    state='OTHR'
                    $ECHO "$machinePid $state" >> "$STATEFILE"
                    ;;
                esac
            fi
        else
            #- second column of the process file entry is the state
            state=`echo "$entry" | awk '{print $2}'`
            $ECHO "$machinePid $state" >> "$STATEFILE"
        fi
    fi
done
#- Collect old jobs in finishedJobs.
#  -type f keeps the finishedJobs directory itself out of the list.

OLDFILES=`find "$FOAM_JOB_DIR/finishedJobs" -type f -mtime +$NDAYSLIMIT -print`

#- Construct state for finishedJobs and check on date of files.
#  Non-empty lock files without an endDate entry are 'ABRT', all other
#  non-empty ones 'FINI'; empty files are skipped.

if notEmpty "$FOAM_JOB_DIR/finishedJobs"; then
    #- LC_ALL=C: month abbreviation must not depend on the user's locale
    dateNow=`LC_ALL=C date '+%b %d %Y'`
    for f in "$FOAM_JOB_DIR"/finishedJobs/*
    do
        #- test the size itself; allocated blocks may be 0 for small files
        if [ -f "$f" ] && [ -s "$f" ]; then
            machinePid=`basename "$f"`

            end=`getEntry "$f" endDate`
            if [ ! "$end" ]; then
                state='ABRT'
            else
                nDaysOld=`dayDiff "$dateNow" "$end"`
                if [ "${nDaysOld:-0}" -gt "$NDAYSLIMIT" ]; then
                    OLDFILES="$OLDFILES $f"
                fi

                state='FINI'
            fi

            $ECHO "$machinePid $state" >> "$STATEFILE"
        fi
    done
fi

#- A lock can qualify both by modification time and by endDate: list it once.
#  $OLDFILES is deliberately unquoted so that it is split into names.
OLDFILES=`printf '%s\n' $OLDFILES | sort -u`
#- Remove old locks

#- the arithmetic expansion drops the padding some 'wc' put around the count
nOldFiles=$(( `echo "$OLDFILES" | wc -w` ))
if [ "$nOldFiles" -gt 0 ]; then
    $ECHO "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
    $ECHO "Do you want to remove these? (y)\c"
    read answ
    #- an empty answer counts as 'y'
    if [ "${answ:-y}" = 'y' ]; then
        #- $OLDFILES is deliberately unquoted: it is a list of names
        rm -f $OLDFILES
    fi
fi

#- Clean up work file and work directory
rm -f "$TMPFILE"
if [ -n "$MACHDIR" ] && [ -d "$MACHDIR" ]; then
    rm -r "$MACHDIR"
fi

$ECHO ""
$ECHO "Updated stateFile:"
$ECHO "    $STATEFILE"
$ECHO ""
365 #------------------------------------------------------------------------------