#------------------------------------------------------------------------------
# =========                 |
# \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
#  \\    /   O peration     |
#   \\  /    A nd           | Copyright (C) 2011 OpenFOAM Foundation
#    \\/     M anipulation  |
#-------------------------------------------------------------------------------
#     This file is part of OpenFOAM.
#
#     OpenFOAM is free software: you can redistribute it and/or modify it
#     under the terms of the GNU General Public License as published by
#     the Free Software Foundation, either version 3 of the License, or
#     (at your option) any later version.
#
#     OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
#     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#     FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
#     for more details.
#
#     You should have received a copy of the GNU General Public License
#     along with OpenFOAM.  If not, see <http://www.gnu.org/licenses/>.
#     Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
#     stateFile contains per pid information on state of process. Format:
#
#         <machine>.<pid> <state>
#
#     where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
#     (first three are from foamProcessInfo, others from jobInfo files)
#     (PEND is a special state for when the user has submitted a job but
#     there is no jobInfo file yet. Not supported by this script yet)
38 #------------------------------------------------------------------------------
42 #-------------------------------------------------------------------------------
45 #- Number of days for files to be considered old
47 #-------------------------------------------------------------------------------
#- Scratch file used to collect the list of machine names.
#  $$ (the shell's pid) keeps concurrent runs from sharing one file.
TMPFILE="/tmp/${Script}$$.tmp"
#- work dir. Needs to be accessible for all machines
#  (one process-information file per machine is stored below it)
MACHDIR="$HOME/.OpenFOAM/${Script}"
#- State file used when STATEFILE has not been set otherwise
DEFSTATEFILE="$HOME/.OpenFOAM/foamCheckJobs.out"
56 if [ `uname -s` = Linux
]
66 Usage: $Script [stateFile]
68 This program checks all the locks in the FOAM_JOB_DIR directory to see if
69 their processes are still running. Processes will not release their
70 lock if they exit abnormally. This program will try to obtain process
71 information on the machine the process ran on and release the lock
72 if the program is no longer running.
74 Note: all machines have to be reachable using ssh.
76 The output from checking all running jobs is collected in an optional
80 \$FOAM_JOB_DIR/runningJobs locks for running processes
81 /finishedJobs locks for finished processes
87 #-------------------------------------------------------------------------------
91 #-------------------------------------------------------------------------------
# getRawEntry dictionary entry
#    Prints the value of a dictionary entry: for every line of the file
#    <dictionary> that starts (after optional blanks) with the keyword
#    <entry> followed by a blank, the remainder of the line is printed
#    unchanged (quotes and trailing ';' included).
#    Lines starting with '//' are ignored.
#    Note: <entry> is inserted into the regular expressions as-is.
getRawEntry() {
    # [[:blank:]] (space or tab) is used instead of "[ \t]": inside a grep
    # bracket expression "\t" means backslash-or-'t', not a tab, so a line
    # such as "tcode x" would wrongly be selected for the entry "code".
    grep -v '^//' "$1" |
        grep "^[[:blank:]]*$2[[:blank:]]" |
        sed -e "s/^[[:blank:]]*$2[[:blank:]]*//"
}
# getEntry dictionary entry
#    Like getRawEntry but strips " and ending ';'
#    i.e. a leading '"', a trailing ';' and then a trailing '"' are removed
#    from each value printed by getRawEntry.
getEntry() {
    # Arguments are quoted so a dictionary path containing blanks is passed
    # on as a single word.
    getRawEntry "$1" "$2" |
        sed -e 's/^"//' -e 's/;$//' -e 's/"$//'
}
106 # Returns 0 if directory contains files/directories
116 # dayDiff <date string 1> <date string 2>
117 # Prints number of days between the two
118 # Eg. dayDiff "Jan 10 2002" "Dec 28 1999"
121 date -d "$1" > /dev
/null
2>&1
124 #- option '-d' on date not supported. Give up.
127 year1
=`echo "$1" | awk '{print $3}'`
128 year2
=`echo "$2" | awk '{print $3}'`
129 day1
=`date -d "$1" "+%j"`
130 day2
=`date -d "$2" "+%j"`
132 nYears
=`expr $year1 - $year2`
133 tmp1
=`expr $nYears \* 365`
134 tmp2
=`expr $day1 - $day2`
138 #dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"
141 # getAllJobs jobInfoDirectory
142 # Prints list of all jobs in directory (e.g. runningJobs/)
143 # Also handles 'slaves' entries in jobInfo:
144 # slaves 1 ( penfold.23766 );
151 line
=`grep '^[ ]*slaves' $f 2>/dev/null`
154 slaveJobs
=`echo "$line" | sed -e 's/.*(\(.*\)).*/\1/'`
155 jobs="$jobs $slaveJobs"
164 # releaseLock jobId lockFile
165 # Releases lock on jobId
169 #- move lock to finishedJobs
170 mv $2 $FOAM_JOB_DIR/finishedJobs
/
172 echo "Lock on job $1 released."
176 #-------------------------------------------------------------------------------
180 #-------------------------------------------------------------------------------
184 if [ "$1" = "-h" -o "$1" = "-help" ]
191 STATEFILE
=${STATEFILE:-$DEFSTATEFILE}
197 #- Check a few things
199 if [ ! "$FOAM_JOB_DIR" ]
201 echo "$Script : FOAM_JOB_DIR environment variable not set."
206 if [ ! -d "$FOAM_JOB_DIR" ]
208 echo "$Script : directory does not exist."
209 echo " FOAM_JOB_DIR=$FOAM_JOB_DIR"
213 if [ ! -d "$FOAM_JOB_DIR/runningJobs" -o ! -d "$FOAM_JOB_DIR/finishedJobs" ]
215 echo "$Script : invalid directory."
216 echo " FOAM_JOB_DIR=$FOAM_JOB_DIR"
225 echo "Using remote shell type : $RSH"
227 echo "Collecting information on jobs in"
228 echo " $FOAM_JOB_DIR"
232 #- Collect machine names into $TMPFILE
233 # Also handles 'slaves' entry in jobInfo:
235 rm -f $TMPFILE; touch $TMPFILE
236 RUNJOBS
=`getAllJobs $FOAM_JOB_DIR/runningJobs`
239 machinePid
=`basename $f`
240 machine
=`echo $machinePid | sed -e 's/\.[0-9][0-9]*$//'`
241 pid
=`echo $machinePid | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/'`
# Record each machine only once in $TMPFILE. The name is matched literally
# against whole lines (-F -x): a substring match would treat e.g. "node1"
# as already listed as soon as "node10" is in the file.
grep -Fxq -- "$machine" "$TMPFILE" 2>/dev/null ||
    echo "$machine" >> "$TMPFILE"
249 echo "Found machines:"
255 #- Collect process info on all machines, one file per machine
261 machine
=`sed -n -e "${cnt}p" $TMPFILE`
267 machFile
=$MACHDIR/$machine
269 echo "Contacting $machine to collect process information:"
270 if [ $machine = `hostname` ]
272 echo " foamProcessInfo $machFile"
273 foamProcessInfo
$machFile >/dev
/null
2>&1
275 echo " $RSH $machine foamProcessInfo $machFile"
276 $RSH $machine foamProcessInfo
$machFile >/dev
/null
2>&1
278 if [ $?
-ne 0 -o ! -s $machFile ]
280 echo "** Failed collecting process information on $machine."
281 echo "Check $machFile and run foamProcessInfo by hand"
284 echo "Succesfully collected information in $machFile ..."
#- Construct state for runningJobs; move non-running jobs to finishedJobs
298 machinePid
=`basename $f`
299 machine
=`echo $machinePid | sed -e 's/\.[0-9][0-9]*$//'`
300 pid
=`echo $machinePid | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/'`
302 machFile
=$MACHDIR/$machine
305 entry
=`grep "^$pid " $machFile 2>/dev/null`
306 if [ $?
-ne 0 -o ! "$entry" ]
310 releaseLock
$machinePid $f
# Prompt and leave the cursor on the same line. printf is used because
# plain echo does not portably honour the "\c" escape and may print it
# literally.
printf '%s' "Job $machinePid seems to be no longer running. Release lock? (y/a)"
314 if [ "${answ:-y}" = 'y' ]
316 releaseLock
$machinePid $f
317 elif [ "${answ:-y}" = 'a' ]
320 releaseLock
$machinePid $f
323 echo "$machinePid $state" >> $STATEFILE
327 state
=`echo "$entry" | awk '{print $2}'`
328 echo "$machinePid $state" >> $STATEFILE
335 #- Collect old jobs in finishedJobs
337 OLDFILES
=`find $FOAM_JOB_DIR/finishedJobs -mtime +$NDAYSLIMIT -print`
339 #- Construct state for finishedJobs and check on date of files.
341 if notEmpty
$FOAM_JOB_DIR/finishedJobs
343 dateNow
=`date '+%b %d %Y'`
344 for f
in $FOAM_JOB_DIR/finishedJobs
/*
346 sz
=`ls -s $f | awk '{print $1}'`
349 machinePid
=`basename $f`
350 machine
=`echo $machinePid | sed -e 's/\.[0-9][0-9]*$//'`
351 pid
=`echo $machinePid | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/'`
353 end
=`getEntry $f endDate`
358 nDaysOld
=`dayDiff "$dateNow" "$end"`
359 if [ "$nDaysOld" -gt $NDAYSLIMIT ]
361 OLDFILES
="$OLDFILES $f"
367 echo "$machinePid $state" >> $STATEFILE
375 nOldFiles
=`echo "$OLDFILES" | wc -w`
376 if [ "$nOldFiles" -gt 0 ]
378 echo "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
379 $ECHO "Do you want to remove these? (y)\c"
381 if [ "${answ:-y}" = 'y' ]
392 echo "Updated stateFile:"
396 #------------------------------------------------------------------------------