ENH: settings.(c)sh: added checking on SGIMPI MPI_ROOT
[OpenFOAM-2.0.x.git] / bin / foamCheckJobs
blob408dee58988b5d02fe86a56ec8be03579596cc53
1 #!/bin/sh
2 #------------------------------------------------------------------------------
3 # ========= |
4 # \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
5 # \\ / O peration |
6 # \\ / A nd | Copyright (C) 2011 OpenFOAM Foundation
7 # \\/ M anipulation |
8 #-------------------------------------------------------------------------------
9 # License
10 # This file is part of OpenFOAM.
12 # OpenFOAM is free software: you can redistribute it and/or modify it
13 # under the terms of the GNU General Public License as published by
14 # the Free Software Foundation, either version 3 of the License, or
15 # (at your option) any later version.
17 # OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
18 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 # for more details.
22 # You should have received a copy of the GNU General Public License
23 # along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
25 # Script
26 # foamCheckJobs
28 # Description
29 # Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
30 # stateFile contains per pid information on state of process. Format:
31 # pid state command
33 # where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
34 # (first three are from foamProcessInfo, others from jobInfo files)
35 # (PEND is special state from when user has submitted but no jobInfo
36 # file yet. Not supported by this script yet)
38 #------------------------------------------------------------------------------
# Name of this script without its leading directory; used in messages and
# in the names of the work file and work directory below.
Script=${0##*/}

#-------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#-------------------------------------------------------------------------------

#- work file (list of machine names, one per line)
TMPFILE=/tmp/${Script}$$.tmp
#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${Script}
#- state file used when none is given on the command line
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out

#- echo flavour that interprets backslash escapes such as '\c'
if [ "$(uname -s)" = Linux ]
then
    ECHO='echo -e'
else
    ECHO='echo'
fi
# usage
#     Prints the help text on stdout and terminates the script with status 1.
usage() {
    cat <<USAGE
Usage: $Script [stateFile]

This program checks all the locks in the FOAM_JOB_DIR directory to see if
their processes are still running. Processes will not release their
lock if they exit abnormally. This program will try to obtain process
information on the machine the process ran on and release the lock
if the program is no longer running.

Note: all machines have to be reachable using ssh.

The output from checking all running jobs is collected in an optional
file.

FILES:
    \$FOAM_JOB_DIR/runningJobs      locks for running processes
                  /finishedJobs     locks for finished processes
USAGE
    exit 1
}
87 #-------------------------------------------------------------------------------
89 # Functions
91 #-------------------------------------------------------------------------------
# getRawEntry dictionary entry
#     Prints the raw value of every line in file 'dictionary' that starts
#     (after optional blanks) with the keyword 'entry' followed by a space.
#     Lines starting with '//' are ignored. The keyword and the blanks around
#     it are stripped; quotes and a trailing ';' are left in place.
getRawEntry() {
    # [[:blank:]] matches space and TAB. A "\t" inside a bracket expression
    # is not a TAB for grep, so it would match the letters '\' and 't'.
    grep -v '^//' "$1" | sed -n -e "s/^[[:blank:]]*$2 [ ]*//p"
}
# getEntry dictionary entry
#     Like getRawEntry but strips a leading '"', a trailing ';' and the
#     '"' that precedes it.
getEntry() {
    # Arguments are quoted so a dictionary path containing blanks stays one word
    getRawEntry "$1" "$2" | sed -e 's/^"//' -e 's/;$//' -e 's/"$//'
}
# notEmpty directory
#     Returns 0 if 'directory' contains (non-hidden) files/directories,
#     1 otherwise.
notEmpty() {
    # The name is quoted so that a path with blanks is listed as one directory
    if [ -n "$(ls -- "$1")" ]
    then
        return 0
    else
        return 1
    fi
}
# dayDiff <date string 1> <date string 2>
#     Prints number of days between the two (date 1 minus date 2)
#     Eg. dayDiff "Jan 10 2002" "Dec 28 2001"
#     ==> 13
#     Prints 0 if 'date -d' is not supported or a date cannot be parsed.
dayDiff() {
    # Convert both dates to seconds since the epoch. Working in UTC (-u)
    # keeps every day exactly 86400 s long, and counting real days instead
    # of 365 per year gives the right answer across leap years.
    if dayDiff_s1=$(date -u -d "$1" '+%s' 2>/dev/null) &&
       dayDiff_s2=$(date -u -d "$2" '+%s' 2>/dev/null)
    then
        # Difference of whole-day numbers, so a time-of-day part is ignored
        echo $(( dayDiff_s1 / 86400 - dayDiff_s2 / 86400 ))
    else
        #- option '-d' on date not supported (or unparsable date). Give up.
        echo "0"
    fi
}
#dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"
# getAllJobs jobInfoDirectory
#     Prints list of all jobs in directory (e.g. runningJobs/) as one
#     blank-separated line: first the path of every jobInfo file, then the
#     job names found in 'slaves' entries of those files:
#          slaves 1 ( penfold.23766 );
#     Prints an empty line if the directory has no entries.
getAllJobs() {
    getAllJobs_files=''
    getAllJobs_slaves=''
    for getAllJobs_f in "$1"/*
    do
        # An unmatched glob is left as the literal pattern: nothing to list
        if [ ! -e "$getAllJobs_f" ] && [ ! -L "$getAllJobs_f" ]
        then
            continue
        fi
        getAllJobs_files="${getAllJobs_files:+$getAllJobs_files }$getAllJobs_f"

        # grep fails (silently) on directories and on files without 'slaves'
        if getAllJobs_line=$(grep '^[ ]*slaves' "$getAllJobs_f" 2>/dev/null)
        then
            # keep only what is between the parentheses
            getAllJobs_names=$(echo "$getAllJobs_line" | sed -e 's/.*(\(.*\)).*/\1/')
            getAllJobs_slaves="$getAllJobs_slaves $getAllJobs_names"
        fi
    done
    echo "$getAllJobs_files$getAllJobs_slaves"
}
# releaseLock jobId lockFile
#     Releases lock on jobId: if lockFile is a regular file it is moved to
#     $FOAM_JOB_DIR/finishedJobs/. A job without a lock file (e.g. one that
#     only appears in a 'slaves' entry) is simply reported as released.
#     Returns 1, without the 'released' message, if the move fails.
releaseLock () {
    if [ -f "$2" ]
    then
        #- move lock to finishedJobs
        if ! mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/"
        then
            echo "Could not release lock on job $1." >&2
            return 1
        fi
    fi
    echo "Lock on job $1 released."
}
176 #-------------------------------------------------------------------------------
178 # Main
180 #-------------------------------------------------------------------------------
# Command line: at most one argument, either a help flag or the state file.
# Without an argument a STATEFILE taken from the environment is kept, and
# $DEFSTATEFILE is used otherwise.
case $# in
0)
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
    ;;
1)
    if [ "$1" = "-h" ] || [ "$1" = "-help" ]
    then
        usage
    fi
    STATEFILE="$1"
    ;;
*)
    usage
    ;;
esac
#- Check a few things: FOAM_JOB_DIR has to be set and has to be a directory
#  containing both runningJobs/ and finishedJobs/. Diagnostics go to stderr.

if [ -z "$FOAM_JOB_DIR" ]
then
    echo "$Script : FOAM_JOB_DIR environment variable not set." >&2
    echo >&2
    exit 1
fi

if [ ! -d "$FOAM_JOB_DIR" ]
then
    echo "$Script : directory does not exist." >&2
    echo "    FOAM_JOB_DIR=$FOAM_JOB_DIR" >&2
    echo >&2
    exit 1
fi
if [ ! -d "$FOAM_JOB_DIR/runningJobs" ] || [ ! -d "$FOAM_JOB_DIR/finishedJobs" ]
then
    echo "$Script : invalid directory." >&2
    echo "    FOAM_JOB_DIR=$FOAM_JOB_DIR" >&2
    echo >&2
    exit 1
fi
#- obtain rsh method
RSH='ssh'
echo "Using remote shell type : $RSH"
echo ""
echo "Collecting information on jobs in"
echo "    $FOAM_JOB_DIR"
echo ""


#- Collect machine names into $TMPFILE (one name per line, no duplicates)
#  Also handles 'slaves' entry in jobInfo:

rm -f "$TMPFILE"; touch "$TMPFILE"
RUNJOBS=$(getAllJobs "$FOAM_JOB_DIR/runningJobs")
# RUNJOBS is a blank-separated list: word splitting is intended here
for f in $RUNJOBS
do
    # job names have the form <machine>.<pid>
    machinePid=${f##*/}
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')

    # -x -F: compare whole lines literally, so that e.g. 'node1' is not
    # taken as already listed just because 'node10' is.
    if ! grep -q -x -F -e "$machine" "$TMPFILE"
    then
        echo "$machine" >> "$TMPFILE"
    fi
done
echo "Found machines:"
cat "$TMPFILE"
echo ""
#- Collect process info on all machines, one file per machine
#  ($MACHDIR/<machine>, written by foamProcessInfo run on that machine)

mkdir -p "$MACHDIR"
thisHost=$(hostname)
cnt=1
# $TMPFILE is read one line at a time by line number; the loop stops at the
# first empty line or at the end of the file.
while true
do
    machine=$(sed -n -e "${cnt}p" "$TMPFILE")
    if [ -z "$machine" ]
    then
        break
    fi

    machFile=$MACHDIR/$machine
    rm -f "$machFile"
    echo "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]
    then
        echo "    foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
    else
        echo "    $RSH $machine foamProcessInfo $machFile"
        $RSH "$machine" foamProcessInfo "$machFile" >/dev/null 2>&1
    fi
    # $? is the status of whichever foamProcessInfo call ran just above
    if [ $? -ne 0 ] || [ ! -s "$machFile" ]
    then
        echo "** Failed collecting process information on $machine."
        echo "Check $machFile and run foamProcessInfo by hand"
        rm -f "$machFile"
    else
        echo "Succesfully collected information in $machFile ..."
    fi

    cnt=$((cnt + 1))
done
echo ""
#- Construct state for runningJobs; move non running jobs to finishedJobs
#  Jobs on a machine whose process information could not be collected are
#  left alone and get no line in the state file.

releaseAll=''
rm -f "$STATEFILE"
# RUNJOBS is a blank-separated list: word splitting is intended here
for f in $RUNJOBS
do
    # job names have the form <machine>.<pid>
    machinePid=${f##*/}
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')
    pid=$(echo "$machinePid" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/')

    machFile=$MACHDIR/$machine
    if [ -r "$machFile" ]
    then
        # process information line of this pid; empty if pid is not listed
        entry=$(grep "^$pid " "$machFile" 2>/dev/null)
        if [ -z "$entry" ]
        then
            if [ "$releaseAll" ]
            then
                releaseLock "$machinePid" "$f"
            else
                # printf leaves the cursor on the prompt line in every shell;
                # a plain 'echo "...\c"' prints a literal \c under bash.
                printf '%s' "Job $machinePid seems to be no longer running. Release lock? (y/a)"
                read -r answ
                case "${answ:-y}" in
                y)
                    releaseLock "$machinePid" "$f"
                    ;;
                a)
                    # release this one and all following without asking
                    releaseAll='yes'
                    releaseLock "$machinePid" "$f"
                    ;;
                *)
                    state='OTHR'
                    echo "$machinePid $state" >> "$STATEFILE"
                    ;;
                esac
            fi
        else
            # second column of the process information is the state
            state=$(echo "$entry" | awk '{print $2}')
            echo "$machinePid $state" >> "$STATEFILE"
        fi
    fi
done
#- Collect old jobs in finishedJobs
#  (-type f keeps the finishedJobs directory itself out of the list)

OLDFILES=$(find "$FOAM_JOB_DIR/finishedJobs" -type f -mtime +"$NDAYSLIMIT" -print)

#- Construct state for finishedJobs and check on date of files.

# Month names have to be the English ones to be parsed back by 'date -d'
dateNow=$(LC_ALL=C date '+%b %d %Y')
for f in "$FOAM_JOB_DIR"/finishedJobs/*
do
    # Skips empty files, non-files, and the literal pattern an empty
    # directory leaves behind.
    if [ -f "$f" ] && [ -s "$f" ]
    then
        machinePid=${f##*/}

        end=$(getEntry "$f" endDate)
        if [ -z "$end" ]
        then
            # no endDate entry: the job did not finish normally
            state='ABRT'
        else
            nDaysOld=$(dayDiff "$dateNow" "$end")
            if [ "$nDaysOld" -gt "$NDAYSLIMIT" ]
            then
                OLDFILES="$OLDFILES $f"
            fi

            state='FINI'
        fi

        echo "$machinePid $state" >> "$STATEFILE"
    fi
done
#- Remove old locks

# The same file can be listed twice (once for its modification time, once
# for its endDate entry): make the list unique so the count is right.
# OLDFILES is a whitespace-separated list: word splitting is intended here.
OLDFILES=$(printf '%s\n' $OLDFILES | sort -u)
nOldFiles=$(( $(echo "$OLDFILES" | wc -w) ))
if [ "$nOldFiles" -gt 0 ]
then
    echo "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
    # printf leaves the cursor on the prompt line in every shell
    printf '%s' "Do you want to remove these? (y)"
    read -r answ
    if [ "${answ:-y}" = 'y' ]
    then
        rm -f -- $OLDFILES
    fi
fi

#- Clean up work file and work directory
rm -f "$TMPFILE"
# ':?' aborts instead of running 'rm -rf' on an empty path
rm -rf -- "${MACHDIR:?}"

echo ""
echo "Updated stateFile:"
echo "    $STATEFILE"
echo ""
396 #------------------------------------------------------------------------------