4 use FindBin qw
/$Script $RealBin/;
6 use Jobcluster
::Jobguard qw
/:DEFAULT $MAX_R_JOBS $DEL_BAD_LOG $CHECK_USER/;
# Inputs for the @qsubmulti argument list (shell, memory, sub-line count, queue,
# sub-shell prefix) together with the -ct / -cu / -ju switches.
9 my ($shell,$memory,$sub_line,$submit_queue,$subsh_prefix,$check_job_time,$check_job_usage,$jump_user_determine);
# Settings related to the @jobguard argument list. $save_bad_logs is not
# forwarded itself; it determines $DEL_BAD_LOG further down.
10 my ($log_directory,$interval,$reqsub,$max_eqw_time,$qhost_timespan,$queue_limit,@df_array,$save_bad_logs);
# Memory-guard settings (-ms, -mc, -me).
# NOTE(review): $mem_record presumably backs -mr; its option binding is not
# visible in this excerpt -- confirm.
11 my ($mem_check_sign,$mem_cycle,$mem_exceed,$mem_record);
18 "q=s" => \
$submit_queue,
19 "sp=s" => \
$subsh_prefix,
20 "ct" => \
$check_job_time,
21 "cu" => \
$check_job_usage,
22 "ju" => \
$jump_user_determine,
24 "d=s" => \
$log_directory,
27 "eq=i" => \
$max_eqw_time,
28 "qs=i" => \
$qhost_timespan,
29 "ql" => \
$queue_limit,
31 "sl" => \
$save_bad_logs,
33 "ms" => \
$mem_check_sign,
34 "mc=i" => \
$mem_cycle,
35 "me=s" => \
$mem_exceed,
38 "mj:i" => \
$MAX_R_JOBS,
54 Version: 1.0, 2011-08-15
55 Author: JiaWenlong, jiawenlong\@genomics.cn
57 Welcome any question, bug-report or suggestion, TIA.
61 NOTE: for detail, please perldoc $RealBin/Jobguard.pm
63 -s [s] shell script. <required>
64 -m [s] memory the shell script will use. <required>
65 -l [i] line-number of the sub shells script splited from your -s. [1]
66 -q [s] queue where you want your jobs to run. (strict format: *.q) [NA]
67 -sp [s] prefix of each sub shells splited from the original shell ['work']
68 -ct sign of using system command 'time' to check the time each subshell used. [disabled]
69 -cu sign of using system commands 'qstat -j' and 'grep' to get the 'usage' info of each subshell. [disabled]
70 -ju sign of jumping userID determination. [disabled]
71 If you have used this script before, and be sure that userID got by it will be definitely your RealOne, set '-ju' for sving your 15s time.
72 -d [s] directory where the guard_job_log will creat. [./]
73 The directory doesnot need to exist as it will be created automatically.
74 -t [i] cycle time of guarding steps, and it is in second(s). [120,[300]]
75 -r sign of reqsub jobs run to error. [disabled]
76 It is suggestted that you should set -r if you prepare to run a multi-step-flow, for the existence of error-jobs.
77 -sl sign of saving logs of bad-jobs for user to check the errors. [disabled]
78 -mj [i] maximum number of run/qw jobs, while others are hqw. [50]
79 -eq [i] the maximum time of dealing Eqw status. [[60],100].
80 -qs [i] time-span of qhost check. [1]
81 -ql the sign of job-submit-queue-limitation in the reqsubing error-jobs. (effective with '-q:*.q') [disabled]
82 -df [s] info about the disk of which free space you want to check. [NA]
83 format: disk,min_space,rls_space
84 This parameters can be set in several times, so as to you may have several disks to check, but you should set all disks' info validly.
85 -ms sign of checking memory when jobs are running. [disabled]
86 -mc [i] time-span of checking memory. (effective with -ms) [5]
87 -me [s] limited memory that the job can exceed its required memory(-m:vf_mem). (effective with -ms) [0.2]
88 it can be decimals(fraction{[0,1]} of vf_mem) or specific memory(in uint 'M/m/G/g').
89 -mr sign of recording memory guard log. (effective with -ms) [abled]
94 perl Jobguard.pl -s <shell_for_qsub> -m <memory_for_run_shell> [Options: -h]
# Help requested: emit the full usage text via die (STDERR, non-zero exit).
98 die $usage_info if ($Help);
# Both the shell script and the memory value must be supplied (and be true
# values); otherwise stop with the one-line synopsis.
99 die "perl $Script -s <shell_for_qsub> -m <memory_for_run_shell> [Options: -h]\n" unless($shell && $memory);
# Argument lists built up as "-flag:value" strings by the checks further down.
100 my (@qsubmulti,@jobguard);
102 #------ qsub --------
# Hand the collected qsubmulti arguments to qsubmulti().
103 qsubmulti
(@qsubmulti);
104 #------ check ---------
106 #-------- sub-routines -----------
108 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109 #--- check paras of qsubmulti ---
110 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Every accepted option below is appended to @qsubmulti as a "-flag:value" string.
# NOTE(review): the embedded line numbers skip values in this section, so some
# guards, resets and closing braces are not visible here -- confirm against the
# complete file before relying on the control flow as shown.
111 #------ check the shell --------
# Abort when the path given with -s does not exist.
112 die "Cannot find the shellpath: $shell\n" unless(-e
$shell);
113 push @qsubmulti,"-s:$shell";
114 #------ check the memory -------
# An all-digit value carries no unit suffix, so a warning goes to STDERR.
# NOTE(review): the message announces a reset to 1G; the assignment itself is
# not visible in this excerpt -- confirm.
115 if($memory =~ /^\d+$/){
116 print STDERR
"WARNING: Memory has been reset as 1G\nBecause your input $memory has no unit!\n";
119 push @qsubmulti,"-m:$memory";
120 #------- check the subline -----
# Anything other than a plain run of digits triggers the warning.
# NOTE(review): the message announces a reset to 1; the assignment itself is
# not visible in this excerpt -- confirm.
122 if($sub_line !~ /^\d+$/){
123 print STDERR
"WARNING: Subline has been reset as 1\nBecause your input $sub_line is not whole-digital!\n";
126 push @qsubmulti,"-l:$sub_line";
128 #------- check the submit queue -------
130 push @qsubmulti,"-q:$submit_queue";
132 #------- check the subsh prefix -------
134 push @qsubmulti,"-sp:$subsh_prefix";
136 #------- check the sign of check usage and time ---------
138 push @qsubmulti,"-ct:1";
# -cu is forwarded as "-cu:1" only when the switch was given.
140 if($check_job_usage){
141 push @qsubmulti,"-cu:1";
143 #------- check the sign of jump user determine ----------
144 if($jump_user_determine){
147 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
148 #--- check paras of jobguard ---
149 #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#------ check the log_dir --------
# Fall back to the current directory when -d was not given.
$log_directory ||= './';
# Create the directory (and any missing parents) when it does not exist yet.
# The list form of system() passes the path to mkdir as a single argument
# without going through a shell, so a path containing spaces or shell
# metacharacters is created as written instead of being split or executed.
# '--' keeps a path that starts with '-' from being read as an option.
unless (-d $log_directory) {
    system('mkdir', '-p', '--', $log_directory) == 0
        or die "Cannot create the log directory: $log_directory\n";
}
# Forward the log directory in the jobguard argument list.
push @jobguard, "-d:$log_directory";
# Every accepted option below is appended to @jobguard as a "-flag:value" string.
# NOTE(review): the embedded line numbers skip values in this section, so the
# guards around several pushes and the closing braces are not visible here --
# confirm against the complete file before relying on the control flow as shown.
154 #------ check the interval -------
156 push @jobguard,"-t:$interval";
158 #------- check the reqsub sign -----
160 push @jobguard,"-r:1";
162 #------- check the max eqw time -----
# The value bound to the "eq" option is forwarded under the flag name "-e".
164 push @jobguard,"-e:$max_eqw_time";
166 #------- check the qhost time span -----
# The value bound to the "qs" option is forwarded under the flag name "-h".
168 push @jobguard,"-h:$qhost_timespan";
170 #------- check job queue limit sign ---------
172 push @jobguard,"-ql:1";
174 #------- disk free space -------
# One "-df:" entry is added per element of @df_array.
176 push @jobguard,"-df:$_" for @df_array;
178 #------- check mem_check_sign --------
180 push @jobguard,"-ms:1";
182 #------- check mem_cycle -------
# Forwarded only when memory checking is on and the cycle is a true (non-zero) value.
183 if($mem_check_sign && $mem_cycle){
184 push @jobguard,"-mc:$mem_cycle";
186 #------- check mem_exceed -------
# Forwarded only when memory checking is on and the value is true.
# NOTE(review): the string "0" is false in Perl, so "-me 0" would not be
# forwarded by this test -- confirm whether a zero allowance is meant to be valid.
187 if($mem_check_sign && $mem_exceed){
188 push @jobguard,"-me:$mem_exceed";
190 #------- check memory_record_sign -------
192 push @jobguard,"-mr:1";
194 #------- check the save of bad-jobs' logs ------
# $DEL_BAD_LOG (imported from Jobcluster::Jobguard) becomes the logical inverse
# of the -sl switch.
195 $DEL_BAD_LOG = !$save_bad_logs;