5 slurm/alive.pl - a script to query the state of slurm jobs
9 slurm/alive.pl <job_id> <job_temp_dir>
13 This script is used in conjunction with the CXGN::Tools::Run
14 plugin RemoteSlurm, which allows a Slurm job to be launched on
15 a remote server. The script needs to be in the $PATH on that remote server.
20 Lukas Mueller <lam87@cornell.edu>
# Next command-line argument (shift at file scope reads @ARGV); falls back
# to "/tmp" when the argument is missing or false. It is passed to alive()
# as the directory that receives the "died" marker file.
30 my $temp_dir = shift || "/tmp";
# Print whatever alive() returns to STDOUT; progress messages go to STDERR.
32 print alive
($job_id, $temp_dir);
# NOTE(review): presumably the body of alive(), called above as
# alive($job_id, $temp_dir); the sub header, the closing brace of each
# branch and any return statements are not visible in this view.
#
# Loads the Slurm record for $job_id, reports its state on STDERR and dies
# for states in which the job cannot succeed. For failed, timed-out and
# node-failed jobs a "died" marker file is written into $job_temp_dir
# before dying.
#
# Second argument: directory that receives the "died" marker file.
36 my $job_temp_dir = shift;
38 my $slurm = Slurm
::new
();
# Only the first entry of the returned job_array is examined.
40 my $job_info = $slurm->load_job($job_id);
41 my $current_job = $job_info->{job_array
}->[0];
# Node health is checked first; _check_nodes_states() dies on unusable nodes.
43 _check_nodes_states
();
45 print STDERR
"Check job state...\n";
# States that are only reported on STDERR.
47 if (IS_JOB_RUNNING
($current_job)) {
48 print STDERR
"Slurm job is running...\n";
51 if (IS_JOB_COMPLETE
($current_job)) {
52 print STDERR
"slurm job is complete...\n";
55 if (IS_JOB_FINISHED
($current_job)) {
56 print STDERR
"Slurm job is finished...\n";
59 if (IS_JOB_COMPLETED
($current_job)) {
60 print STDERR
"Slurm job is completed...\n";
63 if (IS_JOB_PENDING
($current_job)) {
64 print STDERR
"Slurm job is pending...\n";
67 if (IS_JOB_COMPLETING
($current_job)) {
68 print STDERR
"Slurm job is completing...\n";
71 if (IS_JOB_CONFIGURING
($current_job)) {
72 print STDERR
"Slurm job is configuring...\n";
75 if (IS_JOB_STARTED
($current_job)) {
76 print STDERR
"Slurm job is started...\n";
79 if (IS_JOB_RESIZING
($current_job)) {
80 print STDERR
"Slurm job is resizing...\n";
# States that abort without leaving a marker file.
83 if (IS_JOB_SUSPENDED
($current_job)) {
84 die "Slurm job is suspended...\n";
86 if (IS_JOB_CANCELLED
($current_job)) {
87 die "Slurm job is canceled...\n";
# States that abort and leave a "died" marker file. The marker must be
# written BEFORE die(): die leaves this code path immediately, so with the
# statements in the opposite order the file was never created.
89 if (IS_JOB_FAILED
($current_job)) {
90 write_file
(File
::Spec
->catfile($job_temp_dir, "died"), "Slurm job failed\n");
91 die "Slurm job is failed...\n";
93 if (IS_JOB_TIMEOUT
($current_job)) {
94 write_file
(File
::Spec
->catfile($job_temp_dir, "died"), "Slurm job timed out\n");
95 die "Slurm job is timed out...\n";
97 if (IS_JOB_NODE_FAILED
($current_job)) {
98 write_file
(File
::Spec
->catfile($job_temp_dir, "died"), "Slurm job node failed\n");
99 die "Slurm job node failed...\n";
# Reached only when no branch above left the sub.
102 die "Slurm job is in an unknown state...\n";
# _check_nodes_states()
#
# Takes no arguments. Loads the node list with $slurm->load_node() and
# inspects every entry of its node_array. Dies, naming the node, when a node
# is unknown, down, in error, not responding, failed, in future state, in
# drain, or in maintenance; for the other states tested here it only prints
# a message to STDERR.
#
# NOTE(review): every node returned by load_node() is checked, with no
# filter for the nodes used by a particular job, so a single bad node
# anywhere in the list aborts the caller.
105 sub _check_nodes_states
{
108 my $slurm = Slurm
::new
();
109 my $nodes_info = $slurm->load_node();
110 my $node_array = $nodes_info->{node_array
};
112 foreach (@
$node_array) {
113 if (IS_NODE_UNKNOWN
($_)) {
114 die "Slurm node is unknown... Node: ".$_->{name
}."\n";
116 if (IS_NODE_DOWN
($_)) {
117 die "Slurm node is down... Node: ".$_->{name
}."\n";
119 if (IS_NODE_IDLE
($_)) {
120 print STDERR
"Slurm node is idle... Node: ".$_->{name
}."\n";
122 if (IS_NODE_ALLOCATED
($_)) {
123 print STDERR
"Slurm node is allocated... Node: ".$_->{name
}."\n";
125 if (IS_NODE_ERROR
($_)) {
126 die "Slurm node is in error... Node: ".$_->{name
}."\n";
128 if (IS_NODE_NO_RESPOND
($_)) {
129 die "Slurm node is not responding... Node: ".$_->{name
}."\n";
131 if (IS_NODE_FAIL
($_)) {
132 die "Slurm node is failed... Node: ".$_->{name
}."\n";
134 if (IS_NODE_COMPLETING
($_)) {
135 print STDERR
"Slurm node is completing... Node: ".$_->{name
}."\n";
137 if (IS_NODE_MIXED
($_)) {
138 print STDERR
"Slurm node is mixed (some CPUs are allocated some are not)... Node: ".$_->{name
}."\n";
140 if (IS_NODE_FUTURE
($_)) {
141 die "Slurm node is in future state (not fully configured)... Node: ".$_->{name
}."\n";
# NOTE(review): this DRAIN check dies before the DRAINING / DRAINED checks
# below are reached. If those states imply DRAIN in the Slurm macros, their
# messages can never be printed — TODO confirm against the Slurm Perl API.
143 if (IS_NODE_DRAIN
($_)) {
144 die "Slurm node is in drain... Node: ".$_->{name
}."\n";
146 if (IS_NODE_DRAINING
($_)) {
147 print STDERR
"Slurm node is draining... Node: ".$_->{name
}."\n";
149 if (IS_NODE_DRAINED
($_)) {
150 print STDERR
"Slurm node is drained... Node: ".$_->{name
}."\n";
152 if (IS_NODE_MAINT
($_)) {
153 die "Slurm node is in maintenance... Node: ".$_->{name
}."\n";
155 if (IS_NODE_POWER_UP
($_)) {
156 print STDERR
"Slurm node is powered up... Node: ".$_->{name
}."\n";
158 if (IS_NODE_POWER_SAVE
($_)) {
159 print STDERR
"Slurm node is in power save... Node: ".$_->{name
}."\n";