make script executable.
[cxgn-corelibs.git] / bin / check_slurm_job.pl
blobab30614b1c0467c779277df5899f79aa48e83cb6
1 #!/usr/bin/perl
3 =head1 NAME
5 check_slurm_job.pl - a script to query the state of Slurm jobs
7 =head1 SYNOPSIS
9 check_slurm_job.pl <job_id> <job_temp_dir>
11 =head1 DESCRIPTION
13 This script is used in conjunction with the CXGN::Tools::Run
14 plugin RemoteSlurm that allows a slurm job to be launched on
15 a remote server. The script needs to be in the $PATH on that
16 server.
18 =head1 AUTHOR
20 Lukas Mueller <lam87@cornell.edu>
21 September 2019
23 =cut
25 use strict;
27 use Slurm;
# Command-line entry point.
#
# Arguments:
#   <job_id>        id of the Slurm job to query (required)
#   <job_temp_dir>  directory passed on to alive(); defaults to "/tmp"
#
# Prints whatever alive() returns on STDOUT: "1" while the job is still
# active, nothing once it is done. Diagnostics go to STDERR.
my $job_id   = shift;
my $temp_dir = shift || "/tmp";

# Without a job id there is nothing meaningful to ask Slurm about, so stop
# here with a usage message instead of querying with an undefined id.
if (!defined $job_id || $job_id eq '') {
    die "Usage: $0 <job_id> [<job_temp_dir>]\n";
}

print alive($job_id, $temp_dir);
# alive($job_id, $job_temp_dir)
#
# Queries Slurm for the job $job_id and reports whether it is still active.
#
# Parameters:
#   $job_id       - id of the Slurm job to look up
#   $job_temp_dir - directory in which a "died" marker file is written when
#                   the job failed, timed out or lost its node
#
# Returns:
#   1        when the job is running, pending, completing, configuring,
#            started or resizing
#   nothing  (bare return) when the job is complete / finished / completed
#
# Dies:
#   - when no job record can be loaded for $job_id
#   - when _check_nodes_states() dies because of a bad node state
#   - when the job is suspended, cancelled, failed, timed out, node-failed
#     or in a state none of the checks recognise
#
# A short progress message is printed to STDERR for every non-fatal state.
sub alive {
    my $job_id       = shift;
    my $job_temp_dir = shift;

    my $slurm    = Slurm::new();
    my $job_info = $slurm->load_job($job_id);

    # Stop with a clear message when Slurm hands back no job record, rather
    # than running the state macros against an undefined value.
    if (!$job_info || !$job_info->{job_array} || !@{ $job_info->{job_array} }) {
        die "Slurm job could not be loaded... Job: $job_id\n";
    }
    my $current_job = $job_info->{job_array}->[0];

    _check_nodes_states();

    print STDERR "Check job state...\n";

    if (IS_JOB_RUNNING($current_job)) {
        print STDERR "Slurm job is running...\n";
        return 1;
    }
    if (IS_JOB_COMPLETE($current_job)) {
        print STDERR "slurm job is complete...\n";
        return;
    }
    # NOTE(review): in Slurm's C headers IS_JOB_FINISHED is true for every
    # terminal state (failed, cancelled, timed out, ...). If the Perl macro
    # mirrors that, the failure branches further down are never reached --
    # confirm, and consider moving them above this check.
    if (IS_JOB_FINISHED($current_job)) {
        print STDERR "Slurm job is finished...\n";
        return;
    }
    if (IS_JOB_COMPLETED($current_job)) {
        print STDERR "Slurm job is completed...\n";
        return;
    }
    if (IS_JOB_PENDING($current_job)) {
        print STDERR "Slurm job is pending...\n";
        return 1;
    }
    if (IS_JOB_COMPLETING($current_job)) {
        print STDERR "Slurm job is completing...\n";
        return 1;
    }
    if (IS_JOB_CONFIGURING($current_job)) {
        print STDERR "Slurm job is configuring...\n";
        return 1;
    }
    if (IS_JOB_STARTED($current_job)) {
        print STDERR "Slurm job is started...\n";
        return 1;
    }
    if (IS_JOB_RESIZING($current_job)) {
        print STDERR "Slurm job is resizing...\n";
        return 1;
    }
    if (IS_JOB_SUSPENDED($current_job)) {
        die "Slurm job is suspended...\n";
    }
    if (IS_JOB_CANCELLED($current_job)) {
        die "Slurm job is canceled...\n";
    }

    # For the three failure states below the "died" marker must be written
    # BEFORE die is called; anything placed after die never executes.
    if (IS_JOB_FAILED($current_job)) {
        _write_died_file($job_temp_dir, "Slurm job failed\n");
        die "Slurm job is failed...\n";
    }
    if (IS_JOB_TIMEOUT($current_job)) {
        _write_died_file($job_temp_dir, "Slurm job timed out\n");
        die "Slurm job is timed out...\n";
    }
    if (IS_JOB_NODE_FAILED($current_job)) {
        _write_died_file($job_temp_dir, "Slurm job node failed\n");
        die "Slurm job node failed...\n";
    }

    die "Slurm job is in an unknown state...\n";
}

# _write_died_file($job_temp_dir, $message)
#
# Writes $message to the file "died" inside $job_temp_dir, replacing any
# previous content. Uses only core Perl. Problems while writing are reported
# on STDERR and otherwise ignored, so that the caller's own die message is
# the one that ends the script.
sub _write_died_file {
    my ($job_temp_dir, $message) = @_;

    return if !defined $job_temp_dir;

    # Loaded here because the script itself does not import File::Spec.
    require File::Spec;
    my $died_file = File::Spec->catfile($job_temp_dir, "died");

    if (open my $fh, '>', $died_file) {
        print {$fh} $message;
        close $fh or print STDERR "Could not close $died_file: $!\n";
    }
    else {
        print STDERR "Could not write $died_file: $!\n";
    }

    return;
}
# _check_nodes_states()
#
# Loads the node list from Slurm and tests every node against an ordered
# list of state checks. For each check that matches, the node's name is
# reported: fatal states end the script via die, the others are only
# logged to STDERR and checking continues with the next entry.
#
# Returns nothing.
sub _check_nodes_states {
    my $unused = shift;    # first argument is accepted but never used

    my $cluster   = Slurm::new();
    my $node_info = $cluster->load_node();
    my $nodes     = $node_info->{node_array};

    # Each entry: [ state predicate, message prefix, fatal flag ].
    # The order matters because a fatal match stops all further checks.
    my @state_checks = (
        [ \&IS_NODE_UNKNOWN,    "Slurm node is unknown... Node: ",                                          1 ],
        [ \&IS_NODE_DOWN,       "Slurm node is down... Node: ",                                             1 ],
        [ \&IS_NODE_IDLE,       "Slurm node is idle... Node: ",                                             0 ],
        [ \&IS_NODE_ALLOCATED,  "Slurm node is allocated... Node: ",                                        0 ],
        [ \&IS_NODE_ERROR,      "Slurm node is in error... Node: ",                                         1 ],
        [ \&IS_NODE_NO_RESPOND, "Slurm node is not responding... Node: ",                                   1 ],
        [ \&IS_NODE_FAIL,       "Slurm node is failed... Node: ",                                           1 ],
        [ \&IS_NODE_COMPLETING, "Slurm node is completing... Node: ",                                       0 ],
        [ \&IS_NODE_MIXED,      "Slurm node is mixed (some CPUs are allocated some are not)... Node: ",     0 ],
        [ \&IS_NODE_FUTURE,     "Slurm node is in future state (not fully configured)... Node: ",           1 ],
        [ \&IS_NODE_DRAIN,      "Slurm node is in drain... Node: ",                                         1 ],
        [ \&IS_NODE_DRAINING,   "Slurm node is draining... Node: ",                                         0 ],
        [ \&IS_NODE_DRAINED,    "Slurm node is drained... Node: ",                                          0 ],
        [ \&IS_NODE_MAINT,      "Slurm node is in maintenance... Node: ",                                   1 ],
        [ \&IS_NODE_POWER_UP,   "Slurm node is powered up... Node: ",                                       0 ],
        [ \&IS_NODE_POWER_SAVE, "Slurm node is in power save... Node: ",                                    0 ],
    );

    for my $node (@$nodes) {
        for my $check (@state_checks) {
            my ($in_state, $prefix, $is_fatal) = @$check;

            next unless $in_state->($node);

            my $report = $prefix . $node->{name} . "\n";
            die $report if $is_fatal;
            print STDERR $report;
        }
    }

    return;
}