1 name: Restart Preempted Libc++ Workflow
3 # The libc++ builders run on preemptable VMs, which can be shut down at any time.
4 # This workflow identifies when a workflow run was canceled due to the VM being preempted,
5 # and restarts the workflow run.
7 # We identify a canceled workflow run by checking the annotations of the check runs in the check suite,
8 # which should contain the message "The runner has received a shutdown signal."
10 # Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow.
14 workflows: [Build and Test libc\+\+]
23 if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled')
29 runs-on: ubuntu-latest
32 uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
35 const failure_regex = /Process completed with exit code 1./ // Tested against failure-level annotation messages. NOTE(review): the final '.' is unescaped, so it matches any character (e.g. "exit code 12") - confirm this is intended.
36 const preemption_regex = /The runner has received a shutdown signal/ // Annotation text that marks a job as preempted.
38 const wf_run = context.payload.workflow_run
39 core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
42 async function create_check_run(conclusion, message) {
43 // Create a check run named 'Restart Preempted Job' on the head commit of the triggering
44 // workflow run to record whether we are restarting the workflow or not. `conclusion` must be 'success', 'skipped' or 'neutral'; how `message` is used is not visible in this excerpt (presumably the check run's output text - confirm).
45 if (conclusion != 'success' && conclusion != 'skipped' && conclusion != 'neutral') {
46 core.setFailed('Invalid conclusion: ' + conclusion) // NOTE(review): no return is visible after this call, so the check run below may still be created with the invalid conclusion - confirm.
48 await github.rest.checks.create({
49 owner: context.repo.owner,
50 repo: context.repo.repo,
51 name: 'Restart Preempted Job',
52 head_sha: wf_run.head_sha, // commit of the workflow run that triggered this job
54 conclusion: conclusion,
56 title: 'Restarted Preempted Job', // NOTE(review): the same title is used for the 'skipped' and 'neutral' outcomes, where nothing was restarted.
62 console.log('Listing check runs for suite')
63 const check_suites = await github.rest.checks.listForSuite({
64 owner: context.repo.owner,
65 repo: context.repo.repo,
66 check_suite_id: context.payload.workflow_run.check_suite_id,
67 per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
71 for (check_run of check_suites.data.check_runs) {
72 console.log('Checking check run: ' + check_run.id);
73 if (check_run.status != 'completed') {
74 console.log('Check run was not completed. Skipping.');
77 if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
78 console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.');
81 check_run_ids.push(check_run.id);
84 has_preempted_job = false;
86 for (check_run_id of check_run_ids) {
87 console.log('Listing annotations for check run: ' + check_run_id);
89 annotations = await github.rest.checks.listAnnotations({
90 owner: context.repo.owner,
91 repo: context.repo.repo,
92 check_run_id: check_run_id
95 // For temporary debugging purposes to see the structure of the annotations.
96 console.log(annotations);
98 has_failed_job = false;
99 saved_failure_message = null;
101 for (annotation of annotations.data) {
102 if (annotation.annotation_level != 'failure') {
106 const preemption_match = annotation.message.match(preemption_regex);
108 if (preemption_match != null) {
109 console.log('Found preemption message: ' + annotation.message);
110 has_preempted_job = true;
113 const failure_match = annotation.message.match(failure_regex);
114 if (failure_match != null) {
115 has_failed_job = true;
116 saved_failure_message = annotation.message;
119 if (has_failed_job && (! has_preempted_job)) {
120 // We only want to restart the workflow if all of the failures were due to preemption.
121 // We don't want to restart the workflow if there were other failures.
123 // However, libcxx runners running inside docker containers produce both a preemption message and failure message.
125 // The desired approach is to ignore failure messages which appear on the same job as a preemption message
126 // (A job is a single run with a specific configuration, e.g. generic-gcc, gcc-14).
128 // However, it's unclear that this code achieves the desired approach, and it may ignore all failures
129 // if a preemption message is found at all on any run.
131 // For now, it's more important to restart preempted workflows than to avoid restarting workflows with
132 // non-preemption failures.
134 // TODO Figure this out.
135 core.notice('Choosing not to rerun workflow because we found a non-preemption failure' +
136 'Failure message: "' + saved_failure_message + '"');
137 await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
138 + 'Failure message: ' + saved_failure_message)
143 if (!has_preempted_job) { // Nothing was preempted: report a 'neutral' check run instead of restarting.
144 core.notice('No preempted jobs found. Not restarting workflow.');
145 await create_check_run('neutral', 'No preempted jobs found. Not restarting workflow.')
149 core.notice("Restarted workflow: " + context.payload.workflow_run.id); // NOTE(review): this notice is emitted before the re-run request below is made.
150 await github.rest.actions.reRunWorkflowFailedJobs({ // Requests a re-run of the failed jobs of the triggering workflow run.
151 owner: context.repo.owner,
152 repo: context.repo.repo,
153 run_id: context.payload.workflow_run.id
155 await create_check_run('success', 'Restarted workflow run due to preempted job')
158 if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') && github.event.actor.login == 'ldionne' # TESTING ONLY
159 name: "Restart Job (test)"
164 runs-on: ubuntu-latest
166 - name: "Restart Job (test)"
167 uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
170 const FAILURE_REGEX = /Process completed with exit code 1./ // Failure-level annotations matching this are treated as legitimate failures. NOTE(review): the final '.' is unescaped, so it matches any character (e.g. "exit code 12") - confirm this is intended.
171 const PREEMPTION_REGEX = /(The runner has received a shutdown signal)|(The operation was canceled)/ // Either message on a failure-level annotation counts as a preemption.
177 const wf_run = context.payload.workflow_run
178 log(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
180 log('Listing check runs for suite')
181 const check_suites = await github.rest.checks.listForSuite({
182 owner: context.repo.owner,
183 repo: context.repo.repo,
184 check_suite_id: context.payload.workflow_run.check_suite_id,
185 per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
189 legitimate_failures = [];
190 for (check_run of check_suites.data.check_runs) {
191 log(`Checking check run: ${check_run.id}`);
192 if (check_run.status != 'completed') {
193 log('Check run was not completed. Skipping.');
197 if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
198 log(`Check run had conclusion: ${check_run.conclusion}. Skipping.`);
202 annotations = await github.rest.checks.listAnnotations({
203 owner: context.repo.owner,
204 repo: context.repo.repo,
205 check_run_id: check_run.id
208 preemption_annotation = annotations.data.find(function(annotation) {
209 return annotation.annotation_level == 'failure' &&
210 annotation.message.match(PREEMPTION_REGEX) != null;
212 if (preemption_annotation != null) {
213 log(`Found preemption message: ${preemption_annotation.message}`);
214 preemptions.push(check_run);
218 failure_annotation = annotations.data.find(function(annotation) {
219 return annotation.annotation_level == 'failure' &&
220 annotation.message.match(FAILURE_REGEX) != null;
222 if (failure_annotation != null) {
223 log(`Found legitimate failure annotation: ${failure_annotation.message}`);
224 legitimate_failures.push(check_run);
230 log('Found some preempted jobs');
231 if (legitimate_failures) {
232 log('Also found some legitimate failures, so not restarting the workflow.');
234 log('Did not find any legitimate failures. Restarting workflow.');
235 await github.rest.actions.reRunWorkflowFailedJobs({
236 owner: context.repo.owner,
237 repo: context.repo.repo,
238 run_id: context.payload.workflow_run.id
242 log('Did not find any preempted jobs. Not restarting the workflow.');