[mlir][acc] Update LegalizeDataValues pass to allow MappableType (#125134)
[llvm-project.git] / .github / workflows / libcxx-restart-preempted-jobs.yaml
blobe7e3772d4de2253df4f1546d4c145350213cc340
1 name: Restart Preempted Libc++ Workflow
3 # The libc++ builders run on preemptable VMs, which can be shut down at any time.
4 # This workflow identifies when a workflow run was canceled due to the VM being preempted,
5 # and restarts the workflow run.
7 # We identify a canceled workflow run by checking the annotations of the check runs in the check suite,
8 # which should contain the message "The runner has received a shutdown signal."
10 # Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow.
12 on:
13   workflow_run:
14     workflows: [Build and Test libc++]
15     types:
16       - completed
18 permissions:
19   contents: read
21 jobs:
22   restart:
23     if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled')
24     name: "Restart Job"
25     permissions:
26       statuses: read
27       checks: write
28       actions: write
29     runs-on: ubuntu-latest
30     steps:
31       - name: "Restart Job"
32         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
33         with:
34           script: |
35             const failure_regex = /Process completed with exit code 1./
36             const preemption_regex = /The runner has received a shutdown signal/
38             const wf_run = context.payload.workflow_run
39             core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
42             async function create_check_run(conclusion, message) {
43                 // Create a check run on the given workflow run to indicate if
44                 // we are restarting the workflow or not.
45                 if (conclusion != 'success' && conclusion != 'skipped' && conclusion != 'neutral') {
46                   core.setFailed('Invalid conclusion: ' + conclusion)
47                 }
48                 await github.rest.checks.create({
49                     owner: context.repo.owner,
50                     repo: context.repo.repo,
51                     name: 'Restart Preempted Job',
52                     head_sha: wf_run.head_sha,
53                     status: 'completed',
54                     conclusion: conclusion,
55                     output: {
56                       title: 'Restarted Preempted Job',
57                       summary: message
58                     }
59                 })
60             }
62             console.log('Listing check runs for suite')
63             const check_suites = await github.rest.checks.listForSuite({
64               owner: context.repo.owner,
65               repo: context.repo.repo,
66               check_suite_id: context.payload.workflow_run.check_suite_id,
67               per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
68             })
70             check_run_ids = [];
71             for (check_run of check_suites.data.check_runs) {
72               console.log('Checking check run: ' + check_run.id);
73               if (check_run.status != 'completed') {
74                 console.log('Check run was not completed. Skipping.');
75                 continue;
76               }
77               if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
78                 console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.');
79                 continue;
80               }
81               check_run_ids.push(check_run.id);
82             }
84             has_preempted_job = false;
86             for (check_run_id of check_run_ids) {
87               console.log('Listing annotations for check run: ' + check_run_id);
89               annotations = await github.rest.checks.listAnnotations({
90                 owner: context.repo.owner,
91                 repo: context.repo.repo,
92                 check_run_id: check_run_id
93               })
95               // For temporary debugging purposes to see the structure of the annotations.
96               console.log(annotations);
98               has_failed_job = false;
99               saved_failure_message = null;
101               for (annotation of annotations.data) {
102                 if (annotation.annotation_level != 'failure') {
103                   continue;
104                 }
106                 const preemption_match = annotation.message.match(preemption_regex);
108                 if (preemption_match != null) {
109                   console.log('Found preemption message: ' + annotation.message);
110                   has_preempted_job = true;
111                 }
113                 const failure_match = annotation.message.match(failure_regex);
114                 if (failure_match != null) {
115                   has_failed_job = true;
116                   saved_failure_message = annotation.message;
117                 }
118               }
119               if (has_failed_job && (! has_preempted_job)) {
120                 // We only want to restart the workflow if all of the failures were due to preemption.
121                 // We don't want to restart the workflow if there were other failures.
122                 //
123                 // However, libcxx runners running inside docker containers produce both a preemption message and failure message.
124                 //
125                 // The desired approach is to ignore failure messages which appear on the same job as a preemption message
126                 // (An job is a single run with a specific configuration, ex generic-gcc, gcc-14).
127                 //
128                 // However, it's unclear that this code achieves the desired approach, and it may ignore all failures
129                 // if a preemption message is found at all on any run.
130                 //
131                 // For now, it's more important to restart preempted workflows than to avoid restarting workflows with
132                 // non-preemption failures.
133                 //
134                 // TODO Figure this out.
135                 core.notice('Choosing not to rerun workflow because we found a non-preemption failure' +
136                   'Failure message: "' + saved_failure_message + '"');
137                 await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
138                     + 'Failure message: ' + saved_failure_message)
139                 return;
140               }
141             }
143             if (!has_preempted_job) {
144               core.notice('No preempted jobs found. Not restarting workflow.');
145               await create_check_run('neutral', 'No preempted jobs found. Not restarting workflow.')
146               return;
147             }
149             core.notice("Restarted workflow: " + context.payload.workflow_run.id);
150             await github.rest.actions.reRunWorkflowFailedJobs({
151                 owner: context.repo.owner,
152                 repo: context.repo.repo,
153                 run_id: context.payload.workflow_run.id
154               })
155             await create_check_run('success', 'Restarted workflow run due to preempted job')
157   restart-test:
158     if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled') && github.event.actor.login == 'ldionne' # TESTING ONLY
159     name: "Restart Job (test)"
160     permissions:
161       statuses: read
162       checks: write
163       actions: write
164     runs-on: ubuntu-latest
165     steps:
166       - name: "Restart Job (test)"
167         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
168         with:
169           script: |
170             const FAILURE_REGEX = /Process completed with exit code 1./
171             const PREEMPTION_REGEX = /(The runner has received a shutdown signal)|(The operation was canceled)/
173             function log(msg) {
174               core.notice(msg)
175             }
177             const wf_run = context.payload.workflow_run
178             log(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
180             log('Listing check runs for suite')
181             const check_suites = await github.rest.checks.listForSuite({
182               owner: context.repo.owner,
183               repo: context.repo.repo,
184               check_suite_id: context.payload.workflow_run.check_suite_id,
185               per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
186             })
188             preemptions = [];
189             legitimate_failures = [];
190             for (check_run of check_suites.data.check_runs) {
191               log(`Checking check run: ${check_run.id}`);
192               if (check_run.status != 'completed') {
193                 log('Check run was not completed. Skipping.');
194                 continue;
195               }
197               if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
198                 log(`Check run had conclusion: ${check_run.conclusion}. Skipping.`);
199                 continue;
200               }
202               annotations = await github.rest.checks.listAnnotations({
203                 owner: context.repo.owner,
204                 repo: context.repo.repo,
205                 check_run_id: check_run.id
206               })
208               preemption_annotation = annotations.data.find(function(annotation) {
209                 return annotation.annotation_level == 'failure' &&
210                        annotation.message.match(PREEMPTION_REGEX) != null;
211               });
212               if (preemption_annotation != null) {
213                 log(`Found preemption message: ${preemption_annotation.message}`);
214                 preemptions.push(check_run);
215                 break;
216               }
218               failure_annotation = annotations.data.find(function(annotation) {
219                 return annotation.annotation_level == 'failure' &&
220                        annotation.message.match(FAILURE_REGEX) != null;
221               });
222               if (failure_annotation != null) {
223                 log(`Found legitimate failure annotation: ${failure_annotation.message}`);
224                 legitimate_failures.push(check_run);
225                 break;
226               }
227             }
229             if (preemptions) {
230               log('Found some preempted jobs');
231               if (legitimate_failures) {
232                 log('Also found some legitimate failures, so not restarting the workflow.');
233               } else {
234                 log('Did not find any legitimate failures. Restarting workflow.');
235                 await github.rest.actions.reRunWorkflowFailedJobs({
236                   owner: context.repo.owner,
237                   repo: context.repo.repo,
238                   run_id: context.payload.workflow_run.id
239                 })
240               }
241             } else {
242               log('Did not find any preempted jobs. Not restarting the workflow.');
243             }