1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -x86-slh-lfence | FileCheck %s --check-prefix=X64-LFENCE
5 ; FIXME: Add support for 32-bit and other EH ABIs.
; External functions declared without a body in this file. The tests below pass
; values derived from loads to them.
7 declare void @leak(i32 %v1, i32 %v2)
9 declare void @sink(i32)
; Smallest case: a single i32 load from %ptr.
; The default run expects the loaded value to be OR-ed with a mask built from
; the sign bits of %rsp (sarq $63), and that mask to be folded back into %rsp
; afterwards (shlq $47 / orq). The -x86-slh-lfence run expects only the plain
; load followed by retq.
11 define i32 @test_trivial_entry_load(ptr %ptr) speculative_load_hardening {
12 ; X64-LABEL: test_trivial_entry_load:
13 ; X64: # %bb.0: # %entry
14 ; X64-NEXT: movq %rsp, %rcx
15 ; X64-NEXT: movq $-1, %rax
16 ; X64-NEXT: sarq $63, %rcx
17 ; X64-NEXT: movl (%rdi), %eax
18 ; X64-NEXT: orl %ecx, %eax
19 ; X64-NEXT: shlq $47, %rcx
20 ; X64-NEXT: orq %rcx, %rsp
23 ; X64-LFENCE-LABEL: test_trivial_entry_load:
24 ; X64-LFENCE: # %bb.0: # %entry
25 ; X64-LFENCE-NEXT: movl (%rdi), %eax
26 ; X64-LFENCE-NEXT: retq
28 %v = load i32, ptr %ptr
; Three nested equality tests on %a, %b and %c guard a chain of dependent loads.
;  - %then3 path: loads through %ptr1, %ptr2 and %ptr3, uses a loaded sum as an
;    index into %ptr2, and passes two sums to @leak.
;  - %else3 path: loads a pointer from %ptr3, converts it to i32 with ptrtoint
;    and stores that value into %ptr2 at the same index.
;  - Both paths feed a phi; the pointer it yields is loaded and the result is
;    used as a store index into %ptr2.
; The default run expects a cmov on each conditional edge, OR-masking of loaded
; values and indices, and a comparison of the return address after the call.
; The -x86-slh-lfence run expects an lfence at the top of each conditionally
; reached block instead.
32 define void @test_basic_conditions(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2, ptr %ptr3) speculative_load_hardening {
33 ; X64-LABEL: test_basic_conditions:
34 ; X64: # %bb.0: # %entry
35 ; X64-NEXT: pushq %r15
36 ; X64-NEXT: .cfi_def_cfa_offset 16
37 ; X64-NEXT: pushq %r14
38 ; X64-NEXT: .cfi_def_cfa_offset 24
39 ; X64-NEXT: pushq %rbx
40 ; X64-NEXT: .cfi_def_cfa_offset 32
41 ; X64-NEXT: .cfi_offset %rbx, -32
42 ; X64-NEXT: .cfi_offset %r14, -24
43 ; X64-NEXT: .cfi_offset %r15, -16
44 ; X64-NEXT: movq %rsp, %rax
45 ; X64-NEXT: movq $-1, %rbx
46 ; X64-NEXT: sarq $63, %rax
47 ; X64-NEXT: testl %edi, %edi
48 ; X64-NEXT: jne .LBB1_1
49 ; X64-NEXT: # %bb.2: # %then1
50 ; X64-NEXT: cmovneq %rbx, %rax
51 ; X64-NEXT: testl %esi, %esi
52 ; X64-NEXT: je .LBB1_4
54 ; X64-NEXT: cmoveq %rbx, %rax
55 ; X64-NEXT: .LBB1_8: # %exit
56 ; X64-NEXT: shlq $47, %rax
57 ; X64-NEXT: orq %rax, %rsp
59 ; X64-NEXT: .cfi_def_cfa_offset 24
61 ; X64-NEXT: .cfi_def_cfa_offset 16
63 ; X64-NEXT: .cfi_def_cfa_offset 8
65 ; X64-NEXT: .LBB1_4: # %then2
66 ; X64-NEXT: .cfi_def_cfa_offset 32
67 ; X64-NEXT: cmovneq %rbx, %rax
68 ; X64-NEXT: testl %edx, %edx
69 ; X64-NEXT: je .LBB1_6
70 ; X64-NEXT: # %bb.5: # %else3
71 ; X64-NEXT: cmoveq %rbx, %rax
72 ; X64-NEXT: movslq (%r9), %rcx
73 ; X64-NEXT: orq %rax, %rcx
74 ; X64-NEXT: leaq (%r8,%rcx,4), %r14
75 ; X64-NEXT: movl %ecx, (%r8,%rcx,4)
76 ; X64-NEXT: jmp .LBB1_7
77 ; X64-NEXT: .LBB1_6: # %then3
78 ; X64-NEXT: cmovneq %rbx, %rax
79 ; X64-NEXT: movl (%rcx), %ecx
80 ; X64-NEXT: addl (%r8), %ecx
81 ; X64-NEXT: movslq %ecx, %rdi
82 ; X64-NEXT: orq %rax, %rdi
83 ; X64-NEXT: movl (%r8,%rdi,4), %esi
84 ; X64-NEXT: orl %eax, %esi
85 ; X64-NEXT: movq (%r9), %r14
86 ; X64-NEXT: orq %rax, %r14
87 ; X64-NEXT: addl (%r14), %esi
88 ; X64-NEXT: shlq $47, %rax
89 ; X64-NEXT: # kill: def $edi killed $edi killed $rdi
90 ; X64-NEXT: orq %rax, %rsp
91 ; X64-NEXT: movq %r8, %r15
92 ; X64-NEXT: callq leak@PLT
93 ; X64-NEXT: .Lslh_ret_addr0:
94 ; X64-NEXT: movq %r15, %r8
95 ; X64-NEXT: movq %rsp, %rax
96 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
97 ; X64-NEXT: sarq $63, %rax
98 ; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx
99 ; X64-NEXT: cmovneq %rbx, %rax
100 ; X64-NEXT: .LBB1_7: # %merge
101 ; X64-NEXT: movslq (%r14), %rcx
102 ; X64-NEXT: orq %rax, %rcx
103 ; X64-NEXT: movl $0, (%r8,%rcx,4)
104 ; X64-NEXT: jmp .LBB1_8
106 ; X64-LFENCE-LABEL: test_basic_conditions:
107 ; X64-LFENCE: # %bb.0: # %entry
108 ; X64-LFENCE-NEXT: pushq %r14
109 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
110 ; X64-LFENCE-NEXT: pushq %rbx
111 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
112 ; X64-LFENCE-NEXT: pushq %rax
113 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
114 ; X64-LFENCE-NEXT: .cfi_offset %rbx, -24
115 ; X64-LFENCE-NEXT: .cfi_offset %r14, -16
116 ; X64-LFENCE-NEXT: testl %edi, %edi
117 ; X64-LFENCE-NEXT: jne .LBB1_6
118 ; X64-LFENCE-NEXT: # %bb.1: # %then1
119 ; X64-LFENCE-NEXT: lfence
120 ; X64-LFENCE-NEXT: testl %esi, %esi
121 ; X64-LFENCE-NEXT: jne .LBB1_6
122 ; X64-LFENCE-NEXT: # %bb.2: # %then2
123 ; X64-LFENCE-NEXT: lfence
124 ; X64-LFENCE-NEXT: testl %edx, %edx
125 ; X64-LFENCE-NEXT: je .LBB1_3
126 ; X64-LFENCE-NEXT: # %bb.4: # %else3
127 ; X64-LFENCE-NEXT: lfence
128 ; X64-LFENCE-NEXT: movslq (%r9), %rax
129 ; X64-LFENCE-NEXT: leaq (%r8,%rax,4), %rbx
130 ; X64-LFENCE-NEXT: movl %eax, (%r8,%rax,4)
131 ; X64-LFENCE-NEXT: jmp .LBB1_5
132 ; X64-LFENCE-NEXT: .LBB1_3: # %then3
133 ; X64-LFENCE-NEXT: lfence
134 ; X64-LFENCE-NEXT: movl (%rcx), %eax
135 ; X64-LFENCE-NEXT: addl (%r8), %eax
136 ; X64-LFENCE-NEXT: movslq %eax, %rdi
137 ; X64-LFENCE-NEXT: movl (%r8,%rdi,4), %esi
138 ; X64-LFENCE-NEXT: movq (%r9), %rbx
139 ; X64-LFENCE-NEXT: addl (%rbx), %esi
140 ; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi
141 ; X64-LFENCE-NEXT: movq %r8, %r14
142 ; X64-LFENCE-NEXT: callq leak@PLT
143 ; X64-LFENCE-NEXT: movq %r14, %r8
144 ; X64-LFENCE-NEXT: .LBB1_5: # %merge
145 ; X64-LFENCE-NEXT: movslq (%rbx), %rax
146 ; X64-LFENCE-NEXT: movl $0, (%r8,%rax,4)
147 ; X64-LFENCE-NEXT: .LBB1_6: # %exit
148 ; X64-LFENCE-NEXT: lfence
149 ; X64-LFENCE-NEXT: addq $8, %rsp
150 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
151 ; X64-LFENCE-NEXT: popq %rbx
152 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
153 ; X64-LFENCE-NEXT: popq %r14
154 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
155 ; X64-LFENCE-NEXT: retq
; Each of the three compares below branches to %exit when its operand is
; non-zero, so the loads are reached only when %a and %b are both zero.
157 %a.cmp = icmp eq i32 %a, 0
158 br i1 %a.cmp, label %then1, label %exit
161 %b.cmp = icmp eq i32 %b, 0
162 br i1 %b.cmp, label %then2, label %exit
165 %c.cmp = icmp eq i32 %c, 0
166 br i1 %c.cmp, label %then3, label %else3
; %c == 0 path: a loaded sum becomes an index into %ptr2, and a pointer loaded
; from %ptr3 is dereferenced, before both sums are passed to @leak.
169 %secret1 = load i32, ptr %ptr1
170 %secret2 = load i32, ptr %ptr2
171 %secret.sum1 = add i32 %secret1, %secret2
172 %ptr2.idx = getelementptr i32, ptr %ptr2, i32 %secret.sum1
173 %secret3 = load i32, ptr %ptr2.idx
174 %secret4 = load ptr, ptr %ptr3
175 %secret5 = load i32, ptr %secret4
176 %secret.sum2 = add i32 %secret3, %secret5
177 call void @leak(i32 %secret.sum1, i32 %secret.sum2)
; %c != 0 path: the pointer loaded from %ptr3 is used both as the stored value
; and as the index of the store into %ptr2.
181 %secret6 = load ptr, ptr %ptr3
182 %cast = ptrtoint ptr %secret6 to i32
183 %ptr2.idx2 = getelementptr i32, ptr %ptr2, i32 %cast
184 store i32 %cast, ptr %ptr2.idx2
; Join point: load through whichever pointer the taken path produced and use
; the result as the index of a store of zero into %ptr2.
188 %phi = phi ptr [ %secret4, %then3 ], [ %ptr2.idx2, %else3 ]
189 %secret7 = load i32, ptr %phi
190 %ptr2.idx3 = getelementptr i32, ptr %ptr2, i32 %secret7
191 store i32 0, ptr %ptr2.idx3
; A single counted loop (%l.header), entered only when %a is zero and repeated
; while the incremented counter is signed-less-than %b. Each iteration loads an
; index from %ptr1, loads %ptr2[index] and passes the result to @sink.
; The function is nounwind and its expected output contains no .cfi directives.
; The default run expects a cmov after each loop branch, OR-masking of both the
; loaded index and the base pointer, and a return address comparison after the
; call. The -x86-slh-lfence run expects an lfence in the preheader, at the loop
; header and at %exit.
198 define void @test_basic_loop(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) nounwind speculative_load_hardening {
199 ; X64-LABEL: test_basic_loop:
200 ; X64: # %bb.0: # %entry
201 ; X64-NEXT: pushq %rbp
202 ; X64-NEXT: pushq %r15
203 ; X64-NEXT: pushq %r14
204 ; X64-NEXT: pushq %r12
205 ; X64-NEXT: pushq %rbx
206 ; X64-NEXT: movq %rsp, %rax
207 ; X64-NEXT: movq $-1, %r15
208 ; X64-NEXT: sarq $63, %rax
209 ; X64-NEXT: testl %edi, %edi
210 ; X64-NEXT: je .LBB2_2
212 ; X64-NEXT: cmoveq %r15, %rax
213 ; X64-NEXT: jmp .LBB2_5
214 ; X64-NEXT: .LBB2_2: # %l.header.preheader
215 ; X64-NEXT: movq %rcx, %rbx
216 ; X64-NEXT: movq %rdx, %r14
217 ; X64-NEXT: movl %esi, %ebp
218 ; X64-NEXT: cmovneq %r15, %rax
219 ; X64-NEXT: xorl %r12d, %r12d
220 ; X64-NEXT: .p2align 4, 0x90
221 ; X64-NEXT: .LBB2_3: # %l.header
222 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
223 ; X64-NEXT: movslq (%r14), %rcx
224 ; X64-NEXT: orq %rax, %rcx
225 ; X64-NEXT: movq %rax, %rdx
226 ; X64-NEXT: orq %rbx, %rdx
227 ; X64-NEXT: movl (%rdx,%rcx,4), %edi
228 ; X64-NEXT: shlq $47, %rax
229 ; X64-NEXT: orq %rax, %rsp
230 ; X64-NEXT: callq sink@PLT
231 ; X64-NEXT: .Lslh_ret_addr1:
232 ; X64-NEXT: movq %rsp, %rax
233 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
234 ; X64-NEXT: sarq $63, %rax
235 ; X64-NEXT: cmpq $.Lslh_ret_addr1, %rcx
236 ; X64-NEXT: cmovneq %r15, %rax
237 ; X64-NEXT: incl %r12d
238 ; X64-NEXT: cmpl %ebp, %r12d
239 ; X64-NEXT: jge .LBB2_4
240 ; X64-NEXT: # %bb.6: # in Loop: Header=BB2_3 Depth=1
241 ; X64-NEXT: cmovgeq %r15, %rax
242 ; X64-NEXT: jmp .LBB2_3
244 ; X64-NEXT: cmovlq %r15, %rax
245 ; X64-NEXT: .LBB2_5: # %exit
246 ; X64-NEXT: shlq $47, %rax
247 ; X64-NEXT: orq %rax, %rsp
248 ; X64-NEXT: popq %rbx
249 ; X64-NEXT: popq %r12
250 ; X64-NEXT: popq %r14
251 ; X64-NEXT: popq %r15
252 ; X64-NEXT: popq %rbp
255 ; X64-LFENCE-LABEL: test_basic_loop:
256 ; X64-LFENCE: # %bb.0: # %entry
257 ; X64-LFENCE-NEXT: pushq %rbp
258 ; X64-LFENCE-NEXT: pushq %r15
259 ; X64-LFENCE-NEXT: pushq %r14
260 ; X64-LFENCE-NEXT: pushq %rbx
261 ; X64-LFENCE-NEXT: pushq %rax
262 ; X64-LFENCE-NEXT: testl %edi, %edi
263 ; X64-LFENCE-NEXT: jne .LBB2_3
264 ; X64-LFENCE-NEXT: # %bb.1: # %l.header.preheader
265 ; X64-LFENCE-NEXT: movq %rcx, %rbx
266 ; X64-LFENCE-NEXT: movq %rdx, %r14
267 ; X64-LFENCE-NEXT: movl %esi, %ebp
268 ; X64-LFENCE-NEXT: lfence
269 ; X64-LFENCE-NEXT: xorl %r15d, %r15d
270 ; X64-LFENCE-NEXT: .p2align 4, 0x90
271 ; X64-LFENCE-NEXT: .LBB2_2: # %l.header
272 ; X64-LFENCE-NEXT: # =>This Inner Loop Header: Depth=1
273 ; X64-LFENCE-NEXT: lfence
274 ; X64-LFENCE-NEXT: movslq (%r14), %rax
275 ; X64-LFENCE-NEXT: movl (%rbx,%rax,4), %edi
276 ; X64-LFENCE-NEXT: callq sink@PLT
277 ; X64-LFENCE-NEXT: incl %r15d
278 ; X64-LFENCE-NEXT: cmpl %ebp, %r15d
279 ; X64-LFENCE-NEXT: jl .LBB2_2
280 ; X64-LFENCE-NEXT: .LBB2_3: # %exit
281 ; X64-LFENCE-NEXT: lfence
282 ; X64-LFENCE-NEXT: addq $8, %rsp
283 ; X64-LFENCE-NEXT: popq %rbx
284 ; X64-LFENCE-NEXT: popq %r14
285 ; X64-LFENCE-NEXT: popq %r15
286 ; X64-LFENCE-NEXT: popq %rbp
287 ; X64-LFENCE-NEXT: retq
; The loop is skipped entirely unless %a == 0.
289 %a.cmp = icmp eq i32 %a, 0
290 br i1 %a.cmp, label %l.header, label %exit
; Loop body: the value loaded from %ptr1 indexes %ptr2, and the value loaded
; from there is handed to @sink.
293 %i = phi i32 [ 0, %entry ], [ %i.next, %l.header ]
294 %secret = load i32, ptr %ptr1
295 %ptr2.idx = getelementptr i32, ptr %ptr2, i32 %secret
296 %leak = load i32, ptr %ptr2.idx
297 call void @sink(i32 %leak)
298 %i.next = add i32 %i, 1
299 %i.cmp = icmp slt i32 %i.next, %b
300 br i1 %i.cmp, label %l.header, label %exit
; Two nested counted loops, entered only when %a is zero.
;  - Outer loop (%l1.header / %l1.latch): repeats while the incremented outer
;    counter is signed-less-than %b.
;  - Inner loop (%l2.header): entered from the outer header only when %b is
;    signed-greater-than 0, and repeats while the incremented inner counter is
;    signed-less-than %c.
; Both the inner body and the outer latch load an index from %ptr1, load
; %ptr2[index] and pass the result to @sink.
; The default run expects a cmov after each loop branch, OR-masking of the
; loaded index and base pointer, and a return address comparison after each of
; the two calls. The -x86-slh-lfence run expects an lfence at the top of each
; block reached through a conditional branch.
306 define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, ptr %ptr1, ptr %ptr2) nounwind speculative_load_hardening {
307 ; X64-LABEL: test_basic_nested_loop:
308 ; X64: # %bb.0: # %entry
309 ; X64-NEXT: pushq %rbp
310 ; X64-NEXT: pushq %r15
311 ; X64-NEXT: pushq %r14
312 ; X64-NEXT: pushq %r13
313 ; X64-NEXT: pushq %r12
314 ; X64-NEXT: pushq %rbx
315 ; X64-NEXT: pushq %rax
316 ; X64-NEXT: movq %rsp, %rax
317 ; X64-NEXT: movq $-1, %r12
318 ; X64-NEXT: sarq $63, %rax
319 ; X64-NEXT: testl %edi, %edi
320 ; X64-NEXT: je .LBB3_2
322 ; X64-NEXT: cmoveq %r12, %rax
323 ; X64-NEXT: jmp .LBB3_10
324 ; X64-NEXT: .LBB3_2: # %l1.header.preheader
325 ; X64-NEXT: movq %r8, %rbx
326 ; X64-NEXT: movq %rcx, %r14
327 ; X64-NEXT: movl %edx, %ebp
328 ; X64-NEXT: movl %esi, %r15d
329 ; X64-NEXT: cmovneq %r12, %rax
330 ; X64-NEXT: xorl %r13d, %r13d
331 ; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
332 ; X64-NEXT: testl %r15d, %r15d
333 ; X64-NEXT: jle .LBB3_4
334 ; X64-NEXT: .p2align 4, 0x90
335 ; X64-NEXT: .LBB3_5: # %l2.header.preheader
336 ; X64-NEXT: cmovleq %r12, %rax
337 ; X64-NEXT: xorl %r15d, %r15d
338 ; X64-NEXT: .p2align 4, 0x90
339 ; X64-NEXT: .LBB3_6: # %l2.header
340 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
341 ; X64-NEXT: movslq (%r14), %rcx
342 ; X64-NEXT: orq %rax, %rcx
343 ; X64-NEXT: movq %rax, %rdx
344 ; X64-NEXT: orq %rbx, %rdx
345 ; X64-NEXT: movl (%rdx,%rcx,4), %edi
346 ; X64-NEXT: shlq $47, %rax
347 ; X64-NEXT: orq %rax, %rsp
348 ; X64-NEXT: callq sink@PLT
349 ; X64-NEXT: .Lslh_ret_addr2:
350 ; X64-NEXT: movq %rsp, %rax
351 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
352 ; X64-NEXT: sarq $63, %rax
353 ; X64-NEXT: cmpq $.Lslh_ret_addr2, %rcx
354 ; X64-NEXT: cmovneq %r12, %rax
355 ; X64-NEXT: incl %r15d
356 ; X64-NEXT: cmpl %ebp, %r15d
357 ; X64-NEXT: jge .LBB3_7
358 ; X64-NEXT: # %bb.11: # in Loop: Header=BB3_6 Depth=1
359 ; X64-NEXT: cmovgeq %r12, %rax
360 ; X64-NEXT: jmp .LBB3_6
361 ; X64-NEXT: .p2align 4, 0x90
363 ; X64-NEXT: cmovlq %r12, %rax
364 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload
365 ; X64-NEXT: jmp .LBB3_8
366 ; X64-NEXT: .p2align 4, 0x90
368 ; X64-NEXT: cmovgq %r12, %rax
369 ; X64-NEXT: .LBB3_8: # %l1.latch
370 ; X64-NEXT: movslq (%r14), %rcx
371 ; X64-NEXT: orq %rax, %rcx
372 ; X64-NEXT: movq %rax, %rdx
373 ; X64-NEXT: orq %rbx, %rdx
374 ; X64-NEXT: movl (%rdx,%rcx,4), %edi
375 ; X64-NEXT: shlq $47, %rax
376 ; X64-NEXT: orq %rax, %rsp
377 ; X64-NEXT: callq sink@PLT
378 ; X64-NEXT: .Lslh_ret_addr3:
379 ; X64-NEXT: movq %rsp, %rax
380 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
381 ; X64-NEXT: sarq $63, %rax
382 ; X64-NEXT: cmpq $.Lslh_ret_addr3, %rcx
383 ; X64-NEXT: cmovneq %r12, %rax
384 ; X64-NEXT: incl %r13d
385 ; X64-NEXT: cmpl %r15d, %r13d
386 ; X64-NEXT: jge .LBB3_9
387 ; X64-NEXT: # %bb.12:
388 ; X64-NEXT: cmovgeq %r12, %rax
389 ; X64-NEXT: testl %r15d, %r15d
390 ; X64-NEXT: jg .LBB3_5
391 ; X64-NEXT: jmp .LBB3_4
393 ; X64-NEXT: cmovlq %r12, %rax
394 ; X64-NEXT: .LBB3_10: # %exit
395 ; X64-NEXT: shlq $47, %rax
396 ; X64-NEXT: orq %rax, %rsp
397 ; X64-NEXT: addq $8, %rsp
398 ; X64-NEXT: popq %rbx
399 ; X64-NEXT: popq %r12
400 ; X64-NEXT: popq %r13
401 ; X64-NEXT: popq %r14
402 ; X64-NEXT: popq %r15
403 ; X64-NEXT: popq %rbp
406 ; X64-LFENCE-LABEL: test_basic_nested_loop:
407 ; X64-LFENCE: # %bb.0: # %entry
408 ; X64-LFENCE-NEXT: pushq %rbp
409 ; X64-LFENCE-NEXT: pushq %r15
410 ; X64-LFENCE-NEXT: pushq %r14
411 ; X64-LFENCE-NEXT: pushq %r13
412 ; X64-LFENCE-NEXT: pushq %r12
413 ; X64-LFENCE-NEXT: pushq %rbx
414 ; X64-LFENCE-NEXT: pushq %rax
415 ; X64-LFENCE-NEXT: testl %edi, %edi
416 ; X64-LFENCE-NEXT: je .LBB3_1
417 ; X64-LFENCE-NEXT: .LBB3_6: # %exit
418 ; X64-LFENCE-NEXT: lfence
419 ; X64-LFENCE-NEXT: addq $8, %rsp
420 ; X64-LFENCE-NEXT: popq %rbx
421 ; X64-LFENCE-NEXT: popq %r12
422 ; X64-LFENCE-NEXT: popq %r13
423 ; X64-LFENCE-NEXT: popq %r14
424 ; X64-LFENCE-NEXT: popq %r15
425 ; X64-LFENCE-NEXT: popq %rbp
426 ; X64-LFENCE-NEXT: retq
427 ; X64-LFENCE-NEXT: .LBB3_1: # %l1.header.preheader
428 ; X64-LFENCE-NEXT: movq %r8, %rbx
429 ; X64-LFENCE-NEXT: movq %rcx, %r14
430 ; X64-LFENCE-NEXT: movl %edx, %ebp
431 ; X64-LFENCE-NEXT: movl %esi, %r15d
432 ; X64-LFENCE-NEXT: lfence
433 ; X64-LFENCE-NEXT: xorl %r12d, %r12d
434 ; X64-LFENCE-NEXT: jmp .LBB3_2
435 ; X64-LFENCE-NEXT: .p2align 4, 0x90
436 ; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch
437 ; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
438 ; X64-LFENCE-NEXT: lfence
439 ; X64-LFENCE-NEXT: movslq (%r14), %rax
440 ; X64-LFENCE-NEXT: movl (%rbx,%rax,4), %edi
441 ; X64-LFENCE-NEXT: callq sink@PLT
442 ; X64-LFENCE-NEXT: incl %r12d
443 ; X64-LFENCE-NEXT: cmpl %r15d, %r12d
444 ; X64-LFENCE-NEXT: jge .LBB3_6
445 ; X64-LFENCE-NEXT: .LBB3_2: # %l1.header
446 ; X64-LFENCE-NEXT: # =>This Loop Header: Depth=1
447 ; X64-LFENCE-NEXT: # Child Loop BB3_4 Depth 2
448 ; X64-LFENCE-NEXT: lfence
449 ; X64-LFENCE-NEXT: testl %r15d, %r15d
450 ; X64-LFENCE-NEXT: jle .LBB3_5
451 ; X64-LFENCE-NEXT: # %bb.3: # %l2.header.preheader
452 ; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1
453 ; X64-LFENCE-NEXT: lfence
454 ; X64-LFENCE-NEXT: xorl %r13d, %r13d
455 ; X64-LFENCE-NEXT: .p2align 4, 0x90
456 ; X64-LFENCE-NEXT: .LBB3_4: # %l2.header
457 ; X64-LFENCE-NEXT: # Parent Loop BB3_2 Depth=1
458 ; X64-LFENCE-NEXT: # => This Inner Loop Header: Depth=2
459 ; X64-LFENCE-NEXT: lfence
460 ; X64-LFENCE-NEXT: movslq (%r14), %rax
461 ; X64-LFENCE-NEXT: movl (%rbx,%rax,4), %edi
462 ; X64-LFENCE-NEXT: callq sink@PLT
463 ; X64-LFENCE-NEXT: incl %r13d
464 ; X64-LFENCE-NEXT: cmpl %ebp, %r13d
465 ; X64-LFENCE-NEXT: jl .LBB3_4
466 ; X64-LFENCE-NEXT: jmp .LBB3_5
; Both loops are skipped entirely unless %a == 0.
468 %a.cmp = icmp eq i32 %a, 0
469 br i1 %a.cmp, label %l1.header, label %exit
; Outer header: run the inner loop only when %b > 0, otherwise go straight to
; the outer latch.
472 %i = phi i32 [ 0, %entry ], [ %i.next, %l1.latch ]
473 %b.cmp = icmp sgt i32 %b, 0
474 br i1 %b.cmp, label %l2.header, label %l1.latch
; Inner body: dependent load pair whose result is passed to @sink.
477 %j = phi i32 [ 0, %l1.header ], [ %j.next, %l2.header ]
478 %secret = load i32, ptr %ptr1
479 %ptr2.idx = getelementptr i32, ptr %ptr2, i32 %secret
480 %leak = load i32, ptr %ptr2.idx
481 call void @sink(i32 %leak)
482 %j.next = add i32 %j, 1
483 %j.cmp = icmp slt i32 %j.next, %c
484 br i1 %j.cmp, label %l2.header, label %l1.latch
; Outer latch: the same dependent load pair again, then the outer counter test.
487 %secret2 = load i32, ptr %ptr1
488 %ptr2.idx2 = getelementptr i32, ptr %ptr2, i32 %secret2
489 %leak2 = load i32, ptr %ptr2.idx2
490 call void @sink(i32 %leak2)
491 %i.next = add i32 %i, 1
492 %i.cmp = icmp slt i32 %i.next, %b
493 br i1 %i.cmp, label %l1.header, label %exit
; Declarations of the personality routine and of the exception allocation and
; throw entry points referenced by @test_basic_eh.
499 declare i32 @__gxx_personality_v0(...)
501 declare ptr @__cxa_allocate_exception(i64) local_unnamed_addr
503 declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
; Exercises an invoke/landingpad pair.
; When %a is signed-less-than 42 the %thrower block loads %ptr1[%a], stores it
; through the pointer returned by @__cxa_allocate_exception and invokes
; @__cxa_throw, unwinding to %lpad. The landing pad loads the value back through
; the caught pointer, adds a load from %ptr1, and uses the sum to index %ptr2
; before calling @sink.
; In the default run's expected output every call is followed by a return
; address comparison, and the landing pad rebuilds its mask from %rsp
; (movq %rsp, %rcx / sarq $63, %rcx) before using it. The -x86-slh-lfence run
; expects an lfence in %thrower and at %exit, and none in the landing pad lines
; shown here.
505 define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_hardening personality ptr @__gxx_personality_v0 {
506 ; X64-LABEL: test_basic_eh:
507 ; X64: # %bb.0: # %entry
508 ; X64-NEXT: pushq %rbp
509 ; X64-NEXT: .cfi_def_cfa_offset 16
510 ; X64-NEXT: pushq %r15
511 ; X64-NEXT: .cfi_def_cfa_offset 24
512 ; X64-NEXT: pushq %r14
513 ; X64-NEXT: .cfi_def_cfa_offset 32
514 ; X64-NEXT: pushq %rbx
515 ; X64-NEXT: .cfi_def_cfa_offset 40
516 ; X64-NEXT: pushq %rax
517 ; X64-NEXT: .cfi_def_cfa_offset 48
518 ; X64-NEXT: .cfi_offset %rbx, -40
519 ; X64-NEXT: .cfi_offset %r14, -32
520 ; X64-NEXT: .cfi_offset %r15, -24
521 ; X64-NEXT: .cfi_offset %rbp, -16
522 ; X64-NEXT: movq %rsp, %rax
523 ; X64-NEXT: movq $-1, %rbx
524 ; X64-NEXT: sarq $63, %rax
525 ; X64-NEXT: cmpl $41, %edi
526 ; X64-NEXT: jg .LBB4_1
527 ; X64-NEXT: # %bb.2: # %thrower
528 ; X64-NEXT: movq %rdx, %r14
529 ; X64-NEXT: cmovgq %rbx, %rax
530 ; X64-NEXT: movslq %edi, %rcx
531 ; X64-NEXT: movq %rsi, %r15
532 ; X64-NEXT: movl (%rsi,%rcx,4), %ebp
533 ; X64-NEXT: orl %eax, %ebp
534 ; X64-NEXT: movl $4, %edi
535 ; X64-NEXT: shlq $47, %rax
536 ; X64-NEXT: orq %rax, %rsp
537 ; X64-NEXT: callq __cxa_allocate_exception@PLT
538 ; X64-NEXT: .Lslh_ret_addr4:
539 ; X64-NEXT: movq %rsp, %rcx
540 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
541 ; X64-NEXT: sarq $63, %rcx
542 ; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx
543 ; X64-NEXT: cmovneq %rbx, %rcx
544 ; X64-NEXT: movl %ebp, (%rax)
546 ; X64-NEXT: shlq $47, %rcx
547 ; X64-NEXT: movq %rax, %rdi
548 ; X64-NEXT: xorl %esi, %esi
549 ; X64-NEXT: xorl %edx, %edx
550 ; X64-NEXT: orq %rcx, %rsp
551 ; X64-NEXT: callq __cxa_throw@PLT
552 ; X64-NEXT: .Lslh_ret_addr5:
553 ; X64-NEXT: movq %rsp, %rax
554 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
555 ; X64-NEXT: sarq $63, %rax
556 ; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx
557 ; X64-NEXT: cmovneq %rbx, %rax
559 ; X64-NEXT: jmp .LBB4_3
561 ; X64-NEXT: cmovleq %rbx, %rax
562 ; X64-NEXT: .LBB4_3: # %exit
563 ; X64-NEXT: shlq $47, %rax
564 ; X64-NEXT: orq %rax, %rsp
565 ; X64-NEXT: addq $8, %rsp
566 ; X64-NEXT: .cfi_def_cfa_offset 40
567 ; X64-NEXT: popq %rbx
568 ; X64-NEXT: .cfi_def_cfa_offset 32
569 ; X64-NEXT: popq %r14
570 ; X64-NEXT: .cfi_def_cfa_offset 24
571 ; X64-NEXT: popq %r15
572 ; X64-NEXT: .cfi_def_cfa_offset 16
573 ; X64-NEXT: popq %rbp
574 ; X64-NEXT: .cfi_def_cfa_offset 8
576 ; X64-NEXT: .LBB4_4: # %lpad
577 ; X64-NEXT: .cfi_def_cfa_offset 48
579 ; X64-NEXT: movq %rsp, %rcx
580 ; X64-NEXT: sarq $63, %rcx
581 ; X64-NEXT: movl (%rax), %eax
582 ; X64-NEXT: addl (%r15), %eax
584 ; X64-NEXT: orq %rcx, %rax
585 ; X64-NEXT: movl (%r14,%rax,4), %edi
586 ; X64-NEXT: orl %ecx, %edi
587 ; X64-NEXT: shlq $47, %rcx
588 ; X64-NEXT: orq %rcx, %rsp
589 ; X64-NEXT: callq sink@PLT
590 ; X64-NEXT: .Lslh_ret_addr6:
591 ; X64-NEXT: movq %rsp, %rax
592 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
593 ; X64-NEXT: sarq $63, %rax
594 ; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx
595 ; X64-NEXT: cmovneq %rbx, %rax
597 ; X64-LFENCE-LABEL: test_basic_eh:
598 ; X64-LFENCE: # %bb.0: # %entry
599 ; X64-LFENCE-NEXT: pushq %rbp
600 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
601 ; X64-LFENCE-NEXT: pushq %r14
602 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
603 ; X64-LFENCE-NEXT: pushq %rbx
604 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
605 ; X64-LFENCE-NEXT: .cfi_offset %rbx, -32
606 ; X64-LFENCE-NEXT: .cfi_offset %r14, -24
607 ; X64-LFENCE-NEXT: .cfi_offset %rbp, -16
608 ; X64-LFENCE-NEXT: cmpl $41, %edi
609 ; X64-LFENCE-NEXT: jg .LBB4_2
610 ; X64-LFENCE-NEXT: # %bb.1: # %thrower
611 ; X64-LFENCE-NEXT: movq %rdx, %rbx
612 ; X64-LFENCE-NEXT: movq %rsi, %r14
613 ; X64-LFENCE-NEXT: lfence
614 ; X64-LFENCE-NEXT: movslq %edi, %rax
615 ; X64-LFENCE-NEXT: movl (%rsi,%rax,4), %ebp
616 ; X64-LFENCE-NEXT: movl $4, %edi
617 ; X64-LFENCE-NEXT: callq __cxa_allocate_exception@PLT
618 ; X64-LFENCE-NEXT: movl %ebp, (%rax)
619 ; X64-LFENCE-NEXT: .Ltmp0:
620 ; X64-LFENCE-NEXT: movq %rax, %rdi
621 ; X64-LFENCE-NEXT: xorl %esi, %esi
622 ; X64-LFENCE-NEXT: xorl %edx, %edx
623 ; X64-LFENCE-NEXT: callq __cxa_throw@PLT
624 ; X64-LFENCE-NEXT: .Ltmp1:
625 ; X64-LFENCE-NEXT: .LBB4_2: # %exit
626 ; X64-LFENCE-NEXT: lfence
627 ; X64-LFENCE-NEXT: popq %rbx
628 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24
629 ; X64-LFENCE-NEXT: popq %r14
630 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16
631 ; X64-LFENCE-NEXT: popq %rbp
632 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8
633 ; X64-LFENCE-NEXT: retq
634 ; X64-LFENCE-NEXT: .LBB4_3: # %lpad
635 ; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32
636 ; X64-LFENCE-NEXT: .Ltmp2:
637 ; X64-LFENCE-NEXT: movl (%rax), %eax
638 ; X64-LFENCE-NEXT: addl (%r14), %eax
639 ; X64-LFENCE-NEXT: cltq
640 ; X64-LFENCE-NEXT: movl (%rbx,%rax,4), %edi
641 ; X64-LFENCE-NEXT: callq sink@PLT
; Only values of %a below 42 reach the throwing path.
643 %a.cmp = icmp slt i32 %a, 42
644 br i1 %a.cmp, label %thrower, label %exit
; Throwing path: %a itself is the index of the load from %ptr1, and the loaded
; value becomes the payload stored into the exception object.
647 %badidx = getelementptr i32, ptr %ptr1, i32 %a
648 %secret1 = load i32, ptr %badidx
649 %e.ptr = call ptr @__cxa_allocate_exception(i64 4)
650 store i32 %secret1, ptr %e.ptr
651 invoke void @__cxa_throw(ptr %e.ptr, ptr null, ptr null)
652 to label %exit unwind label %lpad
; Landing pad: field 0 of the landingpad result is treated as a pointer to the
; i32 payload; its value plus a load from %ptr1 indexes %ptr2.
658 %e = landingpad { ptr, i32 }
660 %e.catch.ptr = extractvalue { ptr, i32 } %e, 0
661 %secret1.catch = load i32, ptr %e.catch.ptr
662 %secret2 = load i32, ptr %ptr1
663 %secret.sum = add i32 %secret1.catch, %secret2
664 %ptr2.idx = getelementptr i32, ptr %ptr2, i32 %secret.sum
665 %leak = load i32, ptr %ptr2.idx
666 call void @sink(i32 %leak)
; External sinks taking scalar floating point arguments; called by
; @test_fp_loads. No body is visible in this file.
670 declare void @sink_float(float)
671 declare void @sink_double(double)
673 ; Test direct and converting loads of floating point values.
; Eight loads in straight-line code, each followed by a call:
;   float, double, double narrowed with fptrunc, float widened with fpext,
;   i32 to float, i64 to double, i64 to float and i32 to double (all four
;   integer cases via sitofp).
; Every result is passed to @sink_float or @sink_double.
; The default run expects a return address comparison after each of the eight
; calls, and the mask to be OR-ed into a pointer register ahead of a load on
; the lines where an orq into %r15, %r12, %rbx or %r14 is shown. The
; -x86-slh-lfence run expects the loads and calls with no lfence and no masking.
674 define void @test_fp_loads(ptr %fptr, ptr %dptr, ptr %i32ptr, ptr %i64ptr) nounwind speculative_load_hardening {
675 ; X64-LABEL: test_fp_loads:
676 ; X64: # %bb.0: # %entry
677 ; X64-NEXT: pushq %r15
678 ; X64-NEXT: pushq %r14
679 ; X64-NEXT: pushq %r13
680 ; X64-NEXT: pushq %r12
681 ; X64-NEXT: pushq %rbx
682 ; X64-NEXT: movq %rsp, %rax
683 ; X64-NEXT: movq %rcx, %r14
684 ; X64-NEXT: movq %rdx, %rbx
685 ; X64-NEXT: movq %rsi, %r12
686 ; X64-NEXT: movq %rdi, %r15
687 ; X64-NEXT: movq $-1, %r13
688 ; X64-NEXT: sarq $63, %rax
689 ; X64-NEXT: orq %rax, %r15
690 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
691 ; X64-NEXT: shlq $47, %rax
692 ; X64-NEXT: orq %rax, %rsp
693 ; X64-NEXT: callq sink_float@PLT
694 ; X64-NEXT: .Lslh_ret_addr7:
695 ; X64-NEXT: movq %rsp, %rax
696 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
697 ; X64-NEXT: sarq $63, %rax
698 ; X64-NEXT: cmpq $.Lslh_ret_addr7, %rcx
699 ; X64-NEXT: cmovneq %r13, %rax
700 ; X64-NEXT: orq %rax, %r12
701 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
702 ; X64-NEXT: shlq $47, %rax
703 ; X64-NEXT: orq %rax, %rsp
704 ; X64-NEXT: callq sink_double@PLT
705 ; X64-NEXT: .Lslh_ret_addr8:
706 ; X64-NEXT: movq %rsp, %rax
707 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
708 ; X64-NEXT: sarq $63, %rax
709 ; X64-NEXT: cmpq $.Lslh_ret_addr8, %rcx
710 ; X64-NEXT: cmovneq %r13, %rax
711 ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
712 ; X64-NEXT: cvtsd2ss %xmm0, %xmm0
713 ; X64-NEXT: shlq $47, %rax
714 ; X64-NEXT: orq %rax, %rsp
715 ; X64-NEXT: callq sink_float@PLT
716 ; X64-NEXT: .Lslh_ret_addr9:
717 ; X64-NEXT: movq %rsp, %rax
718 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
719 ; X64-NEXT: sarq $63, %rax
720 ; X64-NEXT: cmpq $.Lslh_ret_addr9, %rcx
721 ; X64-NEXT: cmovneq %r13, %rax
722 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
723 ; X64-NEXT: cvtss2sd %xmm0, %xmm0
724 ; X64-NEXT: shlq $47, %rax
725 ; X64-NEXT: orq %rax, %rsp
726 ; X64-NEXT: callq sink_double@PLT
727 ; X64-NEXT: .Lslh_ret_addr10:
728 ; X64-NEXT: movq %rsp, %rax
729 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
730 ; X64-NEXT: sarq $63, %rax
731 ; X64-NEXT: cmpq $.Lslh_ret_addr10, %rcx
732 ; X64-NEXT: cmovneq %r13, %rax
733 ; X64-NEXT: orq %rax, %rbx
734 ; X64-NEXT: xorps %xmm0, %xmm0
735 ; X64-NEXT: cvtsi2ssl (%rbx), %xmm0
736 ; X64-NEXT: shlq $47, %rax
737 ; X64-NEXT: orq %rax, %rsp
738 ; X64-NEXT: callq sink_float@PLT
739 ; X64-NEXT: .Lslh_ret_addr11:
740 ; X64-NEXT: movq %rsp, %rax
741 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
742 ; X64-NEXT: sarq $63, %rax
743 ; X64-NEXT: cmpq $.Lslh_ret_addr11, %rcx
744 ; X64-NEXT: cmovneq %r13, %rax
745 ; X64-NEXT: orq %rax, %r14
746 ; X64-NEXT: xorps %xmm0, %xmm0
747 ; X64-NEXT: cvtsi2sdq (%r14), %xmm0
748 ; X64-NEXT: shlq $47, %rax
749 ; X64-NEXT: orq %rax, %rsp
750 ; X64-NEXT: callq sink_double@PLT
751 ; X64-NEXT: .Lslh_ret_addr12:
752 ; X64-NEXT: movq %rsp, %rax
753 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
754 ; X64-NEXT: sarq $63, %rax
755 ; X64-NEXT: cmpq $.Lslh_ret_addr12, %rcx
756 ; X64-NEXT: cmovneq %r13, %rax
757 ; X64-NEXT: xorps %xmm0, %xmm0
758 ; X64-NEXT: cvtsi2ssq (%r14), %xmm0
759 ; X64-NEXT: shlq $47, %rax
760 ; X64-NEXT: orq %rax, %rsp
761 ; X64-NEXT: callq sink_float@PLT
762 ; X64-NEXT: .Lslh_ret_addr13:
763 ; X64-NEXT: movq %rsp, %rax
764 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
765 ; X64-NEXT: sarq $63, %rax
766 ; X64-NEXT: cmpq $.Lslh_ret_addr13, %rcx
767 ; X64-NEXT: cmovneq %r13, %rax
768 ; X64-NEXT: xorps %xmm0, %xmm0
769 ; X64-NEXT: cvtsi2sdl (%rbx), %xmm0
770 ; X64-NEXT: shlq $47, %rax
771 ; X64-NEXT: orq %rax, %rsp
772 ; X64-NEXT: callq sink_double@PLT
773 ; X64-NEXT: .Lslh_ret_addr14:
774 ; X64-NEXT: movq %rsp, %rax
775 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
776 ; X64-NEXT: sarq $63, %rax
777 ; X64-NEXT: cmpq $.Lslh_ret_addr14, %rcx
778 ; X64-NEXT: cmovneq %r13, %rax
779 ; X64-NEXT: shlq $47, %rax
780 ; X64-NEXT: orq %rax, %rsp
781 ; X64-NEXT: popq %rbx
782 ; X64-NEXT: popq %r12
783 ; X64-NEXT: popq %r13
784 ; X64-NEXT: popq %r14
785 ; X64-NEXT: popq %r15
788 ; X64-LFENCE-LABEL: test_fp_loads:
789 ; X64-LFENCE: # %bb.0: # %entry
790 ; X64-LFENCE-NEXT: pushq %r15
791 ; X64-LFENCE-NEXT: pushq %r14
792 ; X64-LFENCE-NEXT: pushq %r12
793 ; X64-LFENCE-NEXT: pushq %rbx
794 ; X64-LFENCE-NEXT: pushq %rax
795 ; X64-LFENCE-NEXT: movq %rcx, %r14
796 ; X64-LFENCE-NEXT: movq %rdx, %rbx
797 ; X64-LFENCE-NEXT: movq %rsi, %r15
798 ; X64-LFENCE-NEXT: movq %rdi, %r12
799 ; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
800 ; X64-LFENCE-NEXT: callq sink_float@PLT
801 ; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
802 ; X64-LFENCE-NEXT: callq sink_double@PLT
803 ; X64-LFENCE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
804 ; X64-LFENCE-NEXT: cvtsd2ss %xmm0, %xmm0
805 ; X64-LFENCE-NEXT: callq sink_float@PLT
806 ; X64-LFENCE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
807 ; X64-LFENCE-NEXT: cvtss2sd %xmm0, %xmm0
808 ; X64-LFENCE-NEXT: callq sink_double@PLT
809 ; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
810 ; X64-LFENCE-NEXT: cvtsi2ssl (%rbx), %xmm0
811 ; X64-LFENCE-NEXT: callq sink_float@PLT
812 ; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
813 ; X64-LFENCE-NEXT: cvtsi2sdq (%r14), %xmm0
814 ; X64-LFENCE-NEXT: callq sink_double@PLT
815 ; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
816 ; X64-LFENCE-NEXT: cvtsi2ssq (%r14), %xmm0
817 ; X64-LFENCE-NEXT: callq sink_float@PLT
818 ; X64-LFENCE-NEXT: xorps %xmm0, %xmm0
819 ; X64-LFENCE-NEXT: cvtsi2sdl (%rbx), %xmm0
820 ; X64-LFENCE-NEXT: callq sink_double@PLT
821 ; X64-LFENCE-NEXT: addq $8, %rsp
822 ; X64-LFENCE-NEXT: popq %rbx
823 ; X64-LFENCE-NEXT: popq %r12
824 ; X64-LFENCE-NEXT: popq %r14
825 ; X64-LFENCE-NEXT: popq %r15
826 ; X64-LFENCE-NEXT: retq
; Direct loads of float and double.
828 %f1 = load float, ptr %fptr
829 call void @sink_float(float %f1)
830 %d1 = load double, ptr %dptr
831 call void @sink_double(double %d1)
; Loads followed by a floating point narrowing or widening conversion.
832 %f2.d = load double, ptr %dptr
833 %f2 = fptrunc double %f2.d to float
834 call void @sink_float(float %f2)
835 %d2.f = load float, ptr %fptr
836 %d2 = fpext float %d2.f to double
837 call void @sink_double(double %d2)
; Integer loads followed by a signed integer to floating point conversion.
838 %f3.i = load i32, ptr %i32ptr
839 %f3 = sitofp i32 %f3.i to float
840 call void @sink_float(float %f3)
841 %d3.i = load i64, ptr %i64ptr
842 %d3 = sitofp i64 %d3.i to double
843 call void @sink_double(double %d3)
844 %f4.i = load i64, ptr %i64ptr
845 %f4 = sitofp i64 %f4.i to float
846 call void @sink_float(float %f4)
847 %d4.i = load i32, ptr %i32ptr
848 %d4 = sitofp i32 %d4.i to double
849 call void @sink_double(double %d4)
; External sinks, one per vector type loaded by @test_vec_loads. No body is
; visible in this file.
853 declare void @sink_v4f32(<4 x float>)
854 declare void @sink_v2f64(<2 x double>)
855 declare void @sink_v16i8(<16 x i8>)
856 declare void @sink_v8i16(<8 x i16>)
857 declare void @sink_v4i32(<4 x i32>)
858 declare void @sink_v2i64(<2 x i64>)
860 ; Test loads of vectors.
; Six vector loads in straight-line code, one from each pointer argument
; (<4 x float>, <2 x double>, <16 x i8>, <8 x i16>, <4 x i32>, <2 x i64>),
; each passed to the sink declared for its type.
; The default run expects the mask to be OR-ed into the pointer register just
; before each movaps, and a return address comparison after each call. The
; -x86-slh-lfence run expects the six load/call pairs with no lfence and no
; masking.
861 define void @test_vec_loads(ptr %v4f32ptr, ptr %v2f64ptr, ptr %v16i8ptr, ptr %v8i16ptr, ptr %v4i32ptr, ptr %v2i64ptr) nounwind speculative_load_hardening {
862 ; X64-LABEL: test_vec_loads:
863 ; X64: # %bb.0: # %entry
864 ; X64-NEXT: pushq %rbp
865 ; X64-NEXT: pushq %r15
866 ; X64-NEXT: pushq %r14
867 ; X64-NEXT: pushq %r13
868 ; X64-NEXT: pushq %r12
869 ; X64-NEXT: pushq %rbx
870 ; X64-NEXT: pushq %rax
871 ; X64-NEXT: movq %rsp, %rax
872 ; X64-NEXT: movq %r9, %rbx
873 ; X64-NEXT: movq %r8, %r14
874 ; X64-NEXT: movq %rcx, %r15
875 ; X64-NEXT: movq %rdx, %r12
876 ; X64-NEXT: movq %rsi, %r13
877 ; X64-NEXT: movq $-1, %rbp
878 ; X64-NEXT: sarq $63, %rax
879 ; X64-NEXT: orq %rax, %rdi
880 ; X64-NEXT: movaps (%rdi), %xmm0
881 ; X64-NEXT: shlq $47, %rax
882 ; X64-NEXT: orq %rax, %rsp
883 ; X64-NEXT: callq sink_v4f32@PLT
884 ; X64-NEXT: .Lslh_ret_addr15:
885 ; X64-NEXT: movq %rsp, %rax
886 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
887 ; X64-NEXT: sarq $63, %rax
888 ; X64-NEXT: cmpq $.Lslh_ret_addr15, %rcx
889 ; X64-NEXT: cmovneq %rbp, %rax
890 ; X64-NEXT: orq %rax, %r13
891 ; X64-NEXT: movaps (%r13), %xmm0
892 ; X64-NEXT: shlq $47, %rax
893 ; X64-NEXT: orq %rax, %rsp
894 ; X64-NEXT: callq sink_v2f64@PLT
895 ; X64-NEXT: .Lslh_ret_addr16:
896 ; X64-NEXT: movq %rsp, %rax
897 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
898 ; X64-NEXT: sarq $63, %rax
899 ; X64-NEXT: cmpq $.Lslh_ret_addr16, %rcx
900 ; X64-NEXT: cmovneq %rbp, %rax
901 ; X64-NEXT: orq %rax, %r12
902 ; X64-NEXT: movaps (%r12), %xmm0
903 ; X64-NEXT: shlq $47, %rax
904 ; X64-NEXT: orq %rax, %rsp
905 ; X64-NEXT: callq sink_v16i8@PLT
906 ; X64-NEXT: .Lslh_ret_addr17:
907 ; X64-NEXT: movq %rsp, %rax
908 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
909 ; X64-NEXT: sarq $63, %rax
910 ; X64-NEXT: cmpq $.Lslh_ret_addr17, %rcx
911 ; X64-NEXT: cmovneq %rbp, %rax
912 ; X64-NEXT: orq %rax, %r15
913 ; X64-NEXT: movaps (%r15), %xmm0
914 ; X64-NEXT: shlq $47, %rax
915 ; X64-NEXT: orq %rax, %rsp
916 ; X64-NEXT: callq sink_v8i16@PLT
917 ; X64-NEXT: .Lslh_ret_addr18:
918 ; X64-NEXT: movq %rsp, %rax
919 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
920 ; X64-NEXT: sarq $63, %rax
921 ; X64-NEXT: cmpq $.Lslh_ret_addr18, %rcx
922 ; X64-NEXT: cmovneq %rbp, %rax
923 ; X64-NEXT: orq %rax, %r14
924 ; X64-NEXT: movaps (%r14), %xmm0
925 ; X64-NEXT: shlq $47, %rax
926 ; X64-NEXT: orq %rax, %rsp
927 ; X64-NEXT: callq sink_v4i32@PLT
928 ; X64-NEXT: .Lslh_ret_addr19:
929 ; X64-NEXT: movq %rsp, %rax
930 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
931 ; X64-NEXT: sarq $63, %rax
932 ; X64-NEXT: cmpq $.Lslh_ret_addr19, %rcx
933 ; X64-NEXT: cmovneq %rbp, %rax
934 ; X64-NEXT: orq %rax, %rbx
935 ; X64-NEXT: movaps (%rbx), %xmm0
936 ; X64-NEXT: shlq $47, %rax
937 ; X64-NEXT: orq %rax, %rsp
938 ; X64-NEXT: callq sink_v2i64@PLT
939 ; X64-NEXT: .Lslh_ret_addr20:
940 ; X64-NEXT: movq %rsp, %rax
941 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
942 ; X64-NEXT: sarq $63, %rax
943 ; X64-NEXT: cmpq $.Lslh_ret_addr20, %rcx
944 ; X64-NEXT: cmovneq %rbp, %rax
945 ; X64-NEXT: shlq $47, %rax
946 ; X64-NEXT: orq %rax, %rsp
947 ; X64-NEXT: addq $8, %rsp
948 ; X64-NEXT: popq %rbx
949 ; X64-NEXT: popq %r12
950 ; X64-NEXT: popq %r13
951 ; X64-NEXT: popq %r14
952 ; X64-NEXT: popq %r15
953 ; X64-NEXT: popq %rbp
956 ; X64-LFENCE-LABEL: test_vec_loads:
957 ; X64-LFENCE: # %bb.0: # %entry
958 ; X64-LFENCE-NEXT: pushq %r15
959 ; X64-LFENCE-NEXT: pushq %r14
960 ; X64-LFENCE-NEXT: pushq %r13
961 ; X64-LFENCE-NEXT: pushq %r12
962 ; X64-LFENCE-NEXT: pushq %rbx
963 ; X64-LFENCE-NEXT: movq %r9, %rbx
964 ; X64-LFENCE-NEXT: movq %r8, %r14
965 ; X64-LFENCE-NEXT: movq %rcx, %r15
966 ; X64-LFENCE-NEXT: movq %rdx, %r12
967 ; X64-LFENCE-NEXT: movq %rsi, %r13
968 ; X64-LFENCE-NEXT: movaps (%rdi), %xmm0
969 ; X64-LFENCE-NEXT: callq sink_v4f32@PLT
970 ; X64-LFENCE-NEXT: movaps (%r13), %xmm0
971 ; X64-LFENCE-NEXT: callq sink_v2f64@PLT
972 ; X64-LFENCE-NEXT: movaps (%r12), %xmm0
973 ; X64-LFENCE-NEXT: callq sink_v16i8@PLT
974 ; X64-LFENCE-NEXT: movaps (%r15), %xmm0
975 ; X64-LFENCE-NEXT: callq sink_v8i16@PLT
976 ; X64-LFENCE-NEXT: movaps (%r14), %xmm0
977 ; X64-LFENCE-NEXT: callq sink_v4i32@PLT
978 ; X64-LFENCE-NEXT: movaps (%rbx), %xmm0
979 ; X64-LFENCE-NEXT: callq sink_v2i64@PLT
980 ; X64-LFENCE-NEXT: popq %rbx
981 ; X64-LFENCE-NEXT: popq %r12
982 ; X64-LFENCE-NEXT: popq %r13
983 ; X64-LFENCE-NEXT: popq %r14
984 ; X64-LFENCE-NEXT: popq %r15
985 ; X64-LFENCE-NEXT: retq
; One load/call pair per vector element type; both floating point and integer
; vectors are expected to use movaps in the assertions above.
987 %x1 = load <4 x float>, ptr %v4f32ptr
988 call void @sink_v4f32(<4 x float> %x1)
989 %x2 = load <2 x double>, ptr %v2f64ptr
990 call void @sink_v2f64(<2 x double> %x2)
991 %x3 = load <16 x i8>, ptr %v16i8ptr
992 call void @sink_v16i8(<16 x i8> %x3)
993 %x4 = load <8 x i16>, ptr %v8i16ptr
994 call void @sink_v8i16(<8 x i16> %x4)
995 %x5 = load <4 x i32>, ptr %v4i32ptr
996 call void @sink_v4i32(<4 x i32> %x5)
997 %x6 = load <2 x i64>, ptr %v2i64ptr
998 call void @sink_v2i64(<2 x i64> %x6)
; Check where hardening is applied when a loaded value flows through further
; integer arithmetic before being passed to @sink. In the expected output of
; the default (non-lfence) run, the loads themselves are not masked; instead a
; single `orl %eax, %edi` is applied to the final computed argument right
; before each call. In the expected output of the -x86-slh-lfence run no such
; masking instruction appears at all.
1002 define void @test_deferred_hardening(ptr %ptr1, ptr %ptr2, i32 %x) nounwind speculative_load_hardening {
1003 ; X64-LABEL: test_deferred_hardening:
1004 ; X64: # %bb.0: # %entry
1005 ; X64-NEXT: pushq %r15
1006 ; X64-NEXT: pushq %r14
1007 ; X64-NEXT: pushq %rbx
1008 ; X64-NEXT: movq %rsp, %rax
1009 ; X64-NEXT: movq %rsi, %r14
1010 ; X64-NEXT: movq %rdi, %rbx
1011 ; X64-NEXT: movq $-1, %r15
1012 ; X64-NEXT: sarq $63, %rax
1013 ; X64-NEXT: movl (%rdi), %edi
1014 ; X64-NEXT: incl %edi
1015 ; X64-NEXT: imull %edx, %edi
1016 ; X64-NEXT: orl %eax, %edi
1017 ; X64-NEXT: shlq $47, %rax
1018 ; X64-NEXT: orq %rax, %rsp
1019 ; X64-NEXT: callq sink@PLT
1020 ; X64-NEXT: .Lslh_ret_addr21:
1021 ; X64-NEXT: movq %rsp, %rax
1022 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1023 ; X64-NEXT: sarq $63, %rax
1024 ; X64-NEXT: cmpq $.Lslh_ret_addr21, %rcx
1025 ; X64-NEXT: cmovneq %r15, %rax
1026 ; X64-NEXT: movl (%rbx), %ecx
1027 ; X64-NEXT: movl (%r14), %edx
1028 ; X64-NEXT: leal 1(%rcx,%rdx), %edi
1029 ; X64-NEXT: orl %eax, %edi
1030 ; X64-NEXT: shlq $47, %rax
1031 ; X64-NEXT: orq %rax, %rsp
1032 ; X64-NEXT: callq sink@PLT
1033 ; X64-NEXT: .Lslh_ret_addr22:
1034 ; X64-NEXT: movq %rsp, %rax
1035 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1036 ; X64-NEXT: sarq $63, %rax
1037 ; X64-NEXT: cmpq $.Lslh_ret_addr22, %rcx
1038 ; X64-NEXT: cmovneq %r15, %rax
1039 ; X64-NEXT: movl (%rbx), %edi
1040 ; X64-NEXT: shll $7, %edi
1041 ; X64-NEXT: orl %eax, %edi
1042 ; X64-NEXT: shlq $47, %rax
1043 ; X64-NEXT: orq %rax, %rsp
1044 ; X64-NEXT: callq sink@PLT
1045 ; X64-NEXT: .Lslh_ret_addr23:
1046 ; X64-NEXT: movq %rsp, %rax
1047 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1048 ; X64-NEXT: sarq $63, %rax
1049 ; X64-NEXT: cmpq $.Lslh_ret_addr23, %rcx
1050 ; X64-NEXT: cmovneq %r15, %rax
1051 ; X64-NEXT: movswl (%rbx), %edi
1052 ; X64-NEXT: notl %edi
1053 ; X64-NEXT: shrl $7, %edi
1054 ; X64-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
1055 ; X64-NEXT: orl %eax, %edi
1056 ; X64-NEXT: shlq $47, %rax
1057 ; X64-NEXT: orq %rax, %rsp
1058 ; X64-NEXT: callq sink@PLT
1059 ; X64-NEXT: .Lslh_ret_addr24:
1060 ; X64-NEXT: movq %rsp, %rax
1061 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1062 ; X64-NEXT: sarq $63, %rax
1063 ; X64-NEXT: cmpq $.Lslh_ret_addr24, %rcx
1064 ; X64-NEXT: cmovneq %r15, %rax
1065 ; X64-NEXT: movzwl (%rbx), %ecx
1066 ; X64-NEXT: rolw $9, %cx
1067 ; X64-NEXT: movswl %cx, %edi
1068 ; X64-NEXT: negl %edi
1069 ; X64-NEXT: orl %eax, %edi
1070 ; X64-NEXT: shlq $47, %rax
1071 ; X64-NEXT: orq %rax, %rsp
1072 ; X64-NEXT: callq sink@PLT
1073 ; X64-NEXT: .Lslh_ret_addr25:
1074 ; X64-NEXT: movq %rsp, %rax
1075 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
1076 ; X64-NEXT: sarq $63, %rax
1077 ; X64-NEXT: cmpq $.Lslh_ret_addr25, %rcx
1078 ; X64-NEXT: cmovneq %r15, %rax
1079 ; X64-NEXT: shlq $47, %rax
1080 ; X64-NEXT: orq %rax, %rsp
1081 ; X64-NEXT: popq %rbx
1082 ; X64-NEXT: popq %r14
1083 ; X64-NEXT: popq %r15
1086 ; X64-LFENCE-LABEL: test_deferred_hardening:
1087 ; X64-LFENCE: # %bb.0: # %entry
1088 ; X64-LFENCE-NEXT: pushq %r14
1089 ; X64-LFENCE-NEXT: pushq %rbx
1090 ; X64-LFENCE-NEXT: pushq %rax
1091 ; X64-LFENCE-NEXT: movq %rsi, %r14
1092 ; X64-LFENCE-NEXT: movq %rdi, %rbx
1093 ; X64-LFENCE-NEXT: movl (%rdi), %edi
1094 ; X64-LFENCE-NEXT: incl %edi
1095 ; X64-LFENCE-NEXT: imull %edx, %edi
1096 ; X64-LFENCE-NEXT: callq sink@PLT
1097 ; X64-LFENCE-NEXT: movl (%rbx), %eax
1098 ; X64-LFENCE-NEXT: movl (%r14), %ecx
1099 ; X64-LFENCE-NEXT: leal 1(%rax,%rcx), %edi
1100 ; X64-LFENCE-NEXT: callq sink@PLT
1101 ; X64-LFENCE-NEXT: movl (%rbx), %edi
1102 ; X64-LFENCE-NEXT: shll $7, %edi
1103 ; X64-LFENCE-NEXT: callq sink@PLT
1104 ; X64-LFENCE-NEXT: movswl (%rbx), %edi
1105 ; X64-LFENCE-NEXT: notl %edi
1106 ; X64-LFENCE-NEXT: shrl $7, %edi
1107 ; X64-LFENCE-NEXT: orl $-65536, %edi # imm = 0xFFFF0000
1108 ; X64-LFENCE-NEXT: callq sink@PLT
1109 ; X64-LFENCE-NEXT: movzwl (%rbx), %eax
1110 ; X64-LFENCE-NEXT: rolw $9, %ax
1111 ; X64-LFENCE-NEXT: movswl %ax, %edi
1112 ; X64-LFENCE-NEXT: negl %edi
1113 ; X64-LFENCE-NEXT: callq sink@PLT
1114 ; X64-LFENCE-NEXT: addq $8, %rsp
1115 ; X64-LFENCE-NEXT: popq %rbx
1116 ; X64-LFENCE-NEXT: popq %r14
1117 ; X64-LFENCE-NEXT: retq
; Case a: one load feeding an add and a multiply by %x.
1119 %a1 = load i32, ptr %ptr1
1120 %a2 = add i32 %a1, 1
1121 %a3 = mul i32 %a2, %x
1122 call void @sink(i32 %a3)
; Case b: two loads (from %ptr1 and %ptr2) combined by adds into one value.
1123 %b1 = load i32, ptr %ptr1
1124 %b2 = add i32 %b1, 1
1125 %b3 = load i32, ptr %ptr2
1126 %b4 = add i32 %b2, %b3
1127 call void @sink(i32 %b4)
; Case c: one load followed by a left shift by a constant.
1128 %c1 = load i32, ptr %ptr1
1129 %c2 = shl i32 %c1, 7
1130 call void @sink(i32 %c2)
1131 %d1 = load i32, ptr %ptr1
1132 ; Check trunc and integer ops narrower than i32.
1133 %d2 = trunc i32 %d1 to i16
1134 %d3 = ashr i16 %d2, 7
1135 %d4 = zext i16 %d3 to i32
1136 %d5 = xor i32 %d4, -1
1137 call void @sink(i32 %d5)
; Case e: the lshr-by-7 / shl-by-9 / or on i16 is a 16-bit rotate (expected
; as `rolw $9` in the output), followed by a sign extension and a negation.
1138 %e1 = load i32, ptr %ptr1
1139 %e2 = trunc i32 %e1 to i16
1140 %e3 = lshr i16 %e2, 7
1141 %e4 = shl i16 %e2, 9
1142 %e5 = or i16 %e3, %e4
1143 %e6 = sext i16 %e5 to i32
1144 %e7 = sub i32 0, %e6
1145 call void @sink(i32 %e7)
1149 ; Make sure we don't crash on idempotent atomic operations which have a
1150 ; hardcoded reference to RSP+offset.
1151 define void @idempotent_atomic(ptr %x) speculative_load_hardening {
1152 ; X64-LABEL: idempotent_atomic:
1154 ; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
1157 ; X64-LFENCE-LABEL: idempotent_atomic:
1158 ; X64-LFENCE: # %bb.0:
1159 ; X64-LFENCE-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
1160 ; X64-LFENCE-NEXT: retq
1161 %tmp = atomicrmw or ptr %x, i32 0 seq_cst