1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32
4 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64
5 ; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32
; Declarations of the x86 intrinsics exercised by the tests in this file.
; loadiwkey takes three <2 x i64> operands plus an i32 and returns nothing.
7 declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
; encodekey128/256 return an i32 followed by six / seven <2 x i64> values.
8 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
9 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
; Single-block forms: one <2 x i64> plus a pointer in, { i8, <2 x i64> } out.
10 declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
11 declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
12 declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
13 declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
; Wide forms: a pointer plus eight <2 x i64> in, an i8 plus eight <2 x i64> out.
14 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
15 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
16 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
17 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
; Checks lowering of @llvm.x86.loadiwkey. The CHECK lines expect %ctl to be
; copied into %eax, followed by `loadiwkey %xmm2, %xmm1`, on both targets.
19 define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
20 ; X64-LABEL: test_loadiwkey:
21 ; X64: # %bb.0: # %entry
22 ; X64-NEXT: movl %edi, %eax
23 ; X64-NEXT: loadiwkey %xmm2, %xmm1
26 ; X32-LABEL: test_loadiwkey:
27 ; X32: # %bb.0: # %entry
28 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
29 ; X32-NEXT: loadiwkey %xmm2, %xmm1
; %ctl is the first function argument but the last intrinsic operand.
32 tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
; Checks lowering of @llvm.x86.encodekey128. Result elements 1..3 are stored to
; %h0..%h2 and the i32 element 0 is extracted last.
; NOTE(review): %h3..%h5 are never stored to in this body, so result elements
; 4..6 are not covered by the CHECK lines - confirm this is intentional.
36 define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5) nounwind {
37 ; X64-LABEL: test_encodekey128_u32:
38 ; X64: # %bb.0: # %entry
39 ; X64-NEXT: encodekey128 %edi, %eax
40 ; X64-NEXT: movaps %xmm0, (%rsi)
41 ; X64-NEXT: movaps %xmm1, (%rdx)
42 ; X64-NEXT: movaps %xmm2, (%rcx)
45 ; X32-LABEL: test_encodekey128_u32:
46 ; X32: # %bb.0: # %entry
47 ; X32-NEXT: pushl %esi
48 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
49 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
50 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
51 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
52 ; X32-NEXT: encodekey128 %eax, %eax
53 ; X32-NEXT: vmovaps %xmm0, (%esi)
54 ; X32-NEXT: vmovaps %xmm1, (%edx)
55 ; X32-NEXT: vmovaps %xmm2, (%ecx)
59 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
; Elements 1..3 of the result go to %h0..%h2 with 16-byte alignment.
60 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
61 store <2 x i64> %1, ptr %h0, align 16
62 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
63 store <2 x i64> %2, ptr %h1, align 16
64 %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
65 store <2 x i64> %3, ptr %h2, align 16
; Element 0 is the i32 member of the result struct.
66 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.encodekey256. Result elements 1..4 are stored to
; %h0..%h3 and the i32 element 0 is extracted last.
; NOTE(review): %h4..%h6 are never stored to in this body, so result elements
; 5..7 are not covered by the CHECK lines - confirm this is intentional.
70 define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5, ptr nocapture readnone %h6) nounwind {
71 ; X64-LABEL: test_encodekey256_u32:
72 ; X64: # %bb.0: # %entry
73 ; X64-NEXT: encodekey256 %edi, %eax
74 ; X64-NEXT: movaps %xmm0, (%rsi)
75 ; X64-NEXT: movaps %xmm1, (%rdx)
76 ; X64-NEXT: movaps %xmm2, (%rcx)
77 ; X64-NEXT: movaps %xmm3, (%r8)
80 ; X32-LABEL: test_encodekey256_u32:
81 ; X32: # %bb.0: # %entry
82 ; X32-NEXT: pushl %edi
83 ; X32-NEXT: pushl %esi
84 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
85 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
86 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
87 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
88 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
89 ; X32-NEXT: encodekey256 %eax, %eax
90 ; X32-NEXT: vmovaps %xmm0, (%edi)
91 ; X32-NEXT: vmovaps %xmm1, (%esi)
92 ; X32-NEXT: vmovaps %xmm2, (%edx)
93 ; X32-NEXT: vmovaps %xmm3, (%ecx)
98 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
; Elements 1..4 of the result go to %h0..%h3 with 16-byte alignment.
99 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
100 store <2 x i64> %1, ptr %h0, align 16
101 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
102 store <2 x i64> %2, ptr %h1, align 16
103 %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
104 store <2 x i64> %3, ptr %h2, align 16
105 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
106 store <2 x i64> %4, ptr %h3, align 16
; Element 0 is the i32 member of the result struct.
107 %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesenc128kl with the handle passed as pointer %h.
; The CHECK lines expect the handle as a memory operand and %xmm0 stored to %out.
111 define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
112 ; X64-LABEL: test_mm_aesenc128kl_u8:
113 ; X64: # %bb.0: # %entry
114 ; X64-NEXT: aesenc128kl (%rdi), %xmm0
116 ; X64-NEXT: movaps %xmm0, (%rsi)
119 ; X32-LABEL: test_mm_aesenc128kl_u8:
120 ; X32: # %bb.0: # %entry
121 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
122 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
123 ; X32-NEXT: aesenc128kl (%eax), %xmm0
125 ; X32-NEXT: vmovaps %xmm0, (%ecx)
128 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, ptr %h)
; Element 1 is the <2 x i64> result; element 0 is the i8 member.
129 %1 = extractvalue { i8, <2 x i64> } %0, 1
130 store <2 x i64> %1, ptr %out
131 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesdec128kl with the handle passed as pointer %h.
; The CHECK lines expect the handle as a memory operand and %xmm0 stored to %out.
135 define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
136 ; X64-LABEL: test_mm_aesdec128kl_u8:
137 ; X64: # %bb.0: # %entry
138 ; X64-NEXT: aesdec128kl (%rdi), %xmm0
140 ; X64-NEXT: movaps %xmm0, (%rsi)
143 ; X32-LABEL: test_mm_aesdec128kl_u8:
144 ; X32: # %bb.0: # %entry
145 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
146 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
147 ; X32-NEXT: aesdec128kl (%eax), %xmm0
149 ; X32-NEXT: vmovaps %xmm0, (%ecx)
152 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, ptr %h)
; Element 1 is the <2 x i64> result; element 0 is the i8 member.
153 %1 = extractvalue { i8, <2 x i64> } %0, 1
154 store <2 x i64> %1, ptr %out
155 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesenc256kl with the handle passed as pointer %h.
; The CHECK lines expect the handle as a memory operand and %xmm0 stored to %out.
159 define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
160 ; X64-LABEL: test_mm_aesenc256kl_u8:
161 ; X64: # %bb.0: # %entry
162 ; X64-NEXT: aesenc256kl (%rdi), %xmm0
164 ; X64-NEXT: movaps %xmm0, (%rsi)
167 ; X32-LABEL: test_mm_aesenc256kl_u8:
168 ; X32: # %bb.0: # %entry
169 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
170 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
171 ; X32-NEXT: aesenc256kl (%eax), %xmm0
173 ; X32-NEXT: vmovaps %xmm0, (%ecx)
176 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr %h)
; Element 1 is the <2 x i64> result; element 0 is the i8 member.
177 %1 = extractvalue { i8, <2 x i64> } %0, 1
178 store <2 x i64> %1, ptr %out
179 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesdec256kl with the handle passed as pointer %h.
; The CHECK lines expect the handle as a memory operand and %xmm0 stored to %out.
183 define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
184 ; X64-LABEL: test_mm_aesdec256kl_u8:
185 ; X64: # %bb.0: # %entry
186 ; X64-NEXT: aesdec256kl (%rdi), %xmm0
188 ; X64-NEXT: movaps %xmm0, (%rsi)
191 ; X32-LABEL: test_mm_aesdec256kl_u8:
192 ; X32: # %bb.0: # %entry
193 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
194 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
195 ; X32-NEXT: aesdec256kl (%eax), %xmm0
197 ; X32-NEXT: vmovaps %xmm0, (%ecx)
200 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, ptr %h)
; Element 1 is the <2 x i64> result; element 0 is the i8 member.
201 %1 = extractvalue { i8, <2 x i64> } %0, 1
202 store <2 x i64> %1, ptr %out
203 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesencwide128kl: the handle pointer %p becomes
; the instruction's memory operand and the results are stored to %out0..%out7.
; NOTE(review): only %1 and %2 are ever stored; the stores to %out2..%out7 all
; reuse %2, so %3..%8 are dead and the CHECK lines only see %xmm0 and %xmm1
; being written. This looks like a copy/paste slip - if each store is meant to
; use its own extractvalue, fix the operands and regenerate the assertions with
; utils/update_llc_test_checks.py.
207 define i8 @test_mm_aesencwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
208 ; X64-LABEL: test_mm_aesencwide128kl_u8:
209 ; X64: # %bb.0: # %entry
210 ; X64-NEXT: pushq %rbx
211 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
212 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
213 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
214 ; X64-NEXT: aesencwide128kl (%rdi)
216 ; X64-NEXT: movaps %xmm0, (%rsi)
217 ; X64-NEXT: movaps %xmm1, (%rdx)
218 ; X64-NEXT: movaps %xmm1, (%rcx)
219 ; X64-NEXT: movaps %xmm1, (%r8)
220 ; X64-NEXT: movaps %xmm1, (%r9)
221 ; X64-NEXT: movaps %xmm1, (%rbx)
222 ; X64-NEXT: movaps %xmm1, (%r11)
223 ; X64-NEXT: movaps %xmm1, (%r10)
224 ; X64-NEXT: popq %rbx
227 ; X32-LABEL: test_mm_aesencwide128kl_u8:
228 ; X32: # %bb.0: # %entry
229 ; X32-NEXT: pushl %ebp
230 ; X32-NEXT: movl %esp, %ebp
231 ; X32-NEXT: andl $-16, %esp
232 ; X32-NEXT: subl $16, %esp
233 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
234 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
235 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
236 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
237 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
238 ; X32-NEXT: movl 8(%ebp), %eax
239 ; X32-NEXT: aesencwide128kl (%eax)
240 ; X32-NEXT: movl 104(%ebp), %eax
241 ; X32-NEXT: vmovaps %xmm0, (%eax)
242 ; X32-NEXT: movl 108(%ebp), %eax
243 ; X32-NEXT: vmovaps %xmm1, (%eax)
244 ; X32-NEXT: movl 112(%ebp), %eax
245 ; X32-NEXT: vmovaps %xmm1, (%eax)
246 ; X32-NEXT: movl 116(%ebp), %eax
247 ; X32-NEXT: vmovaps %xmm1, (%eax)
248 ; X32-NEXT: movl 120(%ebp), %eax
249 ; X32-NEXT: vmovaps %xmm1, (%eax)
250 ; X32-NEXT: movl 124(%ebp), %eax
251 ; X32-NEXT: vmovaps %xmm1, (%eax)
252 ; X32-NEXT: movl 128(%ebp), %eax
253 ; X32-NEXT: vmovaps %xmm1, (%eax)
254 ; X32-NEXT: movl 132(%ebp), %eax
255 ; X32-NEXT: vmovaps %xmm1, (%eax)
257 ; X32-NEXT: movl %ebp, %esp
258 ; X32-NEXT: popl %ebp
261 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
262 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
263 store <2 x i64> %1, ptr %out0
264 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
265 store <2 x i64> %2, ptr %out1
; From here on each store writes %2 rather than the value extracted just above it.
266 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
267 store <2 x i64> %2, ptr %out2
268 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
269 store <2 x i64> %2, ptr %out3
270 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
271 store <2 x i64> %2, ptr %out4
272 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
273 store <2 x i64> %2, ptr %out5
274 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
275 store <2 x i64> %2, ptr %out6
276 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
277 store <2 x i64> %2, ptr %out7
; Element 0 is the i8 member of the result struct.
278 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesdecwide128kl: the handle pointer %p becomes
; the instruction's memory operand and the results are stored to %out0..%out7.
; NOTE(review): only %1 and %2 are ever stored; the stores to %out2..%out7 all
; reuse %2, so %3..%8 are dead and the CHECK lines only see %xmm0 and %xmm1
; being written. This looks like a copy/paste slip - if each store is meant to
; use its own extractvalue, fix the operands and regenerate the assertions with
; utils/update_llc_test_checks.py.
282 define i8 @test_mm_aesdecwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
283 ; X64-LABEL: test_mm_aesdecwide128kl_u8:
284 ; X64: # %bb.0: # %entry
285 ; X64-NEXT: pushq %rbx
286 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
287 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
288 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
289 ; X64-NEXT: aesdecwide128kl (%rdi)
291 ; X64-NEXT: movaps %xmm0, (%rsi)
292 ; X64-NEXT: movaps %xmm1, (%rdx)
293 ; X64-NEXT: movaps %xmm1, (%rcx)
294 ; X64-NEXT: movaps %xmm1, (%r8)
295 ; X64-NEXT: movaps %xmm1, (%r9)
296 ; X64-NEXT: movaps %xmm1, (%rbx)
297 ; X64-NEXT: movaps %xmm1, (%r11)
298 ; X64-NEXT: movaps %xmm1, (%r10)
299 ; X64-NEXT: popq %rbx
302 ; X32-LABEL: test_mm_aesdecwide128kl_u8:
303 ; X32: # %bb.0: # %entry
304 ; X32-NEXT: pushl %ebp
305 ; X32-NEXT: movl %esp, %ebp
306 ; X32-NEXT: andl $-16, %esp
307 ; X32-NEXT: subl $16, %esp
308 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
309 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
310 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
311 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
312 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
313 ; X32-NEXT: movl 8(%ebp), %eax
314 ; X32-NEXT: aesdecwide128kl (%eax)
315 ; X32-NEXT: movl 104(%ebp), %eax
316 ; X32-NEXT: vmovaps %xmm0, (%eax)
317 ; X32-NEXT: movl 108(%ebp), %eax
318 ; X32-NEXT: vmovaps %xmm1, (%eax)
319 ; X32-NEXT: movl 112(%ebp), %eax
320 ; X32-NEXT: vmovaps %xmm1, (%eax)
321 ; X32-NEXT: movl 116(%ebp), %eax
322 ; X32-NEXT: vmovaps %xmm1, (%eax)
323 ; X32-NEXT: movl 120(%ebp), %eax
324 ; X32-NEXT: vmovaps %xmm1, (%eax)
325 ; X32-NEXT: movl 124(%ebp), %eax
326 ; X32-NEXT: vmovaps %xmm1, (%eax)
327 ; X32-NEXT: movl 128(%ebp), %eax
328 ; X32-NEXT: vmovaps %xmm1, (%eax)
329 ; X32-NEXT: movl 132(%ebp), %eax
330 ; X32-NEXT: vmovaps %xmm1, (%eax)
332 ; X32-NEXT: movl %ebp, %esp
333 ; X32-NEXT: popl %ebp
336 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
337 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
338 store <2 x i64> %1, ptr %out0
339 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
340 store <2 x i64> %2, ptr %out1
; From here on each store writes %2 rather than the value extracted just above it.
341 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
342 store <2 x i64> %2, ptr %out2
343 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
344 store <2 x i64> %2, ptr %out3
345 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
346 store <2 x i64> %2, ptr %out4
347 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
348 store <2 x i64> %2, ptr %out5
349 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
350 store <2 x i64> %2, ptr %out6
351 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
352 store <2 x i64> %2, ptr %out7
; Element 0 is the i8 member of the result struct.
353 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesencwide256kl: the handle pointer %p becomes
; the instruction's memory operand and the results are stored to %out0..%out7.
; NOTE(review): only %1 and %2 are ever stored; the stores to %out2..%out7 all
; reuse %2, so %3..%8 are dead and the CHECK lines only see %xmm0 and %xmm1
; being written. This looks like a copy/paste slip - if each store is meant to
; use its own extractvalue, fix the operands and regenerate the assertions with
; utils/update_llc_test_checks.py.
357 define i8 @test_mm_aesencwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
358 ; X64-LABEL: test_mm_aesencwide256kl_u8:
359 ; X64: # %bb.0: # %entry
360 ; X64-NEXT: pushq %rbx
361 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
362 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
363 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
364 ; X64-NEXT: aesencwide256kl (%rdi)
366 ; X64-NEXT: movaps %xmm0, (%rsi)
367 ; X64-NEXT: movaps %xmm1, (%rdx)
368 ; X64-NEXT: movaps %xmm1, (%rcx)
369 ; X64-NEXT: movaps %xmm1, (%r8)
370 ; X64-NEXT: movaps %xmm1, (%r9)
371 ; X64-NEXT: movaps %xmm1, (%rbx)
372 ; X64-NEXT: movaps %xmm1, (%r11)
373 ; X64-NEXT: movaps %xmm1, (%r10)
374 ; X64-NEXT: popq %rbx
377 ; X32-LABEL: test_mm_aesencwide256kl_u8:
378 ; X32: # %bb.0: # %entry
379 ; X32-NEXT: pushl %ebp
380 ; X32-NEXT: movl %esp, %ebp
381 ; X32-NEXT: andl $-16, %esp
382 ; X32-NEXT: subl $16, %esp
383 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
384 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
385 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
386 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
387 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
388 ; X32-NEXT: movl 8(%ebp), %eax
389 ; X32-NEXT: aesencwide256kl (%eax)
390 ; X32-NEXT: movl 104(%ebp), %eax
391 ; X32-NEXT: vmovaps %xmm0, (%eax)
392 ; X32-NEXT: movl 108(%ebp), %eax
393 ; X32-NEXT: vmovaps %xmm1, (%eax)
394 ; X32-NEXT: movl 112(%ebp), %eax
395 ; X32-NEXT: vmovaps %xmm1, (%eax)
396 ; X32-NEXT: movl 116(%ebp), %eax
397 ; X32-NEXT: vmovaps %xmm1, (%eax)
398 ; X32-NEXT: movl 120(%ebp), %eax
399 ; X32-NEXT: vmovaps %xmm1, (%eax)
400 ; X32-NEXT: movl 124(%ebp), %eax
401 ; X32-NEXT: vmovaps %xmm1, (%eax)
402 ; X32-NEXT: movl 128(%ebp), %eax
403 ; X32-NEXT: vmovaps %xmm1, (%eax)
404 ; X32-NEXT: movl 132(%ebp), %eax
405 ; X32-NEXT: vmovaps %xmm1, (%eax)
407 ; X32-NEXT: movl %ebp, %esp
408 ; X32-NEXT: popl %ebp
411 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
412 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
413 store <2 x i64> %1, ptr %out0
414 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
415 store <2 x i64> %2, ptr %out1
; From here on each store writes %2 rather than the value extracted just above it.
416 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
417 store <2 x i64> %2, ptr %out2
418 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
419 store <2 x i64> %2, ptr %out3
420 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
421 store <2 x i64> %2, ptr %out4
422 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
423 store <2 x i64> %2, ptr %out5
424 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
425 store <2 x i64> %2, ptr %out6
426 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
427 store <2 x i64> %2, ptr %out7
; Element 0 is the i8 member of the result struct.
428 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Checks lowering of @llvm.x86.aesdecwide256kl: the handle pointer %p becomes
; the instruction's memory operand and the results are stored to %out0..%out7.
; NOTE(review): only %1 and %2 are ever stored; the stores to %out2..%out7 all
; reuse %2, so %3..%8 are dead and the CHECK lines only see %xmm0 and %xmm1
; being written. This looks like a copy/paste slip - if each store is meant to
; use its own extractvalue, fix the operands and regenerate the assertions with
; utils/update_llc_test_checks.py.
432 define i8 @test_mm_aesdecwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
433 ; X64-LABEL: test_mm_aesdecwide256kl_u8:
434 ; X64: # %bb.0: # %entry
435 ; X64-NEXT: pushq %rbx
436 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
437 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
438 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
439 ; X64-NEXT: aesdecwide256kl (%rdi)
441 ; X64-NEXT: movaps %xmm0, (%rsi)
442 ; X64-NEXT: movaps %xmm1, (%rdx)
443 ; X64-NEXT: movaps %xmm1, (%rcx)
444 ; X64-NEXT: movaps %xmm1, (%r8)
445 ; X64-NEXT: movaps %xmm1, (%r9)
446 ; X64-NEXT: movaps %xmm1, (%rbx)
447 ; X64-NEXT: movaps %xmm1, (%r11)
448 ; X64-NEXT: movaps %xmm1, (%r10)
449 ; X64-NEXT: popq %rbx
452 ; X32-LABEL: test_mm_aesdecwide256kl_u8:
453 ; X32: # %bb.0: # %entry
454 ; X32-NEXT: pushl %ebp
455 ; X32-NEXT: movl %esp, %ebp
456 ; X32-NEXT: andl $-16, %esp
457 ; X32-NEXT: subl $16, %esp
458 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
459 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
460 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
461 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
462 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
463 ; X32-NEXT: movl 8(%ebp), %eax
464 ; X32-NEXT: aesdecwide256kl (%eax)
465 ; X32-NEXT: movl 104(%ebp), %eax
466 ; X32-NEXT: vmovaps %xmm0, (%eax)
467 ; X32-NEXT: movl 108(%ebp), %eax
468 ; X32-NEXT: vmovaps %xmm1, (%eax)
469 ; X32-NEXT: movl 112(%ebp), %eax
470 ; X32-NEXT: vmovaps %xmm1, (%eax)
471 ; X32-NEXT: movl 116(%ebp), %eax
472 ; X32-NEXT: vmovaps %xmm1, (%eax)
473 ; X32-NEXT: movl 120(%ebp), %eax
474 ; X32-NEXT: vmovaps %xmm1, (%eax)
475 ; X32-NEXT: movl 124(%ebp), %eax
476 ; X32-NEXT: vmovaps %xmm1, (%eax)
477 ; X32-NEXT: movl 128(%ebp), %eax
478 ; X32-NEXT: vmovaps %xmm1, (%eax)
479 ; X32-NEXT: movl 132(%ebp), %eax
480 ; X32-NEXT: vmovaps %xmm1, (%eax)
482 ; X32-NEXT: movl %ebp, %esp
483 ; X32-NEXT: popl %ebp
486 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
487 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
488 store <2 x i64> %1, ptr %out0
489 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
490 store <2 x i64> %2, ptr %out1
; From here on each store writes %2 rather than the value extracted just above it.
491 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
492 store <2 x i64> %2, ptr %out2
493 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
494 store <2 x i64> %2, ptr %out3
495 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
496 store <2 x i64> %2, ptr %out4
497 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
498 store <2 x i64> %2, ptr %out5
499 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
500 store <2 x i64> %2, ptr %out6
501 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
502 store <2 x i64> %2, ptr %out7
; Element 0 is the i8 member of the result struct.
503 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
507 ; Tests to make sure we can select an appropriate addressing mode for a global.
509 @foo = external dso_local global [64 x i8]
; Same as the aesenc256kl test with a pointer argument, but the handle operand
; is the global @foo. The CHECK lines expect the global to be folded into the
; instruction's memory operand: `foo(%rip)` on X64 and `foo` on X32.
511 define i8 @test_mm_aesenc256kl_u8_global(<2 x i64> %data, ptr %out) {
512 ; X64-LABEL: test_mm_aesenc256kl_u8_global:
513 ; X64: # %bb.0: # %entry
514 ; X64-NEXT: aesenc256kl foo(%rip), %xmm0
516 ; X64-NEXT: movaps %xmm0, (%rdi)
519 ; X32-LABEL: test_mm_aesenc256kl_u8_global:
520 ; X32: # %bb.0: # %entry
521 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
522 ; X32-NEXT: aesenc256kl foo, %xmm0
524 ; X32-NEXT: vmovaps %xmm0, (%ecx)
527 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr @foo)
; Element 1 is the <2 x i64> result; element 0 is the i8 member.
528 %1 = extractvalue { i8, <2 x i64> } %0, 1
529 store <2 x i64> %1, ptr %out
530 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Wide variant of the global-handle test: @llvm.x86.aesdecwide256kl is called
; with @foo as the handle pointer, and the CHECK lines expect `foo(%rip)` on
; X64 and `foo` on X32 as the instruction's memory operand.
; NOTE(review): only %1 and %2 are ever stored; the stores to %out2..%out7 all
; reuse %2, so %3..%8 are dead and the CHECK lines only see %xmm0 and %xmm1
; being written. This looks like a copy/paste slip - if each store is meant to
; use its own extractvalue, fix the operands and regenerate the assertions with
; utils/update_llc_test_checks.py.
534 define i8 @test_mm_aesdecwide256kl_u8_global(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
535 ; X64-LABEL: test_mm_aesdecwide256kl_u8_global:
536 ; X64: # %bb.0: # %entry
537 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
538 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
539 ; X64-NEXT: aesdecwide256kl foo(%rip)
541 ; X64-NEXT: movaps %xmm0, (%rdi)
542 ; X64-NEXT: movaps %xmm1, (%rsi)
543 ; X64-NEXT: movaps %xmm1, (%rdx)
544 ; X64-NEXT: movaps %xmm1, (%rcx)
545 ; X64-NEXT: movaps %xmm1, (%r8)
546 ; X64-NEXT: movaps %xmm1, (%r9)
547 ; X64-NEXT: movaps %xmm1, (%r11)
548 ; X64-NEXT: movaps %xmm1, (%r10)
551 ; X32-LABEL: test_mm_aesdecwide256kl_u8_global:
552 ; X32: # %bb.0: # %entry
553 ; X32-NEXT: pushl %ebp
554 ; X32-NEXT: movl %esp, %ebp
555 ; X32-NEXT: andl $-16, %esp
556 ; X32-NEXT: subl $16, %esp
557 ; X32-NEXT: movl 88(%ebp), %eax
558 ; X32-NEXT: vmovaps 8(%ebp), %xmm3
559 ; X32-NEXT: vmovaps 24(%ebp), %xmm4
560 ; X32-NEXT: vmovaps 40(%ebp), %xmm5
561 ; X32-NEXT: vmovaps 56(%ebp), %xmm6
562 ; X32-NEXT: vmovaps 72(%ebp), %xmm7
563 ; X32-NEXT: aesdecwide256kl foo
564 ; X32-NEXT: vmovaps %xmm0, (%eax)
565 ; X32-NEXT: movl 92(%ebp), %eax
566 ; X32-NEXT: vmovaps %xmm1, (%eax)
567 ; X32-NEXT: movl 96(%ebp), %eax
568 ; X32-NEXT: vmovaps %xmm1, (%eax)
569 ; X32-NEXT: movl 100(%ebp), %eax
570 ; X32-NEXT: vmovaps %xmm1, (%eax)
571 ; X32-NEXT: movl 104(%ebp), %eax
572 ; X32-NEXT: vmovaps %xmm1, (%eax)
573 ; X32-NEXT: movl 108(%ebp), %eax
574 ; X32-NEXT: vmovaps %xmm1, (%eax)
575 ; X32-NEXT: movl 112(%ebp), %eax
576 ; X32-NEXT: vmovaps %xmm1, (%eax)
577 ; X32-NEXT: movl 116(%ebp), %eax
578 ; X32-NEXT: vmovaps %xmm1, (%eax)
580 ; X32-NEXT: movl %ebp, %esp
581 ; X32-NEXT: popl %ebp
584 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr @foo, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
585 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
586 store <2 x i64> %1, ptr %out0
587 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
588 store <2 x i64> %2, ptr %out1
; From here on each store writes %2 rather than the value extracted just above it.
589 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
590 store <2 x i64> %2, ptr %out2
591 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
592 store <2 x i64> %2, ptr %out3
593 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
594 store <2 x i64> %2, ptr %out4
595 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
596 store <2 x i64> %2, ptr %out5
597 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
598 store <2 x i64> %2, ptr %out6
599 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
600 store <2 x i64> %2, ptr %out7
; Element 0 is the i8 member of the result struct.
601 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0