1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64
3 ; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32
4 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64
5 ; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X32
7 ; Internal-wrapping-key load: consumes the control word and the three 128-bit
7 ; key parts; no value is returned.
7 declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
8 ; Key-encode intrinsics: return a status i32 plus six (128-bit key) or seven
8 ; (256-bit key) handle/output XMM values as a literal struct.
8 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
9 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
10 ; Single-block encrypt/decrypt with a key handle loaded through the i8*
10 ; pointer; return an i8 status flag and the transformed block.
10 declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, i8*)
11 declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, i8*)
12 declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, i8*)
13 declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, i8*)
14 ; Wide (8-block) variants: take the handle pointer plus eight input blocks,
14 ; and return an i8 status flag followed by the eight transformed blocks.
14 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
15 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
16 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
17 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8*, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
19 define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
20 ; X64-LABEL: test_loadiwkey:
21 ; X64: # %bb.0: # %entry
22 ; X64-NEXT: movl %edi, %eax
23 ; X64-NEXT: loadiwkey %xmm2, %xmm1
26 ; X32-LABEL: test_loadiwkey:
27 ; X32: # %bb.0: # %entry
28 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
29 ; X32-NEXT: loadiwkey %xmm2, %xmm1
32 tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
36 define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5) nounwind {
37 ; X64-LABEL: test_encodekey128_u32:
38 ; X64: # %bb.0: # %entry
39 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
40 ; X64-NEXT: encodekey128 %edi, %eax
41 ; X64-NEXT: movaps %xmm0, (%rsi)
42 ; X64-NEXT: movaps %xmm1, (%rdx)
43 ; X64-NEXT: movaps %xmm2, (%rcx)
44 ; X64-NEXT: movaps %xmm4, (%r8)
45 ; X64-NEXT: movaps %xmm5, (%r9)
46 ; X64-NEXT: movaps %xmm6, (%r10)
49 ; X32-LABEL: test_encodekey128_u32:
50 ; X32: # %bb.0: # %entry
51 ; X32-NEXT: pushl %ebp
52 ; X32-NEXT: pushl %ebx
53 ; X32-NEXT: pushl %edi
54 ; X32-NEXT: pushl %esi
55 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
56 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
57 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
58 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
59 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
60 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
61 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
62 ; X32-NEXT: encodekey128 %eax, %eax
63 ; X32-NEXT: vmovaps %xmm0, (%ebp)
64 ; X32-NEXT: vmovaps %xmm1, (%ebx)
65 ; X32-NEXT: vmovaps %xmm2, (%edi)
66 ; X32-NEXT: vmovaps %xmm4, (%esi)
67 ; X32-NEXT: vmovaps %xmm5, (%edx)
68 ; X32-NEXT: vmovaps %xmm6, (%ecx)
75 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
76 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
77 store <2 x i64> %1, <2 x i64>* %h0, align 16
78 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
79 store <2 x i64> %2, <2 x i64>* %h1, align 16
80 %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
81 store <2 x i64> %3, <2 x i64>* %h2, align 16
82 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
83 store <2 x i64> %4, <2 x i64>* %h3, align 16
84 %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
85 store <2 x i64> %5, <2 x i64>* %h4, align 16
86 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
87 store <2 x i64> %6, <2 x i64>* %h5, align 16
88 %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
92 define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, <2 x i64>* nocapture %h0, <2 x i64>* nocapture %h1, <2 x i64>* nocapture %h2, <2 x i64>* nocapture %h3, <2 x i64>* nocapture %h4, <2 x i64>* nocapture %h5, <2 x i64>* nocapture readnone %h6) nounwind {
93 ; X64-LABEL: test_encodekey256_u32:
94 ; X64: # %bb.0: # %entry
95 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
96 ; X64-NEXT: encodekey256 %edi, %eax
97 ; X64-NEXT: movaps %xmm0, (%rsi)
98 ; X64-NEXT: movaps %xmm1, (%rdx)
99 ; X64-NEXT: movaps %xmm2, (%rcx)
100 ; X64-NEXT: movaps %xmm3, (%r8)
101 ; X64-NEXT: movaps %xmm4, (%r9)
102 ; X64-NEXT: movaps %xmm5, (%r10)
105 ; X32-LABEL: test_encodekey256_u32:
106 ; X32: # %bb.0: # %entry
107 ; X32-NEXT: pushl %ebp
108 ; X32-NEXT: pushl %ebx
109 ; X32-NEXT: pushl %edi
110 ; X32-NEXT: pushl %esi
111 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
112 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
113 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
114 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
115 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
116 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
117 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
118 ; X32-NEXT: encodekey256 %eax, %eax
119 ; X32-NEXT: vmovaps %xmm0, (%ebp)
120 ; X32-NEXT: vmovaps %xmm1, (%ebx)
121 ; X32-NEXT: vmovaps %xmm2, (%edi)
122 ; X32-NEXT: vmovaps %xmm3, (%esi)
123 ; X32-NEXT: vmovaps %xmm4, (%edx)
124 ; X32-NEXT: vmovaps %xmm5, (%ecx)
125 ; X32-NEXT: popl %esi
126 ; X32-NEXT: popl %edi
127 ; X32-NEXT: popl %ebx
128 ; X32-NEXT: popl %ebp
131 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
132 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
133 store <2 x i64> %1, <2 x i64>* %h0, align 16
134 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
135 store <2 x i64> %2, <2 x i64>* %h1, align 16
136 %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
137 store <2 x i64> %3, <2 x i64>* %h2, align 16
138 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
139 store <2 x i64> %4, <2 x i64>* %h3, align 16
140 %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
141 store <2 x i64> %5, <2 x i64>* %h4, align 16
142 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
143 store <2 x i64> %6, <2 x i64>* %h5, align 16
144 %7 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
148 define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
149 ; X64-LABEL: test_mm_aesenc128kl_u8:
150 ; X64: # %bb.0: # %entry
151 ; X64-NEXT: aesenc128kl (%rdi), %xmm0
153 ; X64-NEXT: movaps %xmm0, (%rsi)
156 ; X32-LABEL: test_mm_aesenc128kl_u8:
157 ; X32: # %bb.0: # %entry
158 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
159 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
160 ; X32-NEXT: aesenc128kl (%eax), %xmm0
162 ; X32-NEXT: vmovaps %xmm0, (%ecx)
165 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, i8* %h)
166 %1 = extractvalue { i8, <2 x i64> } %0, 1
167 store <2 x i64> %1, <2 x i64>* %out
168 %2 = extractvalue { i8, <2 x i64> } %0, 0
172 define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
173 ; X64-LABEL: test_mm_aesdec128kl_u8:
174 ; X64: # %bb.0: # %entry
175 ; X64-NEXT: aesdec128kl (%rdi), %xmm0
177 ; X64-NEXT: movaps %xmm0, (%rsi)
180 ; X32-LABEL: test_mm_aesdec128kl_u8:
181 ; X32: # %bb.0: # %entry
182 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
183 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
184 ; X32-NEXT: aesdec128kl (%eax), %xmm0
186 ; X32-NEXT: vmovaps %xmm0, (%ecx)
189 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, i8* %h)
190 %1 = extractvalue { i8, <2 x i64> } %0, 1
191 store <2 x i64> %1, <2 x i64>* %out
192 %2 = extractvalue { i8, <2 x i64> } %0, 0
196 define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
197 ; X64-LABEL: test_mm_aesenc256kl_u8:
198 ; X64: # %bb.0: # %entry
199 ; X64-NEXT: aesenc256kl (%rdi), %xmm0
201 ; X64-NEXT: movaps %xmm0, (%rsi)
204 ; X32-LABEL: test_mm_aesenc256kl_u8:
205 ; X32: # %bb.0: # %entry
206 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
207 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
208 ; X32-NEXT: aesenc256kl (%eax), %xmm0
210 ; X32-NEXT: vmovaps %xmm0, (%ecx)
213 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h)
214 %1 = extractvalue { i8, <2 x i64> } %0, 1
215 store <2 x i64> %1, <2 x i64>* %out
216 %2 = extractvalue { i8, <2 x i64> } %0, 0
220 define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, i8* %h, <2 x i64>* %out) {
221 ; X64-LABEL: test_mm_aesdec256kl_u8:
222 ; X64: # %bb.0: # %entry
223 ; X64-NEXT: aesdec256kl (%rdi), %xmm0
225 ; X64-NEXT: movaps %xmm0, (%rsi)
228 ; X32-LABEL: test_mm_aesdec256kl_u8:
229 ; X32: # %bb.0: # %entry
230 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
231 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
232 ; X32-NEXT: aesdec256kl (%eax), %xmm0
234 ; X32-NEXT: vmovaps %xmm0, (%ecx)
237 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, i8* %h)
238 %1 = extractvalue { i8, <2 x i64> } %0, 1
239 store <2 x i64> %1, <2 x i64>* %out
240 %2 = extractvalue { i8, <2 x i64> } %0, 0
244 define i8 @test_mm_aesencwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
245 ; X64-LABEL: test_mm_aesencwide128kl_u8:
246 ; X64: # %bb.0: # %entry
247 ; X64-NEXT: pushq %rbx
248 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
249 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
250 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
251 ; X64-NEXT: aesencwide128kl (%rdi)
253 ; X64-NEXT: movaps %xmm0, (%rsi)
254 ; X64-NEXT: movaps %xmm1, (%rdx)
255 ; X64-NEXT: movaps %xmm1, (%rcx)
256 ; X64-NEXT: movaps %xmm1, (%r8)
257 ; X64-NEXT: movaps %xmm1, (%r9)
258 ; X64-NEXT: movaps %xmm1, (%rbx)
259 ; X64-NEXT: movaps %xmm1, (%r11)
260 ; X64-NEXT: movaps %xmm1, (%r10)
261 ; X64-NEXT: popq %rbx
264 ; X32-LABEL: test_mm_aesencwide128kl_u8:
265 ; X32: # %bb.0: # %entry
266 ; X32-NEXT: pushl %ebp
267 ; X32-NEXT: movl %esp, %ebp
268 ; X32-NEXT: andl $-16, %esp
269 ; X32-NEXT: subl $16, %esp
270 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
271 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
272 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
273 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
274 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
275 ; X32-NEXT: movl 8(%ebp), %eax
276 ; X32-NEXT: aesencwide128kl (%eax)
277 ; X32-NEXT: movl 104(%ebp), %eax
278 ; X32-NEXT: vmovaps %xmm0, (%eax)
279 ; X32-NEXT: movl 108(%ebp), %eax
280 ; X32-NEXT: vmovaps %xmm1, (%eax)
281 ; X32-NEXT: movl 112(%ebp), %eax
282 ; X32-NEXT: vmovaps %xmm1, (%eax)
283 ; X32-NEXT: movl 116(%ebp), %eax
284 ; X32-NEXT: vmovaps %xmm1, (%eax)
285 ; X32-NEXT: movl 120(%ebp), %eax
286 ; X32-NEXT: vmovaps %xmm1, (%eax)
287 ; X32-NEXT: movl 124(%ebp), %eax
288 ; X32-NEXT: vmovaps %xmm1, (%eax)
289 ; X32-NEXT: movl 128(%ebp), %eax
290 ; X32-NEXT: vmovaps %xmm1, (%eax)
291 ; X32-NEXT: movl 132(%ebp), %eax
292 ; X32-NEXT: vmovaps %xmm1, (%eax)
294 ; X32-NEXT: movl %ebp, %esp
295 ; X32-NEXT: popl %ebp
298 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
299 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
300 store <2 x i64> %1, <2 x i64>* %out0
301 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
302 store <2 x i64> %2, <2 x i64>* %out1
303 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
304 store <2 x i64> %2, <2 x i64>* %out2
305 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
306 store <2 x i64> %2, <2 x i64>* %out3
307 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
308 store <2 x i64> %2, <2 x i64>* %out4
309 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
310 store <2 x i64> %2, <2 x i64>* %out5
311 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
312 store <2 x i64> %2, <2 x i64>* %out6
313 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
314 store <2 x i64> %2, <2 x i64>* %out7
315 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
319 define i8 @test_mm_aesdecwide128kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
320 ; X64-LABEL: test_mm_aesdecwide128kl_u8:
321 ; X64: # %bb.0: # %entry
322 ; X64-NEXT: pushq %rbx
323 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
324 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
325 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
326 ; X64-NEXT: aesdecwide128kl (%rdi)
328 ; X64-NEXT: movaps %xmm0, (%rsi)
329 ; X64-NEXT: movaps %xmm1, (%rdx)
330 ; X64-NEXT: movaps %xmm1, (%rcx)
331 ; X64-NEXT: movaps %xmm1, (%r8)
332 ; X64-NEXT: movaps %xmm1, (%r9)
333 ; X64-NEXT: movaps %xmm1, (%rbx)
334 ; X64-NEXT: movaps %xmm1, (%r11)
335 ; X64-NEXT: movaps %xmm1, (%r10)
336 ; X64-NEXT: popq %rbx
339 ; X32-LABEL: test_mm_aesdecwide128kl_u8:
340 ; X32: # %bb.0: # %entry
341 ; X32-NEXT: pushl %ebp
342 ; X32-NEXT: movl %esp, %ebp
343 ; X32-NEXT: andl $-16, %esp
344 ; X32-NEXT: subl $16, %esp
345 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
346 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
347 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
348 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
349 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
350 ; X32-NEXT: movl 8(%ebp), %eax
351 ; X32-NEXT: aesdecwide128kl (%eax)
352 ; X32-NEXT: movl 104(%ebp), %eax
353 ; X32-NEXT: vmovaps %xmm0, (%eax)
354 ; X32-NEXT: movl 108(%ebp), %eax
355 ; X32-NEXT: vmovaps %xmm1, (%eax)
356 ; X32-NEXT: movl 112(%ebp), %eax
357 ; X32-NEXT: vmovaps %xmm1, (%eax)
358 ; X32-NEXT: movl 116(%ebp), %eax
359 ; X32-NEXT: vmovaps %xmm1, (%eax)
360 ; X32-NEXT: movl 120(%ebp), %eax
361 ; X32-NEXT: vmovaps %xmm1, (%eax)
362 ; X32-NEXT: movl 124(%ebp), %eax
363 ; X32-NEXT: vmovaps %xmm1, (%eax)
364 ; X32-NEXT: movl 128(%ebp), %eax
365 ; X32-NEXT: vmovaps %xmm1, (%eax)
366 ; X32-NEXT: movl 132(%ebp), %eax
367 ; X32-NEXT: vmovaps %xmm1, (%eax)
369 ; X32-NEXT: movl %ebp, %esp
370 ; X32-NEXT: popl %ebp
373 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
374 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
375 store <2 x i64> %1, <2 x i64>* %out0
376 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
377 store <2 x i64> %2, <2 x i64>* %out1
378 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
379 store <2 x i64> %2, <2 x i64>* %out2
380 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
381 store <2 x i64> %2, <2 x i64>* %out3
382 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
383 store <2 x i64> %2, <2 x i64>* %out4
384 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
385 store <2 x i64> %2, <2 x i64>* %out5
386 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
387 store <2 x i64> %2, <2 x i64>* %out6
388 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
389 store <2 x i64> %2, <2 x i64>* %out7
390 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
394 define i8 @test_mm_aesencwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
395 ; X64-LABEL: test_mm_aesencwide256kl_u8:
396 ; X64: # %bb.0: # %entry
397 ; X64-NEXT: pushq %rbx
398 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
399 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
400 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
401 ; X64-NEXT: aesencwide256kl (%rdi)
403 ; X64-NEXT: movaps %xmm0, (%rsi)
404 ; X64-NEXT: movaps %xmm1, (%rdx)
405 ; X64-NEXT: movaps %xmm1, (%rcx)
406 ; X64-NEXT: movaps %xmm1, (%r8)
407 ; X64-NEXT: movaps %xmm1, (%r9)
408 ; X64-NEXT: movaps %xmm1, (%rbx)
409 ; X64-NEXT: movaps %xmm1, (%r11)
410 ; X64-NEXT: movaps %xmm1, (%r10)
411 ; X64-NEXT: popq %rbx
414 ; X32-LABEL: test_mm_aesencwide256kl_u8:
415 ; X32: # %bb.0: # %entry
416 ; X32-NEXT: pushl %ebp
417 ; X32-NEXT: movl %esp, %ebp
418 ; X32-NEXT: andl $-16, %esp
419 ; X32-NEXT: subl $16, %esp
420 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
421 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
422 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
423 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
424 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
425 ; X32-NEXT: movl 8(%ebp), %eax
426 ; X32-NEXT: aesencwide256kl (%eax)
427 ; X32-NEXT: movl 104(%ebp), %eax
428 ; X32-NEXT: vmovaps %xmm0, (%eax)
429 ; X32-NEXT: movl 108(%ebp), %eax
430 ; X32-NEXT: vmovaps %xmm1, (%eax)
431 ; X32-NEXT: movl 112(%ebp), %eax
432 ; X32-NEXT: vmovaps %xmm1, (%eax)
433 ; X32-NEXT: movl 116(%ebp), %eax
434 ; X32-NEXT: vmovaps %xmm1, (%eax)
435 ; X32-NEXT: movl 120(%ebp), %eax
436 ; X32-NEXT: vmovaps %xmm1, (%eax)
437 ; X32-NEXT: movl 124(%ebp), %eax
438 ; X32-NEXT: vmovaps %xmm1, (%eax)
439 ; X32-NEXT: movl 128(%ebp), %eax
440 ; X32-NEXT: vmovaps %xmm1, (%eax)
441 ; X32-NEXT: movl 132(%ebp), %eax
442 ; X32-NEXT: vmovaps %xmm1, (%eax)
444 ; X32-NEXT: movl %ebp, %esp
445 ; X32-NEXT: popl %ebp
448 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
449 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
450 store <2 x i64> %1, <2 x i64>* %out0
451 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
452 store <2 x i64> %2, <2 x i64>* %out1
453 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
454 store <2 x i64> %2, <2 x i64>* %out2
455 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
456 store <2 x i64> %2, <2 x i64>* %out3
457 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
458 store <2 x i64> %2, <2 x i64>* %out4
459 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
460 store <2 x i64> %2, <2 x i64>* %out5
461 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
462 store <2 x i64> %2, <2 x i64>* %out6
463 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
464 store <2 x i64> %2, <2 x i64>* %out7
465 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
469 define i8 @test_mm_aesdecwide256kl_u8(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
470 ; X64-LABEL: test_mm_aesdecwide256kl_u8:
471 ; X64: # %bb.0: # %entry
472 ; X64-NEXT: pushq %rbx
473 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
474 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
475 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
476 ; X64-NEXT: aesdecwide256kl (%rdi)
478 ; X64-NEXT: movaps %xmm0, (%rsi)
479 ; X64-NEXT: movaps %xmm1, (%rdx)
480 ; X64-NEXT: movaps %xmm1, (%rcx)
481 ; X64-NEXT: movaps %xmm1, (%r8)
482 ; X64-NEXT: movaps %xmm1, (%r9)
483 ; X64-NEXT: movaps %xmm1, (%rbx)
484 ; X64-NEXT: movaps %xmm1, (%r11)
485 ; X64-NEXT: movaps %xmm1, (%r10)
486 ; X64-NEXT: popq %rbx
489 ; X32-LABEL: test_mm_aesdecwide256kl_u8:
490 ; X32: # %bb.0: # %entry
491 ; X32-NEXT: pushl %ebp
492 ; X32-NEXT: movl %esp, %ebp
493 ; X32-NEXT: andl $-16, %esp
494 ; X32-NEXT: subl $16, %esp
495 ; X32-NEXT: vmovaps 24(%ebp), %xmm3
496 ; X32-NEXT: vmovaps 40(%ebp), %xmm4
497 ; X32-NEXT: vmovaps 56(%ebp), %xmm5
498 ; X32-NEXT: vmovaps 72(%ebp), %xmm6
499 ; X32-NEXT: vmovaps 88(%ebp), %xmm7
500 ; X32-NEXT: movl 8(%ebp), %eax
501 ; X32-NEXT: aesdecwide256kl (%eax)
502 ; X32-NEXT: movl 104(%ebp), %eax
503 ; X32-NEXT: vmovaps %xmm0, (%eax)
504 ; X32-NEXT: movl 108(%ebp), %eax
505 ; X32-NEXT: vmovaps %xmm1, (%eax)
506 ; X32-NEXT: movl 112(%ebp), %eax
507 ; X32-NEXT: vmovaps %xmm1, (%eax)
508 ; X32-NEXT: movl 116(%ebp), %eax
509 ; X32-NEXT: vmovaps %xmm1, (%eax)
510 ; X32-NEXT: movl 120(%ebp), %eax
511 ; X32-NEXT: vmovaps %xmm1, (%eax)
512 ; X32-NEXT: movl 124(%ebp), %eax
513 ; X32-NEXT: vmovaps %xmm1, (%eax)
514 ; X32-NEXT: movl 128(%ebp), %eax
515 ; X32-NEXT: vmovaps %xmm1, (%eax)
516 ; X32-NEXT: movl 132(%ebp), %eax
517 ; X32-NEXT: vmovaps %xmm1, (%eax)
519 ; X32-NEXT: movl %ebp, %esp
520 ; X32-NEXT: popl %ebp
523 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
524 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
525 store <2 x i64> %1, <2 x i64>* %out0
526 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
527 store <2 x i64> %2, <2 x i64>* %out1
528 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
529 store <2 x i64> %2, <2 x i64>* %out2
530 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
531 store <2 x i64> %2, <2 x i64>* %out3
532 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
533 store <2 x i64> %2, <2 x i64>* %out4
534 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
535 store <2 x i64> %2, <2 x i64>* %out5
536 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
537 store <2 x i64> %2, <2 x i64>* %out6
538 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
539 store <2 x i64> %2, <2 x i64>* %out7
540 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
544 ; Tests to make sure we can select an appropriate addressing mode for a global.
546 @foo = external dso_local global [64 x i8]
548 define i8 @test_mm_aesenc256kl_u8_global(<2 x i64> %data, <2 x i64>* %out) {
549 ; X64-LABEL: test_mm_aesenc256kl_u8_global:
550 ; X64: # %bb.0: # %entry
551 ; X64-NEXT: aesenc256kl foo(%rip), %xmm0
553 ; X64-NEXT: movaps %xmm0, (%rdi)
556 ; X32-LABEL: test_mm_aesenc256kl_u8_global:
557 ; X32: # %bb.0: # %entry
558 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
559 ; X32-NEXT: aesenc256kl foo, %xmm0
561 ; X32-NEXT: vmovaps %xmm0, (%ecx)
564 %h = bitcast [64 x i8]* @foo to i8*
565 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, i8* %h)
566 %1 = extractvalue { i8, <2 x i64> } %0, 1
567 store <2 x i64> %1, <2 x i64>* %out
568 %2 = extractvalue { i8, <2 x i64> } %0, 0
572 define i8 @test_mm_aesdecwide256kl_u8_global(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, <2 x i64>* %out0, <2 x i64>* %out1, <2 x i64>* %out2, <2 x i64>* %out3, <2 x i64>* %out4, <2 x i64>* %out5, <2 x i64>* %out6, <2 x i64>* %out7) nounwind {
573 ; X64-LABEL: test_mm_aesdecwide256kl_u8_global:
574 ; X64: # %bb.0: # %entry
575 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
576 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
577 ; X64-NEXT: aesdecwide256kl foo(%rip)
579 ; X64-NEXT: movaps %xmm0, (%rdi)
580 ; X64-NEXT: movaps %xmm1, (%rsi)
581 ; X64-NEXT: movaps %xmm1, (%rdx)
582 ; X64-NEXT: movaps %xmm1, (%rcx)
583 ; X64-NEXT: movaps %xmm1, (%r8)
584 ; X64-NEXT: movaps %xmm1, (%r9)
585 ; X64-NEXT: movaps %xmm1, (%r11)
586 ; X64-NEXT: movaps %xmm1, (%r10)
589 ; X32-LABEL: test_mm_aesdecwide256kl_u8_global:
590 ; X32: # %bb.0: # %entry
591 ; X32-NEXT: pushl %ebp
592 ; X32-NEXT: movl %esp, %ebp
593 ; X32-NEXT: andl $-16, %esp
594 ; X32-NEXT: subl $16, %esp
595 ; X32-NEXT: movl 88(%ebp), %eax
596 ; X32-NEXT: vmovaps 8(%ebp), %xmm3
597 ; X32-NEXT: vmovaps 24(%ebp), %xmm4
598 ; X32-NEXT: vmovaps 40(%ebp), %xmm5
599 ; X32-NEXT: vmovaps 56(%ebp), %xmm6
600 ; X32-NEXT: vmovaps 72(%ebp), %xmm7
601 ; X32-NEXT: aesdecwide256kl foo
602 ; X32-NEXT: vmovaps %xmm0, (%eax)
603 ; X32-NEXT: movl 92(%ebp), %eax
604 ; X32-NEXT: vmovaps %xmm1, (%eax)
605 ; X32-NEXT: movl 96(%ebp), %eax
606 ; X32-NEXT: vmovaps %xmm1, (%eax)
607 ; X32-NEXT: movl 100(%ebp), %eax
608 ; X32-NEXT: vmovaps %xmm1, (%eax)
609 ; X32-NEXT: movl 104(%ebp), %eax
610 ; X32-NEXT: vmovaps %xmm1, (%eax)
611 ; X32-NEXT: movl 108(%ebp), %eax
612 ; X32-NEXT: vmovaps %xmm1, (%eax)
613 ; X32-NEXT: movl 112(%ebp), %eax
614 ; X32-NEXT: vmovaps %xmm1, (%eax)
615 ; X32-NEXT: movl 116(%ebp), %eax
616 ; X32-NEXT: vmovaps %xmm1, (%eax)
618 ; X32-NEXT: movl %ebp, %esp
619 ; X32-NEXT: popl %ebp
622 %p = bitcast [64 x i8]* @foo to i8*
623 %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(i8* %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
624 %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
625 store <2 x i64> %1, <2 x i64>* %out0
626 %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
627 store <2 x i64> %2, <2 x i64>* %out1
628 %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
629 store <2 x i64> %2, <2 x i64>* %out2
630 %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
631 store <2 x i64> %2, <2 x i64>* %out3
632 %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
633 store <2 x i64> %2, <2 x i64>* %out4
634 %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
635 store <2 x i64> %2, <2 x i64>* %out5
636 %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
637 store <2 x i64> %2, <2 x i64>* %out6
638 %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
639 store <2 x i64> %2, <2 x i64>* %out7
640 %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0