1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s
4 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c
; Fast-isel lowering of @llvm.x86.loadiwkey.
; The call passes %intkey, %enkey_lo, %enkey_hi and %ctl (in that order); the
; checks expect %edi to be copied into %eax, followed by
; `loadiwkey %xmm2, %xmm1`.
6 define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
7 ; CHECK-LABEL: test_loadiwkey:
8 ; CHECK: # %bb.0: # %entry
9 ; CHECK-NEXT: movl %edi, %eax
10 ; CHECK-NEXT: loadiwkey %xmm2, %xmm1
13 tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
; Fast-isel lowering of @llvm.x86.encodekey128.
; The intrinsic returns an i32 plus six <2 x i64> values. Struct elements 1-6
; are stored (align 1) to %h at byte offsets 0, 16, 32, 48, 64 and 80, and
; element 0 is extracted last; it is the i32 member matching the function's
; return type.
; The checks expect `encodekey128 %edi, %eax` followed by unaligned stores
; from %xmm0, %xmm1, %xmm2, %xmm4, %xmm5 and %xmm6 (note: no %xmm3).
17 define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h) {
18 ; CHECK-LABEL: test_encodekey128_u32:
19 ; CHECK: # %bb.0: # %entry
20 ; CHECK-NEXT: encodekey128 %edi, %eax
21 ; CHECK-NEXT: movups %xmm0, (%rsi)
22 ; CHECK-NEXT: movups %xmm1, 16(%rsi)
23 ; CHECK-NEXT: movups %xmm2, 32(%rsi)
24 ; CHECK-NEXT: movups %xmm4, 48(%rsi)
25 ; CHECK-NEXT: movups %xmm5, 64(%rsi)
26 ; CHECK-NEXT: movups %xmm6, 80(%rsi)
29 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
; Spill each vector result to consecutive 16-byte slots of %h.
30 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
31 store <2 x i64> %1, ptr %h, align 1
32 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
33 %3 = getelementptr i8, ptr %h, i64 16
34 store <2 x i64> %2, ptr %3, align 1
35 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
36 %5 = getelementptr i8, ptr %h, i64 32
37 store <2 x i64> %4, ptr %5, align 1
38 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
39 %7 = getelementptr i8, ptr %h, i64 48
40 store <2 x i64> %6, ptr %7, align 1
41 %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
42 %9 = getelementptr i8, ptr %h, i64 64
43 store <2 x i64> %8, ptr %9, align 1
44 %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
45 %11 = getelementptr i8, ptr %h, i64 80
46 store <2 x i64> %10, ptr %11, align 1
; Element 0 is the scalar i32 result of the intrinsic.
47 %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.encodekey256.
; The intrinsic takes %htype plus the two key halves and returns an i32 plus
; seven <2 x i64> values. Struct elements 1-7 are stored (align 1) to %h at
; byte offsets 0 through 96 in 16-byte steps, and element 0 is extracted last;
; it is the i32 member matching the function's return type.
; The checks expect `encodekey256 %edi, %eax` followed by unaligned stores
; from %xmm0 through %xmm6.
51 define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h) {
52 ; CHECK-LABEL: test_encodekey256_u32:
53 ; CHECK: # %bb.0: # %entry
54 ; CHECK-NEXT: encodekey256 %edi, %eax
55 ; CHECK-NEXT: movups %xmm0, (%rsi)
56 ; CHECK-NEXT: movups %xmm1, 16(%rsi)
57 ; CHECK-NEXT: movups %xmm2, 32(%rsi)
58 ; CHECK-NEXT: movups %xmm3, 48(%rsi)
59 ; CHECK-NEXT: movups %xmm4, 64(%rsi)
60 ; CHECK-NEXT: movups %xmm5, 80(%rsi)
61 ; CHECK-NEXT: movups %xmm6, 96(%rsi)
64 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
; Spill each vector result to consecutive 16-byte slots of %h.
65 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
66 store <2 x i64> %1, ptr %h, align 1
67 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
68 %3 = getelementptr i8, ptr %h, i64 16
69 store <2 x i64> %2, ptr %3, align 1
70 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
71 %5 = getelementptr i8, ptr %h, i64 32
72 store <2 x i64> %4, ptr %5, align 1
73 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
74 %7 = getelementptr i8, ptr %h, i64 48
75 store <2 x i64> %6, ptr %7, align 1
76 %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
77 %9 = getelementptr i8, ptr %h, i64 64
78 store <2 x i64> %8, ptr %9, align 1
79 %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
80 %11 = getelementptr i8, ptr %h, i64 80
81 store <2 x i64> %10, ptr %11, align 1
82 %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
83 %13 = getelementptr i8, ptr %h, i64 96
84 store <2 x i64> %12, ptr %13, align 1
; Element 0 is the scalar i32 result of the intrinsic.
85 %14 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.aesenc256kl.
; The intrinsic receives %idata and the pointer %h and returns { i8, <2 x i64> }.
; Element 1 (the vector) is stored to %odata with 16-byte alignment; element 0
; (the i8) is extracted last and matches the function's zeroext i8 return type.
; The checks expect the memory-operand form `aesenc256kl (%rsi), %xmm0`, then
; `sete %al`, then an aligned store of %xmm0 to (%rdi).
89 define zeroext i8 @test_mm_aesenc256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
90 ; CHECK-LABEL: test_mm_aesenc256kl_u8:
91 ; CHECK: # %bb.0: # %entry
92 ; CHECK-NEXT: aesenc256kl (%rsi), %xmm0
93 ; CHECK-NEXT: sete %al
94 ; CHECK-NEXT: movaps %xmm0, (%rdi)
97 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, ptr %h) #1
98 %1 = extractvalue { i8, <2 x i64> } %0, 1
99 store <2 x i64> %1, ptr %odata, align 16
100 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.aesdec256kl.
; The intrinsic receives %idata and the pointer %h and returns { i8, <2 x i64> }.
; Element 1 (the vector) is stored to %odata with 16-byte alignment; element 0
; (the i8) is extracted last and matches the function's zeroext i8 return type.
; The checks expect the memory-operand form `aesdec256kl (%rsi), %xmm0`, then
; `sete %al`, then an aligned store of %xmm0 to (%rdi).
104 define zeroext i8 @test_mm_aesdec256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
105 ; CHECK-LABEL: test_mm_aesdec256kl_u8:
106 ; CHECK: # %bb.0: # %entry
107 ; CHECK-NEXT: aesdec256kl (%rsi), %xmm0
108 ; CHECK-NEXT: sete %al
109 ; CHECK-NEXT: movaps %xmm0, (%rdi)
112 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, ptr %h) #1
113 %1 = extractvalue { i8, <2 x i64> } %0, 1
114 store <2 x i64> %1, ptr %odata, align 16
115 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.aesenc128kl.
; The intrinsic receives %idata and the pointer %h and returns { i8, <2 x i64> }.
; Element 1 (the vector) is stored to %odata with 16-byte alignment; element 0
; (the i8) is extracted last and matches the function's zeroext i8 return type.
; The checks expect the memory-operand form `aesenc128kl (%rsi), %xmm0`, then
; `sete %al`, then an aligned store of %xmm0 to (%rdi).
119 define zeroext i8 @test_mm_aesenc128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
120 ; CHECK-LABEL: test_mm_aesenc128kl_u8:
121 ; CHECK: # %bb.0: # %entry
122 ; CHECK-NEXT: aesenc128kl (%rsi), %xmm0
123 ; CHECK-NEXT: sete %al
124 ; CHECK-NEXT: movaps %xmm0, (%rdi)
127 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, ptr %h) #1
128 %1 = extractvalue { i8, <2 x i64> } %0, 1
129 store <2 x i64> %1, ptr %odata, align 16
130 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.aesdec128kl.
; The intrinsic receives %idata and the pointer %h and returns { i8, <2 x i64> }.
; Element 1 (the vector) is stored to %odata with 16-byte alignment; element 0
; (the i8) is extracted last and matches the function's zeroext i8 return type.
; The checks expect the memory-operand form `aesdec128kl (%rsi), %xmm0`, then
; `sete %al`, then an aligned store of %xmm0 to (%rdi).
134 define zeroext i8 @test_mm_aesdec128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
135 ; CHECK-LABEL: test_mm_aesdec128kl_u8:
136 ; CHECK: # %bb.0: # %entry
137 ; CHECK-NEXT: aesdec128kl (%rsi), %xmm0
138 ; CHECK-NEXT: sete %al
139 ; CHECK-NEXT: movaps %xmm0, (%rdi)
142 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, ptr %h) #1
143 %1 = extractvalue { i8, <2 x i64> } %0, 1
144 store <2 x i64> %1, ptr %odata, align 16
145 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Fast-isel lowering of @llvm.x86.aesencwide128kl.
; Eight <2 x i64> values are loaded from %idata (vector indices 0-7, align 16)
; and passed, after the pointer %h, to the intrinsic. It returns an i8 plus
; eight <2 x i64> values; struct elements 1-8 are stored to %odata at vector
; indices 0-7 (align 16). Element 0 (the i8) is extracted last and matches the
; function's zeroext i8 return type.
; The checks expect eight aligned loads into %xmm0-%xmm7, then
; `aesencwide128kl (%rdx)`, `sete %al`, and eight aligned stores from
; %xmm0-%xmm7.
149 define zeroext i8 @test__mm_aesencwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
150 ; CHECK-LABEL: test__mm_aesencwide128kl_u8:
151 ; CHECK: # %bb.0: # %entry
152 ; CHECK-NEXT: movaps (%rsi), %xmm0
153 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
154 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
155 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
156 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
157 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
158 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
159 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
160 ; CHECK-NEXT: aesencwide128kl (%rdx)
161 ; CHECK-NEXT: sete %al
162 ; CHECK-NEXT: movaps %xmm0, (%rdi)
163 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
164 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
165 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
166 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
167 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
168 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
169 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input vectors from %idata.
172 %0 = load <2 x i64>, ptr %idata, align 16
173 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
174 %2 = load <2 x i64>, ptr %1, align 16
175 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
176 %4 = load <2 x i64>, ptr %3, align 16
177 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
178 %6 = load <2 x i64>, ptr %5, align 16
179 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
180 %8 = load <2 x i64>, ptr %7, align 16
181 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
182 %10 = load <2 x i64>, ptr %9, align 16
183 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
184 %12 = load <2 x i64>, ptr %11, align 16
185 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
186 %14 = load <2 x i64>, ptr %13, align 16
187 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store the eight result vectors to %odata.
188 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
189 store <2 x i64> %16, ptr %odata, align 16
190 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
191 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
192 store <2 x i64> %17, ptr %18, align 16
193 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
194 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
195 store <2 x i64> %19, ptr %20, align 16
196 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
197 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
198 store <2 x i64> %21, ptr %22, align 16
199 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
200 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
201 store <2 x i64> %23, ptr %24, align 16
202 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
203 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
204 store <2 x i64> %25, ptr %26, align 16
205 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
206 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
207 store <2 x i64> %27, ptr %28, align 16
208 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
209 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
210 store <2 x i64> %29, ptr %30, align 16
; Element 0 is the scalar i8 result of the intrinsic.
211 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Fast-isel lowering of @llvm.x86.aesdecwide128kl.
; Eight <2 x i64> values are loaded from %idata (vector indices 0-7, align 16)
; and passed, after the pointer %h, to the intrinsic. It returns an i8 plus
; eight <2 x i64> values; struct elements 1-8 are stored to %odata at vector
; indices 0-7 (align 16). Element 0 (the i8) is extracted last and matches the
; function's zeroext i8 return type.
; The checks expect eight aligned loads into %xmm0-%xmm7, then
; `aesdecwide128kl (%rdx)`, `sete %al`, and eight aligned stores from
; %xmm0-%xmm7.
215 define zeroext i8 @test__mm_aesdecwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
216 ; CHECK-LABEL: test__mm_aesdecwide128kl_u8:
217 ; CHECK: # %bb.0: # %entry
218 ; CHECK-NEXT: movaps (%rsi), %xmm0
219 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
220 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
221 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
222 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
223 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
224 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
225 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
226 ; CHECK-NEXT: aesdecwide128kl (%rdx)
227 ; CHECK-NEXT: sete %al
228 ; CHECK-NEXT: movaps %xmm0, (%rdi)
229 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
230 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
231 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
232 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
233 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
234 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
235 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input vectors from %idata.
238 %0 = load <2 x i64>, ptr %idata, align 16
239 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
240 %2 = load <2 x i64>, ptr %1, align 16
241 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
242 %4 = load <2 x i64>, ptr %3, align 16
243 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
244 %6 = load <2 x i64>, ptr %5, align 16
245 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
246 %8 = load <2 x i64>, ptr %7, align 16
247 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
248 %10 = load <2 x i64>, ptr %9, align 16
249 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
250 %12 = load <2 x i64>, ptr %11, align 16
251 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
252 %14 = load <2 x i64>, ptr %13, align 16
253 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store the eight result vectors to %odata.
254 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
255 store <2 x i64> %16, ptr %odata, align 16
256 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
257 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
258 store <2 x i64> %17, ptr %18, align 16
259 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
260 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
261 store <2 x i64> %19, ptr %20, align 16
262 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
263 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
264 store <2 x i64> %21, ptr %22, align 16
265 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
266 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
267 store <2 x i64> %23, ptr %24, align 16
268 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
269 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
270 store <2 x i64> %25, ptr %26, align 16
271 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
272 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
273 store <2 x i64> %27, ptr %28, align 16
274 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
275 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
276 store <2 x i64> %29, ptr %30, align 16
; Element 0 is the scalar i8 result of the intrinsic.
277 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Fast-isel lowering of @llvm.x86.aesencwide256kl.
; Eight <2 x i64> values are loaded from %idata (vector indices 0-7, align 16)
; and passed, after the pointer %h, to the intrinsic. It returns an i8 plus
; eight <2 x i64> values; struct elements 1-8 are stored to %odata at vector
; indices 0-7 (align 16). Element 0 (the i8) is extracted last and matches the
; function's zeroext i8 return type.
; The checks expect eight aligned loads into %xmm0-%xmm7, then
; `aesencwide256kl (%rdx)`, `sete %al`, and eight aligned stores from
; %xmm0-%xmm7.
281 define zeroext i8 @test__mm_aesencwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
282 ; CHECK-LABEL: test__mm_aesencwide256kl_u8:
283 ; CHECK: # %bb.0: # %entry
284 ; CHECK-NEXT: movaps (%rsi), %xmm0
285 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
286 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
287 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
288 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
289 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
290 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
291 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
292 ; CHECK-NEXT: aesencwide256kl (%rdx)
293 ; CHECK-NEXT: sete %al
294 ; CHECK-NEXT: movaps %xmm0, (%rdi)
295 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
296 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
297 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
298 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
299 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
300 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
301 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input vectors from %idata.
304 %0 = load <2 x i64>, ptr %idata, align 16
305 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
306 %2 = load <2 x i64>, ptr %1, align 16
307 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
308 %4 = load <2 x i64>, ptr %3, align 16
309 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
310 %6 = load <2 x i64>, ptr %5, align 16
311 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
312 %8 = load <2 x i64>, ptr %7, align 16
313 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
314 %10 = load <2 x i64>, ptr %9, align 16
315 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
316 %12 = load <2 x i64>, ptr %11, align 16
317 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
318 %14 = load <2 x i64>, ptr %13, align 16
319 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store the eight result vectors to %odata.
320 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
321 store <2 x i64> %16, ptr %odata, align 16
322 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
323 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
324 store <2 x i64> %17, ptr %18, align 16
325 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
326 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
327 store <2 x i64> %19, ptr %20, align 16
328 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
329 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
330 store <2 x i64> %21, ptr %22, align 16
331 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
332 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
333 store <2 x i64> %23, ptr %24, align 16
334 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
335 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
336 store <2 x i64> %25, ptr %26, align 16
337 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
338 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
339 store <2 x i64> %27, ptr %28, align 16
340 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
341 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
342 store <2 x i64> %29, ptr %30, align 16
; Element 0 is the scalar i8 result of the intrinsic.
343 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Fast-isel lowering of @llvm.x86.aesdecwide256kl.
; Eight <2 x i64> values are loaded from %idata (vector indices 0-7, align 16)
; and passed, after the pointer %h, to the intrinsic. It returns an i8 plus
; eight <2 x i64> values; struct elements 1-8 are stored to %odata at vector
; indices 0-7 (align 16). Element 0 (the i8) is extracted last and matches the
; function's zeroext i8 return type.
; The checks expect eight aligned loads into %xmm0-%xmm7, then
; `aesdecwide256kl (%rdx)`, `sete %al`, and eight aligned stores from
; %xmm0-%xmm7.
347 define zeroext i8 @test__mm_aesdecwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
348 ; CHECK-LABEL: test__mm_aesdecwide256kl_u8:
349 ; CHECK: # %bb.0: # %entry
350 ; CHECK-NEXT: movaps (%rsi), %xmm0
351 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
352 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
353 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
354 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
355 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
356 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
357 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
358 ; CHECK-NEXT: aesdecwide256kl (%rdx)
359 ; CHECK-NEXT: sete %al
360 ; CHECK-NEXT: movaps %xmm0, (%rdi)
361 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
362 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
363 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
364 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
365 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
366 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
367 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input vectors from %idata.
370 %0 = load <2 x i64>, ptr %idata, align 16
371 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
372 %2 = load <2 x i64>, ptr %1, align 16
373 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
374 %4 = load <2 x i64>, ptr %3, align 16
375 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
376 %6 = load <2 x i64>, ptr %5, align 16
377 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
378 %8 = load <2 x i64>, ptr %7, align 16
379 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
380 %10 = load <2 x i64>, ptr %9, align 16
381 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
382 %12 = load <2 x i64>, ptr %11, align 16
383 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
384 %14 = load <2 x i64>, ptr %13, align 16
385 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store the eight result vectors to %odata.
386 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
387 store <2 x i64> %16, ptr %odata, align 16
388 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
389 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
390 store <2 x i64> %17, ptr %18, align 16
391 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
392 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
393 store <2 x i64> %19, ptr %20, align 16
394 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
395 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
396 store <2 x i64> %21, ptr %22, align 16
397 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
398 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
399 store <2 x i64> %23, ptr %24, align 16
400 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
401 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
402 store <2 x i64> %25, ptr %26, align 16
403 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
404 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
405 store <2 x i64> %27, ptr %28, align 16
406 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
407 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
408 store <2 x i64> %29, ptr %30, align 16
; Element 0 is the scalar i8 result of the intrinsic.
409 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Declarations of the intrinsics called by the tests above.
; - loadiwkey takes three <2 x i64> values and an i32 and returns nothing.
; - encodekey128/256 return an i32 followed by six/seven <2 x i64> values.
; - aes{enc,dec}{128,256}kl take one <2 x i64> and a pointer and return
;   { i8, <2 x i64> }.
; - aes{enc,dec}wide{128,256}kl take a pointer and eight <2 x i64> values and
;   return an i8 followed by eight <2 x i64> values.
413 declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
414 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
415 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
416 declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
417 declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
418 declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
419 declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
420 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
421 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
422 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
423 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)