1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s
4 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c
; Exercises @llvm.x86.loadiwkey. The call passes %intkey, %enkey_lo, %enkey_hi
; and %ctl (in that order, which differs from the function's own parameter
; order). The expected assembly copies %edi into %eax and then issues loadiwkey
; with the explicit operands %xmm2, %xmm1.
; NOTE(review): presumably %eax (control word) and %xmm0 (%intkey) are implicit
; operands of the instruction - confirm against the Key Locker ISA reference.
6 define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
7 ; CHECK-LABEL: test_loadiwkey:
8 ; CHECK: # %bb.0: # %entry
9 ; CHECK-NEXT: movl %edi, %eax
10 ; CHECK-NEXT: loadiwkey %xmm2, %xmm1
13 tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
; Exercises @llvm.x86.encodekey128. Result elements 1-6 (the <2 x i64> values)
; are stored to %h at byte offsets 0, 16, 32, 48, 64 and 80 with align 1, which
; is why the expected assembly uses unaligned movups stores. Element 0 (the
; i32) is extracted last; its use is outside this view.
; The expected stores read %xmm0-%xmm2 and %xmm4-%xmm6; %xmm3 is not stored.
; NOTE(review): presumably the instruction does not produce a handle value in
; %xmm3 - confirm against the Key Locker ISA reference.
17 define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h) {
18 ; CHECK-LABEL: test_encodekey128_u32:
19 ; CHECK: # %bb.0: # %entry
20 ; CHECK-NEXT: encodekey128 %edi, %eax
21 ; CHECK-NEXT: movups %xmm0, (%rsi)
22 ; CHECK-NEXT: movups %xmm1, 16(%rsi)
23 ; CHECK-NEXT: movups %xmm2, 32(%rsi)
24 ; CHECK-NEXT: movups %xmm4, 48(%rsi)
25 ; CHECK-NEXT: movups %xmm5, 64(%rsi)
26 ; CHECK-NEXT: movups %xmm6, 80(%rsi)
29 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
; Store each vector element of the result to consecutive 16-byte slots of %h.
30 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
31 store <2 x i64> %1, ptr %h, align 1
32 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
33 %3 = getelementptr i8, ptr %h, i64 16
34 store <2 x i64> %2, ptr %3, align 1
35 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
36 %5 = getelementptr i8, ptr %h, i64 32
37 store <2 x i64> %4, ptr %5, align 1
38 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
39 %7 = getelementptr i8, ptr %h, i64 48
40 store <2 x i64> %6, ptr %7, align 1
41 %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
42 %9 = getelementptr i8, ptr %h, i64 64
43 store <2 x i64> %8, ptr %9, align 1
44 %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
45 %11 = getelementptr i8, ptr %h, i64 80
46 store <2 x i64> %10, ptr %11, align 1
; The i32 element of the result is extracted last.
47 %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Exercises @llvm.x86.encodekey256, which takes the key as two <2 x i64> halves
; (%key_lo, %key_hi). Result elements 1-7 are stored to %h at byte offsets 0
; through 96 in 16-byte steps with align 1, so the expected assembly uses
; unaligned movups stores from %xmm0-%xmm6. Element 0 (the i32) is extracted
; last; its use is outside this view.
51 define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h) {
52 ; CHECK-LABEL: test_encodekey256_u32:
53 ; CHECK: # %bb.0: # %entry
54 ; CHECK-NEXT: encodekey256 %edi, %eax
55 ; CHECK-NEXT: movups %xmm0, (%rsi)
56 ; CHECK-NEXT: movups %xmm1, 16(%rsi)
57 ; CHECK-NEXT: movups %xmm2, 32(%rsi)
58 ; CHECK-NEXT: movups %xmm3, 48(%rsi)
59 ; CHECK-NEXT: movups %xmm4, 64(%rsi)
60 ; CHECK-NEXT: movups %xmm5, 80(%rsi)
61 ; CHECK-NEXT: movups %xmm6, 96(%rsi)
64 %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
; Store each vector element of the result to consecutive 16-byte slots of %h.
65 %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
66 store <2 x i64> %1, ptr %h, align 1
67 %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
68 %3 = getelementptr i8, ptr %h, i64 16
69 store <2 x i64> %2, ptr %3, align 1
70 %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
71 %5 = getelementptr i8, ptr %h, i64 32
72 store <2 x i64> %4, ptr %5, align 1
73 %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
74 %7 = getelementptr i8, ptr %h, i64 48
75 store <2 x i64> %6, ptr %7, align 1
76 %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
77 %9 = getelementptr i8, ptr %h, i64 64
78 store <2 x i64> %8, ptr %9, align 1
79 %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
80 %11 = getelementptr i8, ptr %h, i64 80
81 store <2 x i64> %10, ptr %11, align 1
82 %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
83 %13 = getelementptr i8, ptr %h, i64 96
84 store <2 x i64> %12, ptr %13, align 1
; The i32 element of the result is extracted last.
85 %14 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
; Exercises @llvm.x86.aesenc256kl. The intrinsic takes the data vector %idata
; and the handle pointer %h; the vector element of its result is stored to
; %odata with align 16 (aligned movaps in the expected assembly), and the i8
; element is extracted last. The expected assembly zeroes %eax before the
; instruction and materialises the i8 with sete %al afterwards.
89 define zeroext i8 @test_mm_aesenc256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
90 ; CHECK-LABEL: test_mm_aesenc256kl_u8:
91 ; CHECK: # %bb.0: # %entry
92 ; CHECK-NEXT: xorl %eax, %eax
93 ; CHECK-NEXT: aesenc256kl (%rsi), %xmm0
94 ; CHECK-NEXT: sete %al
95 ; CHECK-NEXT: movaps %xmm0, (%rdi)
98 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, ptr %h) #1
99 %1 = extractvalue { i8, <2 x i64> } %0, 1
100 store <2 x i64> %1, ptr %odata, align 16
101 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Exercises @llvm.x86.aesdec256kl. The intrinsic takes the data vector %idata
; and the handle pointer %h; the vector element of its result is stored to
; %odata with align 16 (aligned movaps in the expected assembly), and the i8
; element is extracted last. The expected assembly zeroes %eax before the
; instruction and materialises the i8 with sete %al afterwards.
105 define zeroext i8 @test_mm_aesdec256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
106 ; CHECK-LABEL: test_mm_aesdec256kl_u8:
107 ; CHECK: # %bb.0: # %entry
108 ; CHECK-NEXT: xorl %eax, %eax
109 ; CHECK-NEXT: aesdec256kl (%rsi), %xmm0
110 ; CHECK-NEXT: sete %al
111 ; CHECK-NEXT: movaps %xmm0, (%rdi)
114 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, ptr %h) #1
115 %1 = extractvalue { i8, <2 x i64> } %0, 1
116 store <2 x i64> %1, ptr %odata, align 16
117 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Exercises @llvm.x86.aesenc128kl. The intrinsic takes the data vector %idata
; and the handle pointer %h; the vector element of its result is stored to
; %odata with align 16 (aligned movaps in the expected assembly), and the i8
; element is extracted last. The expected assembly zeroes %eax before the
; instruction and materialises the i8 with sete %al afterwards.
121 define zeroext i8 @test_mm_aesenc128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
122 ; CHECK-LABEL: test_mm_aesenc128kl_u8:
123 ; CHECK: # %bb.0: # %entry
124 ; CHECK-NEXT: xorl %eax, %eax
125 ; CHECK-NEXT: aesenc128kl (%rsi), %xmm0
126 ; CHECK-NEXT: sete %al
127 ; CHECK-NEXT: movaps %xmm0, (%rdi)
130 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, ptr %h) #1
131 %1 = extractvalue { i8, <2 x i64> } %0, 1
132 store <2 x i64> %1, ptr %odata, align 16
133 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Exercises @llvm.x86.aesdec128kl. The intrinsic takes the data vector %idata
; and the handle pointer %h; the vector element of its result is stored to
; %odata with align 16 (aligned movaps in the expected assembly), and the i8
; element is extracted last. The expected assembly zeroes %eax before the
; instruction and materialises the i8 with sete %al afterwards.
137 define zeroext i8 @test_mm_aesdec128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
138 ; CHECK-LABEL: test_mm_aesdec128kl_u8:
139 ; CHECK: # %bb.0: # %entry
140 ; CHECK-NEXT: xorl %eax, %eax
141 ; CHECK-NEXT: aesdec128kl (%rsi), %xmm0
142 ; CHECK-NEXT: sete %al
143 ; CHECK-NEXT: movaps %xmm0, (%rdi)
146 %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, ptr %h) #1
147 %1 = extractvalue { i8, <2 x i64> } %0, 1
148 store <2 x i64> %1, ptr %odata, align 16
149 %2 = extractvalue { i8, <2 x i64> } %0, 0
; Exercises @llvm.x86.aesencwide128kl. Eight <2 x i64> vectors are loaded from
; consecutive 16-byte slots of %idata, passed to the intrinsic together with the
; handle pointer %h, and the eight vector results are stored to consecutive
; 16-byte slots of %odata. All loads and stores are align 16, hence movaps in
; the expected assembly. The instruction's only explicit operand there is the
; handle address (%rdx); data is staged in %xmm0-%xmm7 around it. The i8
; element of the result is extracted last and materialised with sete %al.
153 define zeroext i8 @test__mm_aesencwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
154 ; CHECK-LABEL: test__mm_aesencwide128kl_u8:
155 ; CHECK: # %bb.0: # %entry
156 ; CHECK-NEXT: movaps (%rsi), %xmm0
157 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
158 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
159 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
160 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
161 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
162 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
163 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
164 ; CHECK-NEXT: xorl %eax, %eax
165 ; CHECK-NEXT: aesencwide128kl (%rdx)
166 ; CHECK-NEXT: sete %al
167 ; CHECK-NEXT: movaps %xmm0, (%rdi)
168 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
169 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
170 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
171 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
172 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
173 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
174 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input blocks idata[0..7].
177 %0 = load <2 x i64>, ptr %idata, align 16
178 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
179 %2 = load <2 x i64>, ptr %1, align 16
180 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
181 %4 = load <2 x i64>, ptr %3, align 16
182 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
183 %6 = load <2 x i64>, ptr %5, align 16
184 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
185 %8 = load <2 x i64>, ptr %7, align 16
186 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
187 %10 = load <2 x i64>, ptr %9, align 16
188 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
189 %12 = load <2 x i64>, ptr %11, align 16
190 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
191 %14 = load <2 x i64>, ptr %13, align 16
192 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store result elements 1..8 to odata[0..7].
193 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
194 store <2 x i64> %16, ptr %odata, align 16
195 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
196 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
197 store <2 x i64> %17, ptr %18, align 16
198 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
199 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
200 store <2 x i64> %19, ptr %20, align 16
201 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
202 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
203 store <2 x i64> %21, ptr %22, align 16
204 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
205 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
206 store <2 x i64> %23, ptr %24, align 16
207 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
208 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
209 store <2 x i64> %25, ptr %26, align 16
210 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
211 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
212 store <2 x i64> %27, ptr %28, align 16
213 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
214 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
215 store <2 x i64> %29, ptr %30, align 16
; The i8 element of the result is extracted last.
216 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Exercises @llvm.x86.aesdecwide128kl. Eight <2 x i64> vectors are loaded from
; consecutive 16-byte slots of %idata, passed to the intrinsic together with the
; handle pointer %h, and the eight vector results are stored to consecutive
; 16-byte slots of %odata. All loads and stores are align 16, hence movaps in
; the expected assembly. The instruction's only explicit operand there is the
; handle address (%rdx); data is staged in %xmm0-%xmm7 around it. The i8
; element of the result is extracted last and materialised with sete %al.
220 define zeroext i8 @test__mm_aesdecwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
221 ; CHECK-LABEL: test__mm_aesdecwide128kl_u8:
222 ; CHECK: # %bb.0: # %entry
223 ; CHECK-NEXT: movaps (%rsi), %xmm0
224 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
225 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
226 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
227 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
228 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
229 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
230 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
231 ; CHECK-NEXT: xorl %eax, %eax
232 ; CHECK-NEXT: aesdecwide128kl (%rdx)
233 ; CHECK-NEXT: sete %al
234 ; CHECK-NEXT: movaps %xmm0, (%rdi)
235 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
236 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
237 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
238 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
239 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
240 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
241 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input blocks idata[0..7].
244 %0 = load <2 x i64>, ptr %idata, align 16
245 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
246 %2 = load <2 x i64>, ptr %1, align 16
247 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
248 %4 = load <2 x i64>, ptr %3, align 16
249 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
250 %6 = load <2 x i64>, ptr %5, align 16
251 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
252 %8 = load <2 x i64>, ptr %7, align 16
253 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
254 %10 = load <2 x i64>, ptr %9, align 16
255 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
256 %12 = load <2 x i64>, ptr %11, align 16
257 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
258 %14 = load <2 x i64>, ptr %13, align 16
259 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store result elements 1..8 to odata[0..7].
260 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
261 store <2 x i64> %16, ptr %odata, align 16
262 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
263 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
264 store <2 x i64> %17, ptr %18, align 16
265 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
266 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
267 store <2 x i64> %19, ptr %20, align 16
268 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
269 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
270 store <2 x i64> %21, ptr %22, align 16
271 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
272 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
273 store <2 x i64> %23, ptr %24, align 16
274 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
275 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
276 store <2 x i64> %25, ptr %26, align 16
277 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
278 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
279 store <2 x i64> %27, ptr %28, align 16
280 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
281 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
282 store <2 x i64> %29, ptr %30, align 16
; The i8 element of the result is extracted last.
283 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Exercises @llvm.x86.aesencwide256kl. Eight <2 x i64> vectors are loaded from
; consecutive 16-byte slots of %idata, passed to the intrinsic together with the
; handle pointer %h, and the eight vector results are stored to consecutive
; 16-byte slots of %odata. All loads and stores are align 16, hence movaps in
; the expected assembly. The instruction's only explicit operand there is the
; handle address (%rdx); data is staged in %xmm0-%xmm7 around it. The i8
; element of the result is extracted last and materialised with sete %al.
287 define zeroext i8 @test__mm_aesencwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
288 ; CHECK-LABEL: test__mm_aesencwide256kl_u8:
289 ; CHECK: # %bb.0: # %entry
290 ; CHECK-NEXT: movaps (%rsi), %xmm0
291 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
292 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
293 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
294 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
295 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
296 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
297 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
298 ; CHECK-NEXT: xorl %eax, %eax
299 ; CHECK-NEXT: aesencwide256kl (%rdx)
300 ; CHECK-NEXT: sete %al
301 ; CHECK-NEXT: movaps %xmm0, (%rdi)
302 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
303 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
304 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
305 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
306 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
307 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
308 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input blocks idata[0..7].
311 %0 = load <2 x i64>, ptr %idata, align 16
312 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
313 %2 = load <2 x i64>, ptr %1, align 16
314 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
315 %4 = load <2 x i64>, ptr %3, align 16
316 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
317 %6 = load <2 x i64>, ptr %5, align 16
318 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
319 %8 = load <2 x i64>, ptr %7, align 16
320 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
321 %10 = load <2 x i64>, ptr %9, align 16
322 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
323 %12 = load <2 x i64>, ptr %11, align 16
324 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
325 %14 = load <2 x i64>, ptr %13, align 16
326 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store result elements 1..8 to odata[0..7].
327 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
328 store <2 x i64> %16, ptr %odata, align 16
329 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
330 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
331 store <2 x i64> %17, ptr %18, align 16
332 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
333 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
334 store <2 x i64> %19, ptr %20, align 16
335 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
336 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
337 store <2 x i64> %21, ptr %22, align 16
338 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
339 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
340 store <2 x i64> %23, ptr %24, align 16
341 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
342 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
343 store <2 x i64> %25, ptr %26, align 16
344 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
345 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
346 store <2 x i64> %27, ptr %28, align 16
347 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
348 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
349 store <2 x i64> %29, ptr %30, align 16
; The i8 element of the result is extracted last.
350 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Exercises @llvm.x86.aesdecwide256kl. Eight <2 x i64> vectors are loaded from
; consecutive 16-byte slots of %idata, passed to the intrinsic together with the
; handle pointer %h, and the eight vector results are stored to consecutive
; 16-byte slots of %odata. All loads and stores are align 16, hence movaps in
; the expected assembly. The instruction's only explicit operand there is the
; handle address (%rdx); data is staged in %xmm0-%xmm7 around it. The i8
; element of the result is extracted last and materialised with sete %al.
354 define zeroext i8 @test__mm_aesdecwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
355 ; CHECK-LABEL: test__mm_aesdecwide256kl_u8:
356 ; CHECK: # %bb.0: # %entry
357 ; CHECK-NEXT: movaps (%rsi), %xmm0
358 ; CHECK-NEXT: movaps 16(%rsi), %xmm1
359 ; CHECK-NEXT: movaps 32(%rsi), %xmm2
360 ; CHECK-NEXT: movaps 48(%rsi), %xmm3
361 ; CHECK-NEXT: movaps 64(%rsi), %xmm4
362 ; CHECK-NEXT: movaps 80(%rsi), %xmm5
363 ; CHECK-NEXT: movaps 96(%rsi), %xmm6
364 ; CHECK-NEXT: movaps 112(%rsi), %xmm7
365 ; CHECK-NEXT: xorl %eax, %eax
366 ; CHECK-NEXT: aesdecwide256kl (%rdx)
367 ; CHECK-NEXT: sete %al
368 ; CHECK-NEXT: movaps %xmm0, (%rdi)
369 ; CHECK-NEXT: movaps %xmm1, 16(%rdi)
370 ; CHECK-NEXT: movaps %xmm2, 32(%rdi)
371 ; CHECK-NEXT: movaps %xmm3, 48(%rdi)
372 ; CHECK-NEXT: movaps %xmm4, 64(%rdi)
373 ; CHECK-NEXT: movaps %xmm5, 80(%rdi)
374 ; CHECK-NEXT: movaps %xmm6, 96(%rdi)
375 ; CHECK-NEXT: movaps %xmm7, 112(%rdi)
; Load the eight input blocks idata[0..7].
378 %0 = load <2 x i64>, ptr %idata, align 16
379 %1 = getelementptr <2 x i64>, ptr %idata, i64 1
380 %2 = load <2 x i64>, ptr %1, align 16
381 %3 = getelementptr <2 x i64>, ptr %idata, i64 2
382 %4 = load <2 x i64>, ptr %3, align 16
383 %5 = getelementptr <2 x i64>, ptr %idata, i64 3
384 %6 = load <2 x i64>, ptr %5, align 16
385 %7 = getelementptr <2 x i64>, ptr %idata, i64 4
386 %8 = load <2 x i64>, ptr %7, align 16
387 %9 = getelementptr <2 x i64>, ptr %idata, i64 5
388 %10 = load <2 x i64>, ptr %9, align 16
389 %11 = getelementptr <2 x i64>, ptr %idata, i64 6
390 %12 = load <2 x i64>, ptr %11, align 16
391 %13 = getelementptr <2 x i64>, ptr %idata, i64 7
392 %14 = load <2 x i64>, ptr %13, align 16
393 %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
; Store result elements 1..8 to odata[0..7].
394 %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
395 store <2 x i64> %16, ptr %odata, align 16
396 %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
397 %18 = getelementptr <2 x i64>, ptr %odata, i64 1
398 store <2 x i64> %17, ptr %18, align 16
399 %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
400 %20 = getelementptr <2 x i64>, ptr %odata, i64 2
401 store <2 x i64> %19, ptr %20, align 16
402 %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
403 %22 = getelementptr <2 x i64>, ptr %odata, i64 3
404 store <2 x i64> %21, ptr %22, align 16
405 %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
406 %24 = getelementptr <2 x i64>, ptr %odata, i64 4
407 store <2 x i64> %23, ptr %24, align 16
408 %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
409 %26 = getelementptr <2 x i64>, ptr %odata, i64 5
410 store <2 x i64> %25, ptr %26, align 16
411 %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
412 %28 = getelementptr <2 x i64>, ptr %odata, i64 6
413 store <2 x i64> %27, ptr %28, align 16
414 %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
415 %30 = getelementptr <2 x i64>, ptr %odata, i64 7
416 store <2 x i64> %29, ptr %30, align 16
; The i8 element of the result is extracted last.
417 %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
; Declarations of the Key Locker intrinsics called by the tests in this file.
; The encodekey intrinsics return an i32 followed by six (128) or seven (256)
; <2 x i64> values. The single-block AES intrinsics return { i8, <2 x i64> }.
; The wide AES intrinsics take the handle pointer first, then eight <2 x i64>
; inputs, and return an i8 followed by eight <2 x i64> values.
421 declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
422 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
423 declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
424 declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
425 declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
426 declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
427 declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
428 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
429 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
430 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
431 declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)