; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-e1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-512tvb | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s
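
; Each RUN line uses either +fuse-aes directly or a CPU that is expected to
; enable AESE/AESMC and AESD/AESIMC instruction-pair fusion in the scheduler,
; so the aese/aesmc and aesd/aesimc pairs checked below should be emitted
; back to back.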
declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d)
declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %d)
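
; The functions below run four rounds of these intrinsics over four
; independent data streams, giving the scheduler several aese/aesmc
; (respectively aesd/aesimc) pairs to place.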
define void @aesea(ptr %a0, ptr %b0, ptr %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, ptr %a0
  %a1 = getelementptr inbounds <16 x i8>, ptr %a0, i64 1
  %d1 = load <16 x i8>, ptr %a1
  %a2 = getelementptr inbounds <16 x i8>, ptr %a0, i64 2
  %d2 = load <16 x i8>, ptr %a2
  %a3 = getelementptr inbounds <16 x i8>, ptr %a0, i64 3
  %d3 = load <16 x i8>, ptr %a3
  %k0 = load <16 x i8>, ptr %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, ptr %b0, i64 1
  %k1 = load <16 x i8>, ptr %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
  %b2 = getelementptr inbounds <16 x i8>, ptr %b0, i64 2
  %k2 = load <16 x i8>, ptr %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e10)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e11)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e12)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e13)
  %b3 = getelementptr inbounds <16 x i8>, ptr %b0, i64 3
  %k3 = load <16 x i8>, ptr %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e20)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e21)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e22)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e23)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, ptr %c0
  %c1 = getelementptr inbounds <16 x i8>, ptr %c0, i64 1
  store <16 x i8> %h1, ptr %c1
  %c2 = getelementptr inbounds <16 x i8>, ptr %c0, i64 2
  store <16 x i8> %h2, ptr %c2
  %c3 = getelementptr inbounds <16 x i8>, ptr %c0, i64 3
  store <16 x i8> %h3, ptr %c3
  ret void

; CHECK-LABEL: aesea:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VC]], [[VC]]
; CHECK: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VD]], [[VD]]
; CHECK: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VE]], [[VE]]
; CHECK: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VF]], [[VF]]
; CHECK: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VG]], [[VG]]
; CHECK: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VH]], [[VH]]
}

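; @aesda is the decryption counterpart of @aesea: the same dependence pattern,
; but built from aesd/aesimc, which the listed CPUs are also expected to fuse.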
define void @aesda(ptr %a0, ptr %b0, ptr %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, ptr %a0
  %a1 = getelementptr inbounds <16 x i8>, ptr %a0, i64 1
  %d1 = load <16 x i8>, ptr %a1
  %a2 = getelementptr inbounds <16 x i8>, ptr %a0, i64 2
  %d2 = load <16 x i8>, ptr %a2
  %a3 = getelementptr inbounds <16 x i8>, ptr %a0, i64 3
  %d3 = load <16 x i8>, ptr %a3
  %k0 = load <16 x i8>, ptr %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, ptr %b0, i64 1
  %k1 = load <16 x i8>, ptr %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
  %b2 = getelementptr inbounds <16 x i8>, ptr %b0, i64 2
  %k2 = load <16 x i8>, ptr %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e10)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e11)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e12)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e13)
  %b3 = getelementptr inbounds <16 x i8>, ptr %b0, i64 3
  %k3 = load <16 x i8>, ptr %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e20)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e21)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e22)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e23)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, ptr %c0
  %c1 = getelementptr inbounds <16 x i8>, ptr %c0, i64 1
  store <16 x i8> %h1, ptr %c1
  %c2 = getelementptr inbounds <16 x i8>, ptr %c0, i64 2
  store <16 x i8> %h2, ptr %c2
  %c3 = getelementptr inbounds <16 x i8>, ptr %c0, i64 3
  store <16 x i8> %h3, ptr %c3
  ret void

; CHECK-LABEL: aesda:
; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK: aesimc [[VA]], [[VA]]
; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VB]], [[VB]]
; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VC]], [[VC]]
; CHECK: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VD]], [[VD]]
; CHECK: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VE]], [[VE]]
; CHECK: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VF]], [[VF]]
; CHECK: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VG]], [[VG]]
; CHECK: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VH]], [[VH]]
}

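; @aes_load_store interleaves a short aese/aesmc chain with stack loads and
; stores, so there are unrelated memory operations available to schedule
; around the pairs.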
define void @aes_load_store(ptr %p1, ptr %p2, ptr %p3) {
entry:
  %x1 = alloca <16 x i8>, align 16
  %x2 = alloca <16 x i8>, align 16
  %x3 = alloca <16 x i8>, align 16
  %x4 = alloca <16 x i8>, align 16
  %x5 = alloca <16 x i8>, align 16
  %in1 = load <16 x i8>, ptr %p1, align 16
  store <16 x i8> %in1, ptr %x1, align 16
  %aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2
  %in2 = load <16 x i8>, ptr %p2, align 16
  %aesmc1 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2
  %aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc1, <16 x i8> %in2) #2
  store <16 x i8> %aesmc1, ptr %x3, align 16
  %in3 = load <16 x i8>, ptr %p3, align 16
  %aesmc2 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2
  %aese3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc2, <16 x i8> %in3) #2
  store <16 x i8> %aese3, ptr %x5, align 16
  ret void

; CHECK-LABEL: aes_load_store:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; aese and aesmc are described as sharing a unit, hence they won't be scheduled
; on the same cycle and the scheduler can find another instruction to place in between
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
}