1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; Intrinsic under test for the sha1rnds4 cases: two <4 x i32> operands plus an
; i8 operand, returning <4 x i32>.
5 declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
; Register form of sha1rnds4: both vector operands are passed by value and the
; i8 operand is the constant 3. Both runs expect `sha1rnds4 $3, %xmm1, %xmm0`;
; the +avx2 run additionally pins the encoding bytes.
7 define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
8 ; SSE-LABEL: test_sha1rnds4rr:
9 ; SSE: # %bb.0: # %entry
10 ; SSE-NEXT: sha1rnds4 $3, %xmm1, %xmm0
13 ; AVX-LABEL: test_sha1rnds4rr:
14 ; AVX: # %bb.0: # %entry
15 ; AVX-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x0f,0x3a,0xcc,0xc1,0x03]
16 ; AVX-NEXT: retq # encoding: [0xc3]
18 %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
; Memory form of sha1rnds4: the second vector operand is loaded from %b, and
; the expected output uses (%rdi) directly as the instruction's source operand
; rather than a separate load.
22 define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
23 ; SSE-LABEL: test_sha1rnds4rm:
24 ; SSE: # %bb.0: # %entry
25 ; SSE-NEXT: sha1rnds4 $3, (%rdi), %xmm0
28 ; AVX-LABEL: test_sha1rnds4rm:
29 ; AVX: # %bb.0: # %entry
30 ; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
31 ; AVX-NEXT: retq # encoding: [0xc3]
33 %0 = load <4 x i32>, ptr %b
34 %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
; Intrinsic under test for the sha1nexte cases: two <4 x i32> operands,
; returning <4 x i32>.
38 declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha1nexte: both operands are passed by value and both runs
; expect `sha1nexte %xmm1, %xmm0`; the +avx2 run also pins the encoding bytes.
40 define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
41 ; SSE-LABEL: test_sha1nexterr:
42 ; SSE: # %bb.0: # %entry
43 ; SSE-NEXT: sha1nexte %xmm1, %xmm0
46 ; AVX-LABEL: test_sha1nexterr:
47 ; AVX: # %bb.0: # %entry
48 ; AVX-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc8,0xc1]
49 ; AVX-NEXT: retq # encoding: [0xc3]
51 %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
; Memory form of sha1nexte: the second operand is loaded from %b, and the
; expected output uses (%rdi) directly as the instruction's source operand.
55 define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
56 ; SSE-LABEL: test_sha1nexterm:
57 ; SSE: # %bb.0: # %entry
58 ; SSE-NEXT: sha1nexte (%rdi), %xmm0
61 ; AVX-LABEL: test_sha1nexterm:
62 ; AVX: # %bb.0: # %entry
63 ; AVX-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc8,0x07]
64 ; AVX-NEXT: retq # encoding: [0xc3]
66 %0 = load <4 x i32>, ptr %b
67 %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
; Intrinsic under test for the sha1msg1 cases: two <4 x i32> operands,
; returning <4 x i32>.
71 declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha1msg1: both operands are passed by value and both runs
; expect `sha1msg1 %xmm1, %xmm0`; the +avx2 run also pins the encoding bytes.
73 define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
74 ; SSE-LABEL: test_sha1msg1rr:
75 ; SSE: # %bb.0: # %entry
76 ; SSE-NEXT: sha1msg1 %xmm1, %xmm0
79 ; AVX-LABEL: test_sha1msg1rr:
80 ; AVX: # %bb.0: # %entry
81 ; AVX-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xc9,0xc1]
82 ; AVX-NEXT: retq # encoding: [0xc3]
84 %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
; Memory form of sha1msg1: the second operand is loaded from %b, and the
; expected output uses (%rdi) directly as the instruction's source operand.
88 define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
89 ; SSE-LABEL: test_sha1msg1rm:
90 ; SSE: # %bb.0: # %entry
91 ; SSE-NEXT: sha1msg1 (%rdi), %xmm0
94 ; AVX-LABEL: test_sha1msg1rm:
95 ; AVX: # %bb.0: # %entry
96 ; AVX-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xc9,0x07]
97 ; AVX-NEXT: retq # encoding: [0xc3]
99 %0 = load <4 x i32>, ptr %b
100 %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
; Intrinsic under test for the sha1msg2 cases: two <4 x i32> operands,
; returning <4 x i32>.
104 declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha1msg2: both operands are passed by value and both runs
; expect `sha1msg2 %xmm1, %xmm0`; the +avx2 run also pins the encoding bytes.
106 define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
107 ; SSE-LABEL: test_sha1msg2rr:
108 ; SSE: # %bb.0: # %entry
109 ; SSE-NEXT: sha1msg2 %xmm1, %xmm0
112 ; AVX-LABEL: test_sha1msg2rr:
113 ; AVX: # %bb.0: # %entry
114 ; AVX-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xca,0xc1]
115 ; AVX-NEXT: retq # encoding: [0xc3]
117 %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
; Memory form of sha1msg2: the second operand is loaded from %b, and the
; expected output uses (%rdi) directly as the instruction's source operand.
121 define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
122 ; SSE-LABEL: test_sha1msg2rm:
123 ; SSE: # %bb.0: # %entry
124 ; SSE-NEXT: sha1msg2 (%rdi), %xmm0
127 ; AVX-LABEL: test_sha1msg2rm:
128 ; AVX: # %bb.0: # %entry
129 ; AVX-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xca,0x07]
130 ; AVX-NEXT: retq # encoding: [0xc3]
132 %0 = load <4 x i32>, ptr %b
133 %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
; Intrinsic under test for the sha256rnds2 cases: three <4 x i32> operands,
; returning <4 x i32>.
137 declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha256rnds2 with three vector operands. The expected output
; first copies xmm0 to xmm3, then copies xmm2 into xmm0 (presumably %c; the
; register assignment is not spelled out in the IR), issues
; `sha256rnds2 %xmm0, %xmm1, %xmm3`, and finally moves xmm3 back into xmm0.
; The +avx2 run expects vmovaps for the copies, while the sha256rnds2
; instruction itself is the same in both runs.
139 define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
140 ; SSE-LABEL: test_sha256rnds2rr:
141 ; SSE: # %bb.0: # %entry
142 ; SSE-NEXT: movaps %xmm0, %xmm3
143 ; SSE-NEXT: movaps %xmm2, %xmm0
144 ; SSE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3
145 ; SSE-NEXT: movaps %xmm3, %xmm0
148 ; AVX-LABEL: test_sha256rnds2rr:
149 ; AVX: # %bb.0: # %entry
150 ; AVX-NEXT: vmovaps %xmm0, %xmm3 # encoding: [0xc5,0xf8,0x28,0xd8]
151 ; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
152 ; AVX-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x0f,0x38,0xcb,0xd9]
153 ; AVX-NEXT: vmovaps %xmm3, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc3]
154 ; AVX-NEXT: retq # encoding: [0xc3]
156 %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
; Memory form of sha256rnds2: the second operand is loaded from %b. The
; expected output copies xmm0 to xmm2, copies xmm1 into xmm0 (presumably %c;
; the register assignment is not spelled out in the IR), issues
; `sha256rnds2 %xmm0, (%rdi), %xmm2` with the load used directly as the memory
; operand, and finally moves xmm2 back into xmm0.
160 define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
161 ; SSE-LABEL: test_sha256rnds2rm:
162 ; SSE: # %bb.0: # %entry
163 ; SSE-NEXT: movaps %xmm0, %xmm2
164 ; SSE-NEXT: movaps %xmm1, %xmm0
165 ; SSE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2
166 ; SSE-NEXT: movaps %xmm2, %xmm0
169 ; AVX-LABEL: test_sha256rnds2rm:
170 ; AVX: # %bb.0: # %entry
171 ; AVX-NEXT: vmovaps %xmm0, %xmm2 # encoding: [0xc5,0xf8,0x28,0xd0]
172 ; AVX-NEXT: vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
173 ; AVX-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x0f,0x38,0xcb,0x17]
174 ; AVX-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
175 ; AVX-NEXT: retq # encoding: [0xc3]
177 %0 = load <4 x i32>, ptr %b
178 %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
; Intrinsic under test for the sha256msg1 cases: two <4 x i32> operands,
; returning <4 x i32>.
182 declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha256msg1: both operands are passed by value and both runs
; expect `sha256msg1 %xmm1, %xmm0`; the +avx2 run also pins the encoding bytes.
184 define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
185 ; SSE-LABEL: test_sha256msg1rr:
186 ; SSE: # %bb.0: # %entry
187 ; SSE-NEXT: sha256msg1 %xmm1, %xmm0
190 ; AVX-LABEL: test_sha256msg1rr:
191 ; AVX: # %bb.0: # %entry
192 ; AVX-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcc,0xc1]
193 ; AVX-NEXT: retq # encoding: [0xc3]
195 %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
; Memory form of sha256msg1: the second operand is loaded from %b, and the
; expected output uses (%rdi) directly as the instruction's source operand.
199 define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
200 ; SSE-LABEL: test_sha256msg1rm:
201 ; SSE: # %bb.0: # %entry
202 ; SSE-NEXT: sha256msg1 (%rdi), %xmm0
205 ; AVX-LABEL: test_sha256msg1rm:
206 ; AVX: # %bb.0: # %entry
207 ; AVX-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcc,0x07]
208 ; AVX-NEXT: retq # encoding: [0xc3]
210 %0 = load <4 x i32>, ptr %b
211 %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
; Intrinsic under test for the sha256msg2 cases: two <4 x i32> operands,
; returning <4 x i32>.
215 declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
; Register form of sha256msg2: both operands are passed by value and both runs
; expect `sha256msg2 %xmm1, %xmm0`; the +avx2 run also pins the encoding bytes.
217 define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
218 ; SSE-LABEL: test_sha256msg2rr:
219 ; SSE: # %bb.0: # %entry
220 ; SSE-NEXT: sha256msg2 %xmm1, %xmm0
223 ; AVX-LABEL: test_sha256msg2rr:
224 ; AVX: # %bb.0: # %entry
225 ; AVX-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x0f,0x38,0xcd,0xc1]
226 ; AVX-NEXT: retq # encoding: [0xc3]
228 %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
; Memory form of sha256msg2: the second operand is loaded from %b, and the
; expected output uses (%rdi) directly as the instruction's source operand.
232 define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
233 ; SSE-LABEL: test_sha256msg2rm:
234 ; SSE: # %bb.0: # %entry
235 ; SSE-NEXT: sha256msg2 (%rdi), %xmm0
238 ; AVX-LABEL: test_sha256msg2rm:
239 ; AVX: # %bb.0: # %entry
240 ; AVX-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x0f,0x38,0xcd,0x07]
241 ; AVX-NEXT: retq # encoding: [0xc3]
243 %0 = load <4 x i32>, ptr %b
244 %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
248 ; Make sure we don't forget that SHA instructions have no VEX-encoded equivalents and thus don't zero the upper bits of YMM/ZMM.
; The shufflevector below widens the <4 x i32> intrinsic result to <8 x i32>
; by appending the four elements of a zeroinitializer vector. The plain +sha
; run expects an xorps of xmm1 after the instruction; the +avx2 run expects an
; explicit `vmovaps %xmm0, %xmm0` after sha1rnds4 instead of relying on the
; SHA instruction to clear the upper half.
249 define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
250 ; SSE-LABEL: test_sha1rnds4_zero_extend:
251 ; SSE: # %bb.0: # %entry
252 ; SSE-NEXT: sha1rnds4 $3, (%rdi), %xmm0
253 ; SSE-NEXT: xorps %xmm1, %xmm1
256 ; AVX-LABEL: test_sha1rnds4_zero_extend:
257 ; AVX: # %bb.0: # %entry
258 ; AVX-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x0f,0x3a,0xcc,0x07,0x03]
259 ; AVX-NEXT: vmovaps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc0]
260 ; AVX-NEXT: retq # encoding: [0xc3]
262 %0 = load <4 x i32>, ptr %b
263 %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
264 %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
267 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: