1 ; Test vector intrinsics added with z14.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
; Declarations of the llvm.s390.* intrinsics called by the tests below.
; Integer / load-store intrinsics:
5 declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
6 declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
7 declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
8 declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
; <4 x float> compare / classify / round intrinsics; the compare and classify
; ones return a {<4 x i32>, i32} pair:
10 declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
11 declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
12 declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
13 declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
14 declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)
; Min/max intrinsics, each taking a trailing i32 operand:
16 declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
17 declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
18 declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
19 declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)
; VBPERM: call the intrinsic on both vector arguments; the CHECK expects a
; vbperm writing %v24 from operands %v24 and %v26.
22 define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
23 ; CHECK-LABEL: test_vbperm:
24 ; CHECK: vbperm %v24, %v24, %v26
26 %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
30 ; VMSLG with no shifts.
; The trailing i32 operand of the intrinsic (0) is expected to show up
; unchanged as the last operand of the vmslg instruction.
31 define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
32 ; CHECK-LABEL: test_vmslg1:
33 ; CHECK: vmslg %v24, %v24, %v26, %v28, 0
35 %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
39 ; VMSLG with both shifts.
; The trailing i32 operand of the intrinsic (12) is expected to show up
; unchanged as the last operand of the vmslg instruction.
40 define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
41 ; CHECK-LABEL: test_vmslg2:
42 ; CHECK: vmslg %v24, %v24, %v26, %v28, 12
44 %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
48 ; VLRLR with the lowest in-range displacement.
; The length passed to @llvm.s390.vlrl is the variable %length, so the CHECK
; expects the register-length form (vlrlr) with the length in %r3.
49 define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
50 ; CHECK-LABEL: test_vlrlr1:
51 ; CHECK: vlrlr %v24, %r3, 0(%r2)
53 %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
57 ; VLRLR with the highest in-range displacement.
; The constant byte offset 4095 is expected to be folded into the
; displacement of the vlrlr address operand.
58 define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
59 ; CHECK-LABEL: test_vlrlr2:
60 ; CHECK: vlrlr %v24, %r3, 4095(%r2)
62 %ptr = getelementptr i8, i8 *%base, i64 4095
63 %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
67 ; VLRLR with an out-of-range displacement.
; Offset 4096 must not be folded: the CHECK requires a zero displacement off
; some register in %r1-%r5 (the instruction that forms that address is not
; checked here).
68 define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
69 ; CHECK-LABEL: test_vlrlr3:
70 ; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
72 %ptr = getelementptr i8, i8 *%base, i64 4096
73 %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
77 ; Check that VLRLR doesn't allow an index.
; The address is %base + %index; the CHECK requires a single base register in
; %r1-%r5 with zero displacement, and the length in %r4 (third argument).
78 define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
79 ; CHECK-LABEL: test_vlrlr4:
80 ; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
82 %ptr = getelementptr i8, i8 *%base, i64 %index
83 %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
87 ; VLRL with the lowest in-range displacement.
; The length passed to @llvm.s390.vlrl is the constant 0, so the CHECK expects
; the immediate-length form (vlrl) with 0 as the last operand.
88 define <16 x i8> @test_vlrl1(i8 *%ptr) {
89 ; CHECK-LABEL: test_vlrl1:
90 ; CHECK: vlrl %v24, 0(%r2), 0
92 %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
96 ; VLRL with the highest in-range displacement.
; The constant byte offset 4095 is expected to be folded into the
; displacement of the vlrl address operand.
97 define <16 x i8> @test_vlrl2(i8 *%base) {
98 ; CHECK-LABEL: test_vlrl2:
99 ; CHECK: vlrl %v24, 4095(%r2), 0
101 %ptr = getelementptr i8, i8 *%base, i64 4095
102 %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
106 ; VLRL with an out-of-range displacement.
; Offset 4096 must not be folded: the CHECK requires a zero displacement off
; some register in %r1-%r5.
107 define <16 x i8> @test_vlrl3(i8 *%base) {
108 ; CHECK-LABEL: test_vlrl3:
109 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
111 %ptr = getelementptr i8, i8 *%base, i64 4096
112 %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
116 ; Check that VLRL doesn't allow an index.
; The address is %base + %index; the CHECK requires a single base register in
; %r1-%r5 with zero displacement.
117 define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
118 ; CHECK-LABEL: test_vlrl4:
119 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
121 %ptr = getelementptr i8, i8 *%base, i64 %index
122 %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
126 ; VSTRLR with the lowest in-range displacement.
; The length passed to @llvm.s390.vstrl is the variable %length, so the CHECK
; expects the register-length form (vstrlr) with the length in %r3.
127 define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
128 ; CHECK-LABEL: test_vstrlr1:
129 ; CHECK: vstrlr %v24, %r3, 0(%r2)
131 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
135 ; VSTRLR with the highest in-range displacement.
; The constant byte offset 4095 is expected to be folded into the
; displacement of the vstrlr address operand.
136 define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
137 ; CHECK-LABEL: test_vstrlr2:
138 ; CHECK: vstrlr %v24, %r3, 4095(%r2)
140 %ptr = getelementptr i8, i8 *%base, i64 4095
141 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
145 ; VSTRLR with an out-of-range displacement.
; Offset 4096 must not be folded: the CHECK requires a zero displacement off
; some register in %r1-%r5.
146 define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
147 ; CHECK-LABEL: test_vstrlr3:
148 ; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
150 %ptr = getelementptr i8, i8 *%base, i64 4096
151 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
155 ; Check that VSTRLR doesn't allow an index.
; The address is %base + %index; the CHECK requires a single base register in
; %r1-%r5 with zero displacement, and the length in %r4.
156 define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
157 ; CHECK-LABEL: test_vstrlr4:
158 ; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
160 %ptr = getelementptr i8, i8 *%base, i64 %index
161 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
165 ; VSTRL with the lowest in-range displacement.
; The length passed to @llvm.s390.vstrl is the constant 8, so the CHECK
; expects the immediate-length form (vstrl) with 8 as the last operand.
166 define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
167 ; CHECK-LABEL: test_vstrl1:
168 ; CHECK: vstrl %v24, 0(%r2), 8
170 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
174 ; VSTRL with the highest in-range displacement.
; The constant byte offset 4095 is expected to be folded into the
; displacement of the vstrl address operand.
175 define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
176 ; CHECK-LABEL: test_vstrl2:
177 ; CHECK: vstrl %v24, 4095(%r2), 8
179 %ptr = getelementptr i8, i8 *%base, i64 4095
180 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
184 ; VSTRL with an out-of-range displacement.
; Offset 4096 must not be folded: the CHECK requires a zero displacement off
; some register in %r1-%r5.
185 define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
186 ; CHECK-LABEL: test_vstrl3:
187 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
189 %ptr = getelementptr i8, i8 *%base, i64 4096
190 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
194 ; Check that VSTRL doesn't allow an index.
; The address is %base + %index; the CHECK requires a single base register in
; %r1-%r5 with zero displacement.
195 define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
196 ; CHECK-LABEL: test_vstrl4:
197 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
199 %ptr = getelementptr i8, i8 *%base, i64 %index
200 call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
204 ; VFCESBS with no processing of the result.
; Only member 1 (the i32) of the returned {<4 x i32>, i32} pair is extracted;
; the vector result may land in any vector register, hence the regex.
205 define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
206 ; CHECK-LABEL: test_vfcesbs:
207 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
211 %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
213 %res = extractvalue {<4 x i32>, i32} %call, 1
217 ; VFCESBS, returning 1 if any elements are equal (CC != 3).
; The i32 member of the result is compared against 3 with icmp ne and
; zero-extended; the CHECK expects this to become a lochile of 1 into %r2.
218 define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
219 ; CHECK-LABEL: test_vfcesbs_any_bool:
220 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
222 ; CHECK: lochile %r2, 1
224 %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
226 %res = extractvalue {<4 x i32>, i32} %call, 1
227 %cmp = icmp ne i32 %res, 3
228 %ext = zext i1 %cmp to i32
232 ; VFCESBS, storing to %ptr if any elements are equal.
; Both members of the result are used: member 0 (the vector) and member 1
; (the i32, tested with icmp ule 2 to choose the store path). The CHECK-NEXT
; accepts either bor or bnler on %r14 directly after the compare (presumably
; the conditional return), followed by an mvhi storing 0.
233 define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b,
235 ; CHECK-LABEL: test_vfcesbs_any_store:
237 ; CHECK: vfcesbs %v24, %v24, %v26
238 ; CHECK-NEXT: {{bor|bnler}} %r14
239 ; CHECK: mvhi 0(%r2), 0
241 %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
243 %res = extractvalue {<4 x i32>, i32} %call, 0
244 %cc = extractvalue {<4 x i32>, i32} %call, 1
245 %cmp = icmp ule i32 %cc, 2
246 br i1 %cmp, label %store, label %exit
249 store i32 0, i32 *%ptr
256 ; VFCHSBS with no processing of the result.
; Only member 1 (the i32) of the returned {<4 x i32>, i32} pair is extracted;
; the vector result may land in any vector register, hence the regex.
257 define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
258 ; CHECK-LABEL: test_vfchsbs:
259 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
263 %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
265 %res = extractvalue {<4 x i32>, i32} %call, 1
269 ; VFCHSBS, returning 1 if not all elements are higher.
; The i32 member of the result is tested with icmp sge 1 and zero-extended;
; the CHECK expects this to become a lochinhe of 1 into %r2.
270 define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
271 ; CHECK-LABEL: test_vfchsbs_notall_bool:
272 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
274 ; CHECK: lochinhe %r2, 1
276 %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
278 %res = extractvalue {<4 x i32>, i32} %call, 1
279 %cmp = icmp sge i32 %res, 1
280 %ext = zext i1 %cmp to i32
284 ; VFCHSBS, storing to %ptr if not all elements are higher.
; Both members of the result are used: member 0 (the vector) and member 1
; (the i32, tested with icmp ugt 0 to choose the store path). The CHECK-NEXT
; accepts either bher or ber on %r14 directly after the compare (presumably
; the conditional return), followed by an mvhi storing 0.
285 define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b,
287 ; CHECK-LABEL: test_vfchsbs_notall_store:
289 ; CHECK: vfchsbs %v24, %v24, %v26
290 ; CHECK-NEXT: {{bher|ber}} %r14
291 ; CHECK: mvhi 0(%r2), 0
293 %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
295 %res = extractvalue {<4 x i32>, i32} %call, 0
296 %cc = extractvalue {<4 x i32>, i32} %call, 1
297 %cmp = icmp ugt i32 %cc, 0
298 br i1 %cmp, label %store, label %exit
301 store i32 0, i32 *%ptr
308 ; VFCHESBS with no processing of the result.
; Only member 1 (the i32) of the returned {<4 x i32>, i32} pair is extracted;
; the vector result may land in any vector register, hence the regex.
309 define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
310 ; CHECK-LABEL: test_vfchesbs:
311 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
315 %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
317 %res = extractvalue {<4 x i32>, i32} %call, 1
321 ; VFCHESBS, returning 1 if no element is higher or equal (CC == 3).
; The operands are <4 x float>, so "no element" rather than "neither".
; The i32 member of the result is tested with icmp eq 3 and zero-extended;
; the CHECK expects this to become a lochio of 1 into %r2.
322 define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
323 ; CHECK-LABEL: test_vfchesbs_none_bool:
324 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
326 ; CHECK: lochio %r2, 1
328 %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
330 %res = extractvalue {<4 x i32>, i32} %call, 1
331 %cmp = icmp eq i32 %res, 3
332 %ext = zext i1 %cmp to i32
336 ; VFCHESBS, storing to %ptr if no element is higher or equal.
; Both members of the result are used: member 0 (the vector) and member 1
; (the i32, tested with icmp uge 3 to choose the store path). The CHECK-NEXT
; accepts either bnor or bler on %r14 directly after the compare (presumably
; the conditional return), followed by an mvhi storing 0.
337 define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b,
339 ; CHECK-LABEL: test_vfchesbs_none_store:
341 ; CHECK: vfchesbs %v24, %v24, %v26
342 ; CHECK-NEXT: {{bnor|bler}} %r14
343 ; CHECK: mvhi 0(%r2), 0
345 %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
347 %res = extractvalue {<4 x i32>, i32} %call, 0
348 %cc = extractvalue {<4 x i32>, i32} %call, 1
349 %cmp = icmp uge i32 %cc, 3
350 br i1 %cmp, label %store, label %exit
353 store i32 0, i32 *%ptr
360 ; VFTCISB with the lowest useful class selector and no processing of the result.
; The selector (i32 1) is expected to appear unchanged as the last instruction
; operand; only member 1 (the i32) of the returned pair is extracted.
361 define i32 @test_vftcisb(<4 x float> %a) {
362 ; CHECK-LABEL: test_vftcisb:
363 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
367 %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
368 %res = extractvalue {<4 x i32>, i32} %call, 1
372 ; VFTCISB with the highest useful class selector, returning 1 if all elements
373 ; have the right class (CC == 0).
; The selector (i32 4094) is expected to appear unchanged on the instruction;
; the i32 member of the result is tested with icmp eq 0 and zero-extended,
; which the CHECK expects to become a lochie of 1 into %r2.
374 define i32 @test_vftcisb_all_bool(<4 x float> %a) {
375 ; CHECK-LABEL: test_vftcisb_all_bool:
376 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
378 ; CHECK: lochie %r2, 1
380 %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
381 %res = extractvalue {<4 x i32>, i32} %call, 1
382 %cmp = icmp eq i32 %res, 0
383 %ext = zext i1 %cmp to i32
387 ; VFISB with a rounding mode not usable via standard intrinsics.
; The two i32 operands (0, 4) are expected to appear in the same order as the
; last two operands of the vfisb instruction.
388 define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
389 ; CHECK-LABEL: test_vfisb_0_4:
390 ; CHECK: vfisb %v24, %v24, 0, 4
392 %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
396 ; VFISB with IEEE-inexact exception suppressed.
; The two i32 operands (4, 0) are expected to appear in the same order as the
; last two operands of the vfisb instruction.
397 define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
398 ; CHECK-LABEL: test_vfisb_4_0:
399 ; CHECK: vfisb %v24, %v24, 4, 0
401 %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
; VFMAXDB: the trailing i32 operand of the intrinsic (4) is expected to appear
; unchanged as the last operand of the vfmaxdb instruction.
406 define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
407 ; CHECK-LABEL: test_vfmaxdb:
408 ; CHECK: vfmaxdb %v24, %v24, %v26, 4
410 %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
411 ret <2 x double> %res
; VFMINDB: the trailing i32 operand of the intrinsic (4) is expected to appear
; unchanged as the last operand of the vfmindb instruction.
415 define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
416 ; CHECK-LABEL: test_vfmindb:
417 ; CHECK: vfmindb %v24, %v24, %v26, 4
419 %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
420 ret <2 x double> %res
; VFMAXSB: the trailing i32 operand of the intrinsic (4) is expected to appear
; unchanged as the last operand of the vfmaxsb instruction.
424 define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
425 ; CHECK-LABEL: test_vfmaxsb:
426 ; CHECK: vfmaxsb %v24, %v24, %v26, 4
428 %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
; VFMINSB: the trailing i32 operand of the intrinsic (4) is expected to appear
; unchanged as the last operand of the vfminsb instruction.
433 define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
434 ; CHECK-LABEL: test_vfminsb:
435 ; CHECK: vfminsb %v24, %v24, %v26, 4
437 %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)