test/CodeGen/SystemZ/vec-intrinsics-02.ll

   1 ; Test vector intrinsics added with z14.
   2 ;
   3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
   4
   5 declare <2 x i64> @llvm.s390.vbperm(<16 x i8>, <16 x i8>)
   6 declare <16 x i8> @llvm.s390.vmslg(<2 x i64>, <2 x i64>, <16 x i8>, i32)
   7 declare <16 x i8> @llvm.s390.vlrl(i32, i8 *)
   8 declare void @llvm.s390.vstrl(<16 x i8>, i32, i8 *)
   9
  10 declare {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float>, <4 x float>)
  11 declare {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float>, <4 x float>)
  12 declare {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float>, <4 x float>)
  13 declare {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float>, i32)
  14 declare <4 x float> @llvm.s390.vfisb(<4 x float>, i32, i32)
  15
  16 declare <2 x double> @llvm.s390.vfmaxdb(<2 x double>, <2 x double>, i32)
  17 declare <2 x double> @llvm.s390.vfmindb(<2 x double>, <2 x double>, i32)
  18 declare <4 x float> @llvm.s390.vfmaxsb(<4 x float>, <4 x float>, i32)
  19 declare <4 x float> @llvm.s390.vfminsb(<4 x float>, <4 x float>, i32)
  20
  21 ; VBPERM: the llvm.s390.vbperm call should be selected as a vbperm instruction.
  22 define <2 x i64> @test_vbperm(<16 x i8> %a, <16 x i8> %b) {
  23 ; CHECK-LABEL: test_vbperm:
  24 ; CHECK: vbperm %v24, %v24, %v26
  25 ; CHECK: br %r14
  26   %res = call <2 x i64> @llvm.s390.vbperm(<16 x i8> %a, <16 x i8> %b)
  27   ret <2 x i64> %res
  28 }
  29
  30 ; VMSLG with no shifts: the final i32 operand (0) should appear as the vmslg immediate.
  31 define <16 x i8> @test_vmslg1(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
  32 ; CHECK-LABEL: test_vmslg1:
  33 ; CHECK: vmslg %v24, %v24, %v26, %v28, 0
  34 ; CHECK: br %r14
  35   %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 0)
  36   ret <16 x i8> %res
  37 }
  38
  39 ; VMSLG with both shifts: the final i32 operand (12) should appear as the vmslg immediate.
  40 define <16 x i8> @test_vmslg2(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
  41 ; CHECK-LABEL: test_vmslg2:
  42 ; CHECK: vmslg %v24, %v24, %v26, %v28, 12
  43 ; CHECK: br %r14
  44   %res = call <16 x i8> @llvm.s390.vmslg(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c, i32 12)
  45   ret <16 x i8> %res
  46 }
  47
  48 ; VLRLR with the lowest in-range displacement: variable length, bare pointer, displacement 0.
  49 define <16 x i8> @test_vlrlr1(i8 *%ptr, i32 %length) {
  50 ; CHECK-LABEL: test_vlrlr1:
  51 ; CHECK: vlrlr %v24, %r3, 0(%r2)
  52 ; CHECK: br %r14
  53   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  54   ret <16 x i8> %res
  55 }
  56
  57 ; VLRLR with the highest in-range displacement: the 4095-byte offset should fold into the address.
  58 define <16 x i8> @test_vlrlr2(i8 *%base, i32 %length) {
  59 ; CHECK-LABEL: test_vlrlr2:
  60 ; CHECK: vlrlr %v24, %r3, 4095(%r2)
  61 ; CHECK: br %r14
  62   %ptr = getelementptr i8, i8 *%base, i64 4095
  63   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  64   ret <16 x i8> %res
  65 }
  66
  67 ; VLRLR with an out-of-range displacement: 4096 should not fold, leaving displacement 0 off a base register.
  68 define <16 x i8> @test_vlrlr3(i8 *%base, i32 %length) {
  69 ; CHECK-LABEL: test_vlrlr3:
  70 ; CHECK: vlrlr %v24, %r3, 0({{%r[1-5]}})
  71 ; CHECK: br %r14
  72   %ptr = getelementptr i8, i8 *%base, i64 4096
  73   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  74   ret <16 x i8> %res
  75 }
  76
  77 ; Check that VLRLR doesn't allow an index: base + index is expected as one base register, displacement 0.
  78 define <16 x i8> @test_vlrlr4(i8 *%base, i64 %index, i32 %length) {
  79 ; CHECK-LABEL: test_vlrlr4:
  80 ; CHECK: vlrlr %v24, %r4, 0({{%r[1-5]}})
  81 ; CHECK: br %r14
  82   %ptr = getelementptr i8, i8 *%base, i64 %index
  83   %res = call <16 x i8> @llvm.s390.vlrl(i32 %length, i8 *%ptr)
  84   ret <16 x i8> %res
  85 }
  86
  87 ; VLRL with the lowest in-range displacement: a constant length (0) should select the immediate form.
  88 define <16 x i8> @test_vlrl1(i8 *%ptr) {
  89 ; CHECK-LABEL: test_vlrl1:
  90 ; CHECK: vlrl %v24, 0(%r2), 0
  91 ; CHECK: br %r14
  92   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
  93   ret <16 x i8> %res
  94 }
  95
  96 ; VLRL with the highest in-range displacement: the 4095-byte offset should fold into the address.
  97 define <16 x i8> @test_vlrl2(i8 *%base) {
  98 ; CHECK-LABEL: test_vlrl2:
  99 ; CHECK: vlrl %v24, 4095(%r2), 0
 100 ; CHECK: br %r14
 101   %ptr = getelementptr i8, i8 *%base, i64 4095
 102   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
 103   ret <16 x i8> %res
 104 }
 105
 106 ; VLRL with an out-of-range displacement: 4096 should not fold, leaving displacement 0 off a base register.
 107 define <16 x i8> @test_vlrl3(i8 *%base) {
 108 ; CHECK-LABEL: test_vlrl3:
 109 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
 110 ; CHECK: br %r14
 111   %ptr = getelementptr i8, i8 *%base, i64 4096
 112   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
 113   ret <16 x i8> %res
 114 }
 115
 116 ; Check that VLRL doesn't allow an index: base + index is expected as one base register, displacement 0.
 117 define <16 x i8> @test_vlrl4(i8 *%base, i64 %index) {
 118 ; CHECK-LABEL: test_vlrl4:
 119 ; CHECK: vlrl %v24, 0({{%r[1-5]}}), 0
 120 ; CHECK: br %r14
 121   %ptr = getelementptr i8, i8 *%base, i64 %index
 122   %res = call <16 x i8> @llvm.s390.vlrl(i32 0, i8 *%ptr)
 123   ret <16 x i8> %res
 124 }
 125
 126 ; VSTRLR with the lowest in-range displacement: variable length, bare pointer, displacement 0.
 127 define void @test_vstrlr1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
 128 ; CHECK-LABEL: test_vstrlr1:
 129 ; CHECK: vstrlr %v24, %r3, 0(%r2)
 130 ; CHECK: br %r14
 131   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
 132   ret void
 133 }
 134
 135 ; VSTRLR with the highest in-range displacement: the 4095-byte offset should fold into the address.
 136 define void @test_vstrlr2(<16 x i8> %vec, i8 *%base, i32 %length) {
 137 ; CHECK-LABEL: test_vstrlr2:
 138 ; CHECK: vstrlr %v24, %r3, 4095(%r2)
 139 ; CHECK: br %r14
 140   %ptr = getelementptr i8, i8 *%base, i64 4095
 141   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
 142   ret void
 143 }
 144
 145 ; VSTRLR with an out-of-range displacement: 4096 should not fold, leaving displacement 0 off a base register.
 146 define void @test_vstrlr3(<16 x i8> %vec, i8 *%base, i32 %length) {
 147 ; CHECK-LABEL: test_vstrlr3:
 148 ; CHECK: vstrlr %v24, %r3, 0({{%r[1-5]}})
 149 ; CHECK: br %r14
 150   %ptr = getelementptr i8, i8 *%base, i64 4096
 151   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
 152   ret void
 153 }
 154
 155 ; Check that VSTRLR doesn't allow an index: base + index is expected as one base register, displacement 0.
 156 define void @test_vstrlr4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
 157 ; CHECK-LABEL: test_vstrlr4:
 158 ; CHECK: vstrlr %v24, %r4, 0({{%r[1-5]}})
 159 ; CHECK: br %r14
 160   %ptr = getelementptr i8, i8 *%base, i64 %index
 161   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 %length, i8 *%ptr)
 162   ret void
 163 }
 164
 165 ; VSTRL with the lowest in-range displacement: a constant length (8) should select the immediate form.
 166 define void @test_vstrl1(<16 x i8> %vec, i8 *%ptr) {
 167 ; CHECK-LABEL: test_vstrl1:
 168 ; CHECK: vstrl %v24, 0(%r2), 8
 169 ; CHECK: br %r14
 170   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
 171   ret void
 172 }
 173
 174 ; VSTRL with the highest in-range displacement: the 4095-byte offset should fold into the address.
 175 define void @test_vstrl2(<16 x i8> %vec, i8 *%base) {
 176 ; CHECK-LABEL: test_vstrl2:
 177 ; CHECK: vstrl %v24, 4095(%r2), 8
 178 ; CHECK: br %r14
 179   %ptr = getelementptr i8, i8 *%base, i64 4095
 180   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
 181   ret void
 182 }
 183
 184 ; VSTRL with an out-of-range displacement: 4096 should not fold, leaving displacement 0 off a base register.
 185 define void @test_vstrl3(<16 x i8> %vec, i8 *%base) {
 186 ; CHECK-LABEL: test_vstrl3:
 187 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
 188 ; CHECK: br %r14
 189   %ptr = getelementptr i8, i8 *%base, i64 4096
 190   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
 191   ret void
 192 }
 193
 194 ; Check that VSTRL doesn't allow an index: base + index is expected as one base register, displacement 0.
 195 define void @test_vstrl4(<16 x i8> %vec, i8 *%base, i64 %index) {
 196 ; CHECK-LABEL: test_vstrl4:
 197 ; CHECK: vstrl %v24, 0({{%r[1-5]}}), 8
 198 ; CHECK: br %r14
 199   %ptr = getelementptr i8, i8 *%base, i64 %index
 200   call void @llvm.s390.vstrl(<16 x i8> %vec, i32 8, i8 *%ptr)
 201   ret void
 202 }
 203
 204 ; VFCESBS with no processing of the result: the i32 (CC) member is returned, expected via ipm/srl.
 205 define i32 @test_vfcesbs(<4 x float> %a, <4 x float> %b) {
 206 ; CHECK-LABEL: test_vfcesbs:
 207 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
 208 ; CHECK: ipm %r2
 209 ; CHECK: srl %r2, 28
 210 ; CHECK: br %r14
 211   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
 212                                                    <4 x float> %b)
 213   %res = extractvalue {<4 x i32>, i32} %call, 1
 214   ret i32 %res
 215 }
 216
 217 ; VFCESBS, returning 1 if any elements are equal (CC != 3), expected as lhi followed by lochile.
 218 define i32 @test_vfcesbs_any_bool(<4 x float> %a, <4 x float> %b) {
 219 ; CHECK-LABEL: test_vfcesbs_any_bool:
 220 ; CHECK: vfcesbs {{%v[0-9]+}}, %v24, %v26
 221 ; CHECK: lhi %r2, 0
 222 ; CHECK: lochile %r2, 1
 223 ; CHECK: br %r14
 224   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
 225                                                    <4 x float> %b)
 226   %res = extractvalue {<4 x i32>, i32} %call, 1
 227   %cmp = icmp ne i32 %res, 3
 228   %ext = zext i1 %cmp to i32
 229   ret i32 %ext
 230 }
 231
 232 ; VFCESBS, storing to %ptr if any elements are equal (CC <= 2); a conditional return should directly follow the compare.
 233 define <4 x i32> @test_vfcesbs_any_store(<4 x float> %a, <4 x float> %b,
 234                                          i32 *%ptr) {
 235 ; CHECK-LABEL: test_vfcesbs_any_store:
 236 ; CHECK-NOT: %r
 237 ; CHECK: vfcesbs %v24, %v24, %v26
 238 ; CHECK-NEXT: {{bor|bnler}} %r14
 239 ; CHECK: mvhi 0(%r2), 0
 240 ; CHECK: br %r14
 241   %call = call {<4 x i32>, i32} @llvm.s390.vfcesbs(<4 x float> %a,
 242                                                    <4 x float> %b)
 243   %res = extractvalue {<4 x i32>, i32} %call, 0
 244   %cc = extractvalue {<4 x i32>, i32} %call, 1
 245   %cmp = icmp ule i32 %cc, 2
 246   br i1 %cmp, label %store, label %exit
 247
 248 store:
 249   store i32 0, i32 *%ptr
 250   br label %exit
 251
 252 exit:
 253   ret <4 x i32> %res
 254 }
 255
 256 ; VFCHSBS with no processing of the result: the i32 (CC) member is returned, expected via ipm/srl.
 257 define i32 @test_vfchsbs(<4 x float> %a, <4 x float> %b) {
 258 ; CHECK-LABEL: test_vfchsbs:
 259 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
 260 ; CHECK: ipm %r2
 261 ; CHECK: srl %r2, 28
 262 ; CHECK: br %r14
 263   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
 264                                                    <4 x float> %b)
 265   %res = extractvalue {<4 x i32>, i32} %call, 1
 266   ret i32 %res
 267 }
 268
 269 ; VFCHSBS, returning 1 if not all elements are higher (CC >= 1), expected as lhi followed by lochinhe.
 270 define i32 @test_vfchsbs_notall_bool(<4 x float> %a, <4 x float> %b) {
 271 ; CHECK-LABEL: test_vfchsbs_notall_bool:
 272 ; CHECK: vfchsbs {{%v[0-9]+}}, %v24, %v26
 273 ; CHECK: lhi %r2, 0
 274 ; CHECK: lochinhe %r2, 1
 275 ; CHECK: br %r14
 276   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
 277                                                    <4 x float> %b)
 278   %res = extractvalue {<4 x i32>, i32} %call, 1
 279   %cmp = icmp sge i32 %res, 1
 280   %ext = zext i1 %cmp to i32
 281   ret i32 %ext
 282 }
 283
 284 ; VFCHSBS, storing to %ptr if not all elements are higher (CC > 0); a conditional return should directly follow the compare.
 285 define <4 x i32> @test_vfchsbs_notall_store(<4 x float> %a, <4 x float> %b,
 286                                             i32 *%ptr) {
 287 ; CHECK-LABEL: test_vfchsbs_notall_store:
 288 ; CHECK-NOT: %r
 289 ; CHECK: vfchsbs %v24, %v24, %v26
 290 ; CHECK-NEXT: {{bher|ber}} %r14
 291 ; CHECK: mvhi 0(%r2), 0
 292 ; CHECK: br %r14
 293   %call = call {<4 x i32>, i32} @llvm.s390.vfchsbs(<4 x float> %a,
 294                                                    <4 x float> %b)
 295   %res = extractvalue {<4 x i32>, i32} %call, 0
 296   %cc = extractvalue {<4 x i32>, i32} %call, 1
 297   %cmp = icmp ugt i32 %cc, 0
 298   br i1 %cmp, label %store, label %exit
 299
 300 store:
 301   store i32 0, i32 *%ptr
 302   br label %exit
 303
 304 exit:
 305   ret <4 x i32> %res
 306 }
 307
 308 ; VFCHESBS with no processing of the result: the i32 (CC) member is returned, expected via ipm/srl.
 309 define i32 @test_vfchesbs(<4 x float> %a, <4 x float> %b) {
 310 ; CHECK-LABEL: test_vfchesbs:
 311 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
 312 ; CHECK: ipm %r2
 313 ; CHECK: srl %r2, 28
 314 ; CHECK: br %r14
 315   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
 316                                                     <4 x float> %b)
 317   %res = extractvalue {<4 x i32>, i32} %call, 1
 318   ret i32 %res
 319 }
 320
 321 ; VFCHESBS, returning 1 if no element is higher or equal (CC == 3), expected as lhi followed by lochio.
 322 define i32 @test_vfchesbs_none_bool(<4 x float> %a, <4 x float> %b) {
 323 ; CHECK-LABEL: test_vfchesbs_none_bool:
 324 ; CHECK: vfchesbs {{%v[0-9]+}}, %v24, %v26
 325 ; CHECK: lhi %r2, 0
 326 ; CHECK: lochio %r2, 1
 327 ; CHECK: br %r14
 328   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
 329                                                     <4 x float> %b)
 330   %res = extractvalue {<4 x i32>, i32} %call, 1
 331   %cmp = icmp eq i32 %res, 3
 332   %ext = zext i1 %cmp to i32
 333   ret i32 %ext
 334 }
 335
 336 ; VFCHESBS, storing to %ptr if no element is higher or equal (CC >= 3); a conditional return should directly follow the compare.
 337 define <4 x i32> @test_vfchesbs_none_store(<4 x float> %a, <4 x float> %b,
 338                                            i32 *%ptr) {
 339 ; CHECK-LABEL: test_vfchesbs_none_store:
 340 ; CHECK-NOT: %r
 341 ; CHECK: vfchesbs %v24, %v24, %v26
 342 ; CHECK-NEXT: {{bnor|bler}} %r14
 343 ; CHECK: mvhi 0(%r2), 0
 344 ; CHECK: br %r14
 345   %call = call {<4 x i32>, i32} @llvm.s390.vfchesbs(<4 x float> %a,
 346                                                     <4 x float> %b)
 347   %res = extractvalue {<4 x i32>, i32} %call, 0
 348   %cc = extractvalue {<4 x i32>, i32} %call, 1
 349   %cmp = icmp uge i32 %cc, 3
 350   br i1 %cmp, label %store, label %exit
 351
 352 store:
 353   store i32 0, i32 *%ptr
 354   br label %exit
 355
 356 exit:
 357   ret <4 x i32> %res
 358 }
 359
 360 ; VFTCISB with the lowest useful class selector (1) and no processing of the result (CC expected via ipm/srl).
 361 define i32 @test_vftcisb(<4 x float> %a) {
 362 ; CHECK-LABEL: test_vftcisb:
 363 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 1
 364 ; CHECK: ipm %r2
 365 ; CHECK: srl %r2, 28
 366 ; CHECK: br %r14
 367   %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 1)
 368   %res = extractvalue {<4 x i32>, i32} %call, 1
 369   ret i32 %res
 370 }
 371
 372 ; VFTCISB with the highest useful class selector (4094), returning 1 if all elements
 373 ; have the right class (CC == 0), expected as lhi followed by lochie.
 374 define i32 @test_vftcisb_all_bool(<4 x float> %a) {
 375 ; CHECK-LABEL: test_vftcisb_all_bool:
 376 ; CHECK: vftcisb {{%v[0-9]+}}, %v24, 4094
 377 ; CHECK: lhi %r2, 0
 378 ; CHECK: lochie %r2, 1
 379 ; CHECK: br %r14
 380   %call = call {<4 x i32>, i32} @llvm.s390.vftcisb(<4 x float> %a, i32 4094)
 381   %res = extractvalue {<4 x i32>, i32} %call, 1
 382   %cmp = icmp eq i32 %res, 0
 383   %ext = zext i1 %cmp to i32
 384   ret i32 %ext
 385 }
 386
 387 ; VFISB with a rounding mode not usable via standard intrinsics: i32 operands 0 and 4 become the immediates.
 388 define <4 x float> @test_vfisb_0_4(<4 x float> %a) {
 389 ; CHECK-LABEL: test_vfisb_0_4:
 390 ; CHECK: vfisb %v24, %v24, 0, 4
 391 ; CHECK: br %r14
 392   %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 0, i32 4)
 393   ret <4 x float> %res
 394 }
 395
 396 ; VFISB with IEEE-inexact exception suppressed: i32 operands 4 and 0 become the immediates.
 397 define <4 x float> @test_vfisb_4_0(<4 x float> %a) {
 398 ; CHECK-LABEL: test_vfisb_4_0:
 399 ; CHECK: vfisb %v24, %v24, 4, 0
 400 ; CHECK: br %r14
 401   %res = call <4 x float> @llvm.s390.vfisb(<4 x float> %a, i32 4, i32 0)
 402   ret <4 x float> %res
 403 }
 404
 405 ; VFMAXDB: the final i32 operand (4) should appear as the vfmaxdb immediate.
 406 define <2 x double> @test_vfmaxdb(<2 x double> %a, <2 x double> %b) {
 407 ; CHECK-LABEL: test_vfmaxdb:
 408 ; CHECK: vfmaxdb %v24, %v24, %v26, 4
 409 ; CHECK: br %r14
 410   %res = call <2 x double> @llvm.s390.vfmaxdb(<2 x double> %a, <2 x double> %b, i32 4)
 411   ret <2 x double> %res
 412 }
 413
 414 ; VFMINDB: the final i32 operand (4) should appear as the vfmindb immediate.
 415 define <2 x double> @test_vfmindb(<2 x double> %a, <2 x double> %b) {
 416 ; CHECK-LABEL: test_vfmindb:
 417 ; CHECK: vfmindb %v24, %v24, %v26, 4
 418 ; CHECK: br %r14
 419   %res = call <2 x double> @llvm.s390.vfmindb(<2 x double> %a, <2 x double> %b, i32 4)
 420   ret <2 x double> %res
 421 }
 422
 423 ; VFMAXSB: the final i32 operand (4) should appear as the vfmaxsb immediate.
 424 define <4 x float> @test_vfmaxsb(<4 x float> %a, <4 x float> %b) {
 425 ; CHECK-LABEL: test_vfmaxsb:
 426 ; CHECK: vfmaxsb %v24, %v24, %v26, 4
 427 ; CHECK: br %r14
 428   %res = call <4 x float> @llvm.s390.vfmaxsb(<4 x float> %a, <4 x float> %b, i32 4)
 429   ret <4 x float> %res
 430 }
 431
 432 ; VFMINSB: the final i32 operand (4) should appear as the vfminsb immediate.
 433 define <4 x float> @test_vfminsb(<4 x float> %a, <4 x float> %b) {
 434 ; CHECK-LABEL: test_vfminsb:
 435 ; CHECK: vfminsb %v24, %v24, %v26, 4
 436 ; CHECK: br %r14
 437   %res = call <4 x float> @llvm.s390.vfminsb(<4 x float> %a, <4 x float> %b, i32 4)
 438   ret <4 x float> %res
 439 }
 440