llvm/test/CodeGen/Thumb2/cde-vec.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+cdecp0 -mattr=+cdecp1 -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
   3
   4 declare <16 x i8> @llvm.arm.cde.vcx1q(i32 immarg, i32 immarg) ; (coproc, imm)
   5 declare <16 x i8> @llvm.arm.cde.vcx1qa(i32 immarg, <16 x i8>, i32 immarg) ; (coproc, acc, imm)
   6 declare <16 x i8> @llvm.arm.cde.vcx2q(i32 immarg, <16 x i8>, i32 immarg) ; (coproc, n, imm)
   7 declare <16 x i8> @llvm.arm.cde.vcx2qa(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg) ; (coproc, acc, n, imm)
   8 declare <16 x i8> @llvm.arm.cde.vcx3q(i32 immarg, <16 x i8>, <16 x i8>, i32 immarg) ; (coproc, n, m, imm)
   9 declare <16 x i8> @llvm.arm.cde.vcx3qa(i32 immarg, <16 x i8>, <16 x i8>, <16 x i8>, i32 immarg) ; (coproc, acc, n, m, imm)
  10
  11 define arm_aapcs_vfpcc <16 x i8> @test_vcx1q_u8() {
     ; Unpredicated vcx1q on coprocessor 0 with immediate 1111. The call result
     ; is returned as-is, so one vcx1 writing q0 is the whole expected body.
  12 ; CHECK-LABEL: test_vcx1q_u8:
  13 ; CHECK:       @ %bb.0: @ %entry
  14 ; CHECK-NEXT:    vcx1 p0, q0, #1111
  15 ; CHECK-NEXT:    bx lr
  16 entry:
  17   %res = call <16 x i8> @llvm.arm.cde.vcx1q(i32 0, i32 1111)
  18   ret <16 x i8> %res
  19 }
  20
  21 define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_1(<16 x i8> %acc) {
     ; Accumulating vcx1qa on coprocessor 1 with immediate 1112. %acc is passed
     ; straight to the intrinsic; the expected vcx1a uses q0 in place.
  22 ; CHECK-LABEL: test_vcx1qa_1:
  23 ; CHECK:       @ %bb.0: @ %entry
  24 ; CHECK-NEXT:    vcx1a p1, q0, #1112
  25 ; CHECK-NEXT:    bx lr
  26 entry:
  27   %res = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 1, <16 x i8> %acc, i32 1112)
  28   ret <16 x i8> %res
  29 }
  30
  31 define arm_aapcs_vfpcc <4 x i32> @test_vcx1qa_2(<4 x i32> %acc) {
     ; Same as the vcx1qa case but with a <4 x i32> accumulator on coprocessor 0.
     ; The bitcasts to and from the intrinsic's <16 x i8> type are expected to
     ; produce no instructions of their own.
  32 ; CHECK-LABEL: test_vcx1qa_2:
  33 ; CHECK:       @ %bb.0: @ %entry
  34 ; CHECK-NEXT:    vcx1a p0, q0, #1113
  35 ; CHECK-NEXT:    bx lr
  36 entry:
  37   %acc.bytes = bitcast <4 x i32> %acc to <16 x i8>
  38   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx1qa(i32 0, <16 x i8> %acc.bytes, i32 1113)
  39   %res = bitcast <16 x i8> %res.bytes to <4 x i32>
  40   ret <4 x i32> %res
  41 }
  42
  43 define arm_aapcs_vfpcc <16 x i8> @test_vcx2q_u8(<8 x half> %n) {
     ; vcx2q on coprocessor 1 with immediate 111: an <8 x half> input is
     ; reinterpreted as bytes and the <16 x i8> result is returned unchanged.
  44 ; CHECK-LABEL: test_vcx2q_u8:
  45 ; CHECK:       @ %bb.0: @ %entry
  46 ; CHECK-NEXT:    vcx2 p1, q0, q0, #111
  47 ; CHECK-NEXT:    bx lr
  48 entry:
  49   %n.bytes = bitcast <8 x half> %n to <16 x i8>
  50   %res = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 111)
  51   ret <16 x i8> %res
  52 }
  53
  54 define arm_aapcs_vfpcc <4 x float> @test_vcx2q(<4 x float> %n) {
     ; vcx2q on coprocessor 1 with immediate 112, with <4 x float> as both the
     ; input and result type around the <16 x i8> intrinsic call.
  55 ; CHECK-LABEL: test_vcx2q:
  56 ; CHECK:       @ %bb.0: @ %entry
  57 ; CHECK-NEXT:    vcx2 p1, q0, q0, #112
  58 ; CHECK-NEXT:    bx lr
  59 entry:
  60   %n.bytes = bitcast <4 x float> %n to <16 x i8>
  61   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2q(i32 1, <16 x i8> %n.bytes, i32 112)
  62   %res = bitcast <16 x i8> %res.bytes to <4 x float>
  63   ret <4 x float> %res
  64 }
  65
  66 define arm_aapcs_vfpcc <4 x float> @test_vcx2qa(<4 x float> %acc, <2 x i64> %n) {
     ; Accumulating vcx2qa on coprocessor 0 with immediate 113. The accumulator
     ; and the source have different element types; both are bitcast to bytes.
     ; The expected vcx2a accumulates into q0 and reads q1.
  67 ; CHECK-LABEL: test_vcx2qa:
  68 ; CHECK:       @ %bb.0: @ %entry
  69 ; CHECK-NEXT:    vcx2a p0, q0, q1, #113
  70 ; CHECK-NEXT:    bx lr
  71 entry:
  72   %acc.bytes = bitcast <4 x float> %acc to <16 x i8>
  73   %n.bytes = bitcast <2 x i64> %n to <16 x i8>
  74   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx2qa(i32 0, <16 x i8> %acc.bytes, <16 x i8> %n.bytes, i32 113)
  75   %res = bitcast <16 x i8> %res.bytes to <4 x float>
  76   ret <4 x float> %res
  77 }
  78
  79 define arm_aapcs_vfpcc <16 x i8> @test_vcx3q_u8(<8 x i16> %n, <4 x i32> %m) {
     ; vcx3q on coprocessor 0 with immediate 11: two sources of different
     ; element types are viewed as bytes, and the byte-vector result is returned.
  80 ; CHECK-LABEL: test_vcx3q_u8:
  81 ; CHECK:       @ %bb.0: @ %entry
  82 ; CHECK-NEXT:    vcx3 p0, q0, q0, q1, #11
  83 ; CHECK-NEXT:    bx lr
  84 entry:
  85   %n.bytes = bitcast <8 x i16> %n to <16 x i8>
  86   %m.bytes = bitcast <4 x i32> %m to <16 x i8>
  87   %res = call <16 x i8> @llvm.arm.cde.vcx3q(i32 0, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 11)
  88   ret <16 x i8> %res
  89 }
  90
  91 define arm_aapcs_vfpcc <2 x i64> @test_vcx3q(<2 x i64> %n, <4 x float> %m) {
     ; vcx3q on coprocessor 1 with immediate 12, returning the result
     ; reinterpreted as <2 x i64>.
  92 ; CHECK-LABEL: test_vcx3q:
  93 ; CHECK:       @ %bb.0: @ %entry
  94 ; CHECK-NEXT:    vcx3 p1, q0, q0, q1, #12
  95 ; CHECK-NEXT:    bx lr
  96 entry:
  97   %n.bytes = bitcast <2 x i64> %n to <16 x i8>
  98   %m.bytes = bitcast <4 x float> %m to <16 x i8>
  99   %res.bytes = call <16 x i8> @llvm.arm.cde.vcx3q(i32 1, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12)
 100   %res = bitcast <16 x i8> %res.bytes to <2 x i64>
 101   ret <2 x i64> %res
 102 }
 103
 104 define arm_aapcs_vfpcc <16 x i8> @test_vcx3qa(<16 x i8> %acc, <8 x i16> %n, <4 x float> %m) {
     ; Accumulating vcx3qa on coprocessor 1 with immediate 13. %acc is already
     ; a byte vector, so only the two sources are bitcast. The expected vcx3a
     ; accumulates into q0 and reads q1 and q2.
 105 ; CHECK-LABEL: test_vcx3qa:
 106 ; CHECK:       @ %bb.0: @ %entry
 107 ; CHECK-NEXT:    vcx3a p1, q0, q1, q2, #13
 108 ; CHECK-NEXT:    bx lr
 109 entry:
 110   %n.bytes = bitcast <8 x i16> %n to <16 x i8>
 111   %m.bytes = bitcast <4 x float> %m to <16 x i8>
 112   %res = call <16 x i8> @llvm.arm.cde.vcx3qa(i32 1, <16 x i8> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 13)
 113   ret <16 x i8> %res
 114 }
 115
 116 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) ; i32 mask -> <16 x i1> predicate
 117 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) ; i32 mask -> <8 x i1> predicate
 118 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) ; i32 mask -> <4 x i1> predicate
 119 declare <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 immarg, <8 x i16>, i32 immarg, <8 x i1>) ; (coproc, inactive, imm, pred)
 120 declare <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 immarg, <16 x i8>, i32 immarg, <16 x i1>) ; (coproc, acc, imm, pred)
 121 declare <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 immarg, <4 x i32>, <16 x i8>, i32 immarg, <4 x i1>) ; (coproc, inactive, n, imm, pred)
 122 declare <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, i32 immarg, <4 x i1>) ; (coproc, acc, n, imm, pred)
 123 declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>) ; (coproc, inactive, n, m, imm, pred)
 124 declare <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 immarg, <4 x float>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>) ; (coproc, acc -- by analogy with vcx1qa/vcx2qa, n, m, imm, pred)
 125
 126 define arm_aapcs_vfpcc <8 x i16> @test_vcx1q_m(<8 x i16> %inactive, i16 zeroext %p) {
     ; Predicated vcx1q on coprocessor 0 with immediate 1111. The i16 mask %p is
     ; widened to i32 and converted to an <8 x i1> predicate. The expected
     ; output loads the predicate from r0 with vmsr, then issues the
     ; instruction with a "t" suffix under vpst.
 127 ; CHECK-LABEL: test_vcx1q_m:
 128 ; CHECK:       @ %bb.0: @ %entry
 129 ; CHECK-NEXT:    vmsr p0, r0
 130 ; CHECK-NEXT:    vpst
 131 ; CHECK-NEXT:    vcx1t p0, q0, #1111
 132 ; CHECK-NEXT:    bx lr
 133 entry:
 134   %p.wide = zext i16 %p to i32
 135   %pred = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
 136   %res = call <8 x i16> @llvm.arm.cde.vcx1q.predicated.v8i16.v8i1(i32 0, <8 x i16> %inactive, i32 1111, <8 x i1> %pred)
 137   ret <8 x i16> %res
 138 }
 139
 140 define arm_aapcs_vfpcc <16 x i8> @test_vcx1qa_m(<16 x i8> %acc, i16 zeroext %p) {
     ; Predicated accumulating vcx1qa on coprocessor 1 with immediate 1112,
     ; using a <16 x i1> predicate built from the widened mask %p.
 141 ; CHECK-LABEL: test_vcx1qa_m:
 142 ; CHECK:       @ %bb.0: @ %entry
 143 ; CHECK-NEXT:    vmsr p0, r0
 144 ; CHECK-NEXT:    vpst
 145 ; CHECK-NEXT:    vcx1at p1, q0, #1112
 146 ; CHECK-NEXT:    bx lr
 147 entry:
 148   %p.wide = zext i16 %p to i32
 149   %pred = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
 150   %res = call <16 x i8> @llvm.arm.cde.vcx1qa.predicated.v16i8.v16i1(i32 1, <16 x i8> %acc, i32 1112, <16 x i1> %pred)
 151   ret <16 x i8> %res
 152 }
 153
 154 define arm_aapcs_vfpcc <4 x i32> @test_vcx2q_m(<4 x i32> %inactive, <4 x float> %n, i16 zeroext %p) {
     ; Predicated vcx2q on coprocessor 0 with immediate 111. %inactive keeps its
     ; <4 x i32> type, which is also the result type; only the source %n is
     ; bitcast to bytes. The predicate is <4 x i1>.
 155 ; CHECK-LABEL: test_vcx2q_m:
 156 ; CHECK:       @ %bb.0: @ %entry
 157 ; CHECK-NEXT:    vmsr p0, r0
 158 ; CHECK-NEXT:    vpst
 159 ; CHECK-NEXT:    vcx2t p0, q0, q1, #111
 160 ; CHECK-NEXT:    bx lr
 161 entry:
 162   %n.bytes = bitcast <4 x float> %n to <16 x i8>
 163   %p.wide = zext i16 %p to i32
 164   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
 165   %res = call <4 x i32> @llvm.arm.cde.vcx2q.predicated.v4i32.v4i1(i32 0, <4 x i32> %inactive, <16 x i8> %n.bytes, i32 111, <4 x i1> %pred)
 166   ret <4 x i32> %res
 167 }
 168
 169 define arm_aapcs_vfpcc <4 x float> @test_vcx2qa_m(<4 x float> %acc, <8 x half> %n, i16 zeroext %p) {
     ; Predicated accumulating vcx2qa on coprocessor 0 with immediate 112.
     ; The <4 x float> accumulator is passed in its own type; the <8 x half>
     ; source is bitcast to bytes. The predicate is <4 x i1>.
 170 ; CHECK-LABEL: test_vcx2qa_m:
 171 ; CHECK:       @ %bb.0: @ %entry
 172 ; CHECK-NEXT:    vmsr p0, r0
 173 ; CHECK-NEXT:    vpst
 174 ; CHECK-NEXT:    vcx2at p0, q0, q1, #112
 175 ; CHECK-NEXT:    bx lr
 176 entry:
 177   %n.bytes = bitcast <8 x half> %n to <16 x i8>
 178   %p.wide = zext i16 %p to i32
 179   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
 180   %res = call <4 x float> @llvm.arm.cde.vcx2qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, i32 112, <4 x i1> %pred)
 181   ret <4 x float> %res
 182 }
 183
 184 define arm_aapcs_vfpcc <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
     ; Predicated vcx3q on coprocessor 0 with immediate 11. %m is already a byte
     ; vector, so only %n needs a bitcast. The <2 x i64> result is paired with
     ; a <4 x i1> predicate, as the intrinsic's .v2i64.v4i1 suffix states.
 185 ; CHECK-LABEL: test_vcx3q_m:
 186 ; CHECK:       @ %bb.0: @ %entry
 187 ; CHECK-NEXT:    vmsr p0, r0
 188 ; CHECK-NEXT:    vpst
 189 ; CHECK-NEXT:    vcx3t p0, q0, q1, q2, #11
 190 ; CHECK-NEXT:    bx lr
 191 entry:
 192   %n.bytes = bitcast <4 x float> %n to <16 x i8>
 193   %p.wide = zext i16 %p to i32
 194   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
 195   %res = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %n.bytes, <16 x i8> %m, i32 11, <4 x i1> %pred)
 196   ret <2 x i64> %res
 197 }
 198
 199 define arm_aapcs_vfpcc <8 x half> @test_vcx3qa_m(<4 x float> %acc, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
     ; Predicated accumulating vcx3qa on coprocessor 0 with immediate 12.
     ; The first vector operand is named %acc, matching the other accumulating
     ; tests in this file. The <4 x float> result is bitcast to <8 x half>
     ; before returning. The expected vcx3at uses q0, q1 and q2 under vpst.
 200 ; CHECK-LABEL: test_vcx3qa_m:
 201 ; CHECK:       @ %bb.0: @ %entry
 202 ; CHECK-NEXT:    vmsr p0, r0
 203 ; CHECK-NEXT:    vpst
 204 ; CHECK-NEXT:    vcx3at p0, q0, q1, q2, #12
 205 ; CHECK-NEXT:    bx lr
 206 entry:
 207   %n.bytes = bitcast <8 x half> %n to <16 x i8>
 208   %m.bytes = bitcast <4 x i32> %m to <16 x i8>
 209   %p.wide = zext i16 %p to i32
 210   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
 211   %res.f32 = call <4 x float> @llvm.arm.cde.vcx3qa.predicated.v4f32.v4i1(i32 0, <4 x float> %acc, <16 x i8> %n.bytes, <16 x i8> %m.bytes, i32 12, <4 x i1> %pred)
 212   %res = bitcast <4 x float> %res.f32 to <8 x half>
 213   ret <8 x half> %res
 214 }