; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
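
; Codegen tests for the MVE VCMUL intrinsic family: the plain form, the
; predicated (_m) form and the don't-care (_x) form, each at rotations
; #0, #90, #180 and #270, for both f16 and f32 vectors.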

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)

declare <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 0, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 1, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 2, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 3, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}
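
; Predicated (_m) forms: the predicate built from %p selects the active lanes;
; inactive lanes of the result are taken from the %inactive operand.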
define arm_aapcs_vfpcc <8 x half> @test_vcmulq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}
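
; Don't-care (_x) forms: same predication, but the passthrough operand is
; undef, so inactive lanes of the result are unspecified.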
define arm_aapcs_vfpcc <8 x half> @test_vcmulq_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}