; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -stop-after=finalize-isel -o - %s | FileCheck %s

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
; CHECK-LABEL: name: test_vldrbq_gather_offset_s16
; CHECK: early-clobber %2:mqpr = MVE_VLDRBS16_rq %0, %1, 0, $noreg, $noreg :: (load (s64), align 1)
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrbq_gather_offset_z_s32
; CHECK: early-clobber %4:mqpr = MVE_VLDRBS32_rq %0, %1, 1, killed %3, $noreg :: (load (s32), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: name: test_vldrdq_gather_base_s64
; CHECK: early-clobber %1:mqpr = MVE_VLDRDU64_qi %0, 616, 0, $noreg, $noreg :: (load (s128), align 1)
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrwq_gather_base_z_f32
; CHECK: early-clobber %3:mqpr = MVE_VLDRWU32_qi %0, -300, 1, killed %2, $noreg :: (load (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(ptr %addr) {
; CHECK-LABEL: name: test_vldrdq_gather_base_wb_s64
; CHECK: %2:mqpr, early-clobber %3:mqpr = MVE_VLDRDU64_qi_pre %1, 576, 0, $noreg, $noreg :: (load (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, ptr %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: name: test_vldrwq_gather_base_wb_z_f32
; CHECK: %4:mqpr, early-clobber %5:mqpr = MVE_VLDRWU32_qi_pre %3, -352, 1, killed %2, $noreg :: (load (s128), align 1)
entry:
  %0 = load <4 x i32>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, ptr %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
; CHECK-LABEL: name: test_vstrbq_scatter_offset_s32
; CHECK: MVE_VSTRB32_rq %2, %0, %1, 0, $noreg, $noreg :: (store (s32), align 1)
entry:
  call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrbq_scatter_offset_p_s8
; CHECK: MVE_VSTRB8_rq %2, %0, %1, 1, killed %4, $noreg :: (store (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_u64
; CHECK: MVE_VSTRD64_qi %1, %0, -472, 0, $noreg, $noreg :: (store (s128), align 1)
entry:
  call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_p_s64
; CHECK: MVE_VSTRD64_qi %1, %0, 888, 1, killed %3, $noreg :: (store (s128), align 1)
entry:
  %0 = zext i16 %p to i32
  %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <2 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(ptr %addr, <2 x i64> %value) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_s64
; CHECK: %3:mqpr = MVE_VSTRD64_qi_pre %1, %2, 208, 0, $noreg, $noreg :: (store (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
  store <2 x i64> %1, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: name: test_vstrdq_scatter_base_wb_p_s64
; CHECK: %5:mqpr = MVE_VSTRD64_qi_pre %1, %3, 248, 1, killed %4, $noreg :: (store (s128), align 1)
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 248, <2 x i64> %value, <2 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)
declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)
declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)