; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Check that we correctly scale the split part indirect offsets by VSCALE.
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; CHECK-LABEL: callee_scalable_vector_split_indirect:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vl8re32.v v0, (a1)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v24
; CHECK-NEXT:    vadd.vv v16, v16, v0
; CHECK-NEXT:    ret
  %a = add <vscale x 32 x i32> %x, %y
  ret <vscale x 32 x i32> %a
}

; Call the function above. Check that we set the arguments correctly.
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -144
; RV32-NEXT:    .cfi_def_cfa_offset 144
; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 144
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vs8r.v v8, (a0)
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vs8r.v v16, (a1)
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    call callee_scalable_vector_split_indirect
; RV32-NEXT:    addi sp, s0, -144
; RV32-NEXT:    .cfi_def_cfa sp, 144
; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 144
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_scalable_vector_split_indirect:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -144
; RV64-NEXT:    .cfi_def_cfa_offset 144
; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 144
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    sub sp, sp, a0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vs8r.v v8, (a0)
; RV64-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vs8r.v v16, (a1)
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    call callee_scalable_vector_split_indirect
; RV64-NEXT:    addi sp, s0, -144
; RV64-NEXT:    .cfi_def_cfa sp, 144
; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 144
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> %x)
  ret <vscale x 32 x i32> %a
}

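; Check that a tuple return value comes back in two LMUL=2 register groups
; (v8-v9 and v10-v11) and that swapping the two fields in the caller only
; needs whole-register moves (vmv2r.v).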
define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return() {
; RV32-LABEL: caller_tuple_return:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    call callee_tuple_return
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_return:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    call callee_tuple_return
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 0)
  %c = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 1)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %c, i32 0)
  %e = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d, <vscale x 4 x i32> %b, i32 1)
  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %e
}

declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()

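; Check that a tuple argument whose fields were swapped is passed to the
; callee in v8-v9 and v10-v11 using only whole-register moves (vmv2r.v).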
define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x) {
; RV32-LABEL: caller_tuple_argument:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    call callee_tuple_argument
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_argument:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    call callee_tuple_argument
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 0)
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 1)
  %c = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %b, i32 0)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %c, <vscale x 4 x i32> %a, i32 1)
  call void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d)
  ret void
}

declare void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2))