llvm/test/CodeGen/AMDGPU/ret.ll

   1 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   2 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   3 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   4 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   5
   6 ; GCN-LABEL: {{^}}vgpr:
   7 ; GCN-DAG: v_mov_b32_e32 v1, v0
   8 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
   9 ; GCN: s_waitcnt expcnt(0)
  10 ; GCN: v_add_f32_e32 v0, 1.0, v1
  11 ; GCN-NOT: s_endpgm
     ; Vertex shader that exports %arg3 and then returns { %arg3 + 1.0, %arg3 }.
     ; The checks above expect %arg3 (v0) to be copied to v1 and exported, an
     ; s_waitcnt expcnt(0) ahead of the v_add that overwrites v0, and no
     ; s_endpgm after it.
     ; NOTE(review): the absent s_endpgm presumably reflects that a shader
     ; returning values hands off to an epilog instead of ending - confirm.
  12 define amdgpu_vs { float, float } @vgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
  13 bb:
       ; The same value feeds all four export operands; the check shows the
       ; result as `exp mrt0 v0, v0, v0, v0 done vm`.
  14   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  15   %x = fadd float %arg3, 1.000000e+00
       ; Field 0 = %arg3 + 1.0, field 1 = the unmodified %arg3.
  16   %a = insertvalue { float, float } undef, float %x, 0
  17   %b = insertvalue { float, float } %a, float %arg3, 1
  18   ret { float, float } %b
  19 }
  20
  21 ; GCN-LABEL: {{^}}vgpr_literal:
  22 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
  23
  24 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
  25 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
  26 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
  27 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
  28 ; GCN-DAG: s_waitcnt expcnt(0)
  29 ; GCN-NOT: s_endpgm
     ; Vertex shader that exports %arg3 and then returns the constant struct
     ; { 1.0, 2.0, 4.0, -1.0 }. The checks above expect the export first, then
     ; (in any order) the four literals materialised in v0-v3 plus an
     ; s_waitcnt expcnt(0), and no s_endpgm afterwards.
  30 define amdgpu_vs { float, float, float, float } @vgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
  31 bb:
  32   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
       ; The return value is a pure constant; no argument flows into it.
  33   ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
  34 }
  35
  36 ; GCN: .long 165580
  37 ; GCN-NEXT: .long 562
  38 ; GCN-NEXT: .long 165584
  39 ; GCN-NEXT: .long 562
  40 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
  41 ; GCN-NOT: v_mov_b32_e32 v0
  42 ; GCN-NOT: v_mov_b32_e32 v1
  43 ; GCN-NOT: v_mov_b32_e32 v2
  44 ; GCN: v_mov_b32_e32 v3, v4
  45 ; GCN: v_mov_b32_e32 v4, v6
  46 ; GCN-NOT: s_endpgm
     ; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0"). It reads
     ; %arg4 (both elements), element 0 of %arg7 and of %arg8, and %arg12, and
     ; returns them as five floats.
     ; The .long checks ahead of the label expect the value 562 after both
     ; 165580 (0x286CC) and 165584 (0x286D0). 562 = 2^1 + 2^4 + 2^5 + 2^9, i.e.
     ; the positions of %arg4, %arg7, %arg8 and %arg12 when the non-inreg
     ; arguments are counted from %arg3 = 0.
     ; NOTE(review): 0x286CC / 0x286D0 are presumably the SPI_PS_INPUT_ENA and
     ; SPI_PS_INPUT_ADDR register offsets - confirm against the backend.
     ; The remaining checks expect no moves into v0-v2, then v3 <- v4 and
     ; v4 <- v6, and no s_endpgm.
  47 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  48 bb:
       ; Pick the integer lanes that will be returned.
  49   %i0 = extractelement <2 x i32> %arg4, i32 0
  50   %i1 = extractelement <2 x i32> %arg4, i32 1
  51   %i2 = extractelement <2 x i32> %arg7, i32 0
  52   %i3 = extractelement <2 x i32> %arg8, i32 0
       ; Reinterpret the bits as float; no numeric conversion takes place.
  53   %f0 = bitcast i32 %i0 to float
  54   %f1 = bitcast i32 %i1 to float
  55   %f2 = bitcast i32 %i2 to float
  56   %f3 = bitcast i32 %i3 to float
       ; Assemble the five-float return value field by field.
  57   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  58   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  59   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  60   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  61   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  62   ret { float, float, float, float, float } %r4
  63 }
  64
  65 ; GCN: .long 165580
  66 ; GCN-NEXT: .long 1
  67 ; GCN-NEXT: .long 165584
  68 ; GCN-NEXT: .long 1
  69 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
  70 ; GCN: v_mov_b32_e32 v0, 1.0
  71 ; GCN-NOT: s_endpgm
     ; Pixel shader (attribute group #1) that reads none of its arguments and
     ; returns the constant 1.0. The .long checks ahead of the label expect the
     ; value 1 after both 165580 (0x286CC) and 165584 (0x286D0), even though no
     ; argument is used; the body checks expect v0 = 1.0 and no s_endpgm.
     ; NOTE(review): the value 1 presumably means the backend keeps at least one
     ; pixel-shader input enabled - confirm against the backend.
  72 define amdgpu_ps float @ps_input_ena_no_inputs(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  73 bb:
  74   ret float 1.000000e+00
  75 }
  76
  77 ; GCN: .long 165580
  78 ; GCN-NEXT: .long 2081
  79 ; GCN-NEXT: .long 165584
  80 ; GCN-NEXT: .long 2081
  81 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
  82 ; GCN-DAG: v_mov_b32_e32 v0, v4
  83 ; GCN-DAG: v_mov_b32_e32 v1, v2
  84 ; GCN-DAG: v_mov_b32_e32 v2, v3
  85 ; GCN-NOT: s_endpgm
     ; Pixel shader (attribute group #1) that returns { %arg14, %arg8 } with
     ; %arg8 reinterpreted as <2 x float>.
     ; The .long checks ahead of the label expect 2081 after both 165580
     ; (0x286CC) and 165584 (0x286D0). 2081 = 2^0 + 2^5 + 2^11; positions 5 and
     ; 11 are %arg8 and %arg14 when the non-inreg arguments are counted from
     ; %arg3 = 0, while position 0 (%arg3) is not read by the body.
     ; NOTE(review): bit 0 is presumably force-enabled by the backend because of
     ; the %arg14 input (the test name suggests POS_W) - confirm.
     ; The body checks expect v0 <- v4, v1 <- v2, v2 <- v3 and no s_endpgm.
  86 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  87 bb:
       ; Whole-vector bit reinterpretation of %arg8.
  88   %f = bitcast <2 x i32> %arg8 to <2 x float>
  89   %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
  90   %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
  91   ret { float, <2 x float> } %s1
  92 }
  93
  94 ; GCN: .long 165580
  95 ; GCN-NEXT: .long 562
  96 ; GCN-NEXT: .long 165584
  97 ; GCN-NEXT: .long 563
  98 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
  99 ; GCN-DAG: v_mov_b32_e32 v0, v2
 100 ; GCN-DAG: v_mov_b32_e32 v1, v3
 101 ; GCN: v_mov_b32_e32 v2, v4
 102 ; GCN-DAG: v_mov_b32_e32 v3, v6
 103 ; GCN-DAG: v_mov_b32_e32 v4, v8
 104 ; GCN-NOT: s_endpgm
     ; Same body as @vgpr_ps_addr0, but with attribute group #2
     ; ("InitialPSInputAddr"="1"). The .long checks ahead of the label expect 562
     ; after 165580 (0x286CC) and 563 after 165584 (0x286D0); 563 = 562 | 1,
     ; i.e. the first value OR-ed with the attribute value.
     ; NOTE(review): 0x286CC / 0x286D0 are presumably the SPI_PS_INPUT_ENA and
     ; SPI_PS_INPUT_ADDR register offsets - confirm against the backend.
     ; The body checks expect v0 <- v2, v1 <- v3, v2 <- v4, v3 <- v6, v4 <- v8,
     ; and no s_endpgm.
 105 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
 106 bb:
       ; Pick the integer lanes that will be returned.
 107   %i0 = extractelement <2 x i32> %arg4, i32 0
 108   %i1 = extractelement <2 x i32> %arg4, i32 1
 109   %i2 = extractelement <2 x i32> %arg7, i32 0
 110   %i3 = extractelement <2 x i32> %arg8, i32 0
       ; Reinterpret the bits as float; no numeric conversion takes place.
 111   %f0 = bitcast i32 %i0 to float
 112   %f1 = bitcast i32 %i1 to float
 113   %f2 = bitcast i32 %i2 to float
 114   %f3 = bitcast i32 %i3 to float
       ; Assemble the five-float return value field by field.
 115   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 116   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 117   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 118   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 119   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 120   ret { float, float, float, float, float } %r4
 121 }
 122
 123 ; GCN: .long 165580
 124 ; GCN-NEXT: .long 562
 125 ; GCN-NEXT: .long 165584
 126 ; GCN-NEXT: .long 631
 127 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
 128 ; GCN-DAG: v_mov_b32_e32 v0, v2
 129 ; GCN-DAG: v_mov_b32_e32 v1, v3
 130 ; GCN-DAG: v_mov_b32_e32 v2, v6
 131 ; GCN-DAG: v_mov_b32_e32 v3, v8
 132 ; GCN-DAG: v_mov_b32_e32 v4, v12
 133 ; GCN-NOT: s_endpgm
     ; Same body as @vgpr_ps_addr0, but with attribute group #3
     ; ("InitialPSInputAddr"="119"). The .long checks ahead of the label expect
     ; 562 after 165580 (0x286CC) and 631 after 165584 (0x286D0);
     ; 631 = 562 | 119, i.e. the first value OR-ed with the attribute value.
     ; NOTE(review): 0x286CC / 0x286D0 are presumably the SPI_PS_INPUT_ENA and
     ; SPI_PS_INPUT_ADDR register offsets - confirm against the backend.
     ; The body checks expect v0 <- v2, v1 <- v3, v2 <- v6, v3 <- v8,
     ; v4 <- v12 (in any order), and no s_endpgm.
 134 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
 135 bb:
       ; Pick the integer lanes that will be returned.
 136   %i0 = extractelement <2 x i32> %arg4, i32 0
 137   %i1 = extractelement <2 x i32> %arg4, i32 1
 138   %i2 = extractelement <2 x i32> %arg7, i32 0
 139   %i3 = extractelement <2 x i32> %arg8, i32 0
       ; Reinterpret the bits as float; no numeric conversion takes place.
 140   %f0 = bitcast i32 %i0 to float
 141   %f1 = bitcast i32 %i1 to float
 142   %f2 = bitcast i32 %i2 to float
 143   %f3 = bitcast i32 %i3 to float
       ; Assemble the five-float return value field by field.
 144   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 145   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 146   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 147   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 148   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 149   ret { float, float, float, float, float } %r4
 150 }
 151
 152 ; GCN: .long 165580
 153 ; GCN-NEXT: .long 562
 154 ; GCN-NEXT: .long 165584
 155 ; GCN-NEXT: .long 946
 156 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
 157 ; GCN-NOT: v_mov_b32_e32 v0
 158 ; GCN-NOT: v_mov_b32_e32 v1
 159 ; GCN-NOT: v_mov_b32_e32 v2
 160 ; GCN: v_mov_b32_e32 v3, v4
 161 ; GCN: v_mov_b32_e32 v4, v8
 162 ; GCN-NOT: s_endpgm
     ; Same body as @vgpr_ps_addr0, but with attribute group #4
     ; ("InitialPSInputAddr"="418"). The .long checks ahead of the label expect
     ; 562 after 165580 (0x286CC) and 946 after 165584 (0x286D0);
     ; 946 = 562 | 418, i.e. the first value OR-ed with the attribute value.
     ; NOTE(review): 0x286CC / 0x286D0 are presumably the SPI_PS_INPUT_ENA and
     ; SPI_PS_INPUT_ADDR register offsets - confirm against the backend.
     ; The body checks expect no moves into v0-v2, then v3 <- v4 and v4 <- v8,
     ; and no s_endpgm.
 163 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
 164 bb:
       ; Pick the integer lanes that will be returned.
 165   %i0 = extractelement <2 x i32> %arg4, i32 0
 166   %i1 = extractelement <2 x i32> %arg4, i32 1
 167   %i2 = extractelement <2 x i32> %arg7, i32 0
 168   %i3 = extractelement <2 x i32> %arg8, i32 0
       ; Reinterpret the bits as float; no numeric conversion takes place.
 169   %f0 = bitcast i32 %i0 to float
 170   %f1 = bitcast i32 %i1 to float
 171   %f2 = bitcast i32 %i2 to float
 172   %f3 = bitcast i32 %i3 to float
       ; Assemble the five-float return value field by field.
 173   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 174   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 175   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 176   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 177   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 178   ret { float, float, float, float, float } %r4
 179 }
 180
 181 ; GCN-LABEL: {{^}}sgpr:
 182 ; GCN-DAG: s_mov_b32 s2, s3
     ; GCN-DAG: s_mov_b32 s1, s2
 183 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 184 ; GCN-NOT: s_endpgm
     ; Vertex shader returning three integers: { %arg2 + 2, %arg1, %arg2 }.
     ; The checks above expect s0 = s3 + 2, s1 <- s2 and s2 <- s3 (in any
     ; order), and no s_endpgm. These are the same scalar moves that @both
     ; checks for the same argument layout.
 185 define amdgpu_vs { i32, i32, i32 } @sgpr(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 186 bb:
 187   %x = add i32 %arg2, 2
 188   %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
 189   %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
       ; Chain on %b (not %a) so the %arg1 insertion is live and field 1 of the
       ; returned struct is defined instead of undef.
 190   %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
 191   ret { i32, i32, i32 } %c
 192 }
 193
 194 ; GCN-LABEL: {{^}}sgpr_literal:
 195 ; GCN: s_mov_b32 s0, 5
 196 ; GCN-NOT: s_mov_b32 s0, s0
 197 ; GCN-DAG: s_mov_b32 s1, 6
 198 ; GCN-DAG: s_mov_b32 s2, 7
 199 ; GCN-DAG: s_mov_b32 s3, 8
 200 ; GCN-NOT: s_endpgm
     ; Vertex shader returning the constant struct { 5, 6, 7, 8 }. The checks
     ; above expect the literals to be materialised directly in s0-s3, no
     ; self-copy of s0, and no s_endpgm. No argument contributes to the result,
     ; so the body is just the return.
 201 define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 202 bb:
 204   ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
 205 }
 206
 207 ; GCN-LABEL: {{^}}both:
 208 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
 209 ; GCN-DAG: v_mov_b32_e32 v1, v0
 210 ; GCN-DAG: s_mov_b32 s1, s2
 211 ; GCN-DAG: s_waitcnt expcnt(0)
 212 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 213 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 214 ; GCN-DAG: s_mov_b32 s2, s3
 215 ; GCN-NOT: s_endpgm
     ; Vertex shader mixing float and integer results. It exports %arg3 and
     ; returns { %arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2 }.
     ; The checks above (any order) expect the float results in v0/v1
     ; (v1 <- v0, v0 = 1.0 + v1), the integer results in s0-s2 (s0 = s3 + 2,
     ; s1 <- s2, s2 <- s3), the export with an s_waitcnt expcnt(0), and no
     ; s_endpgm.
 216 define amdgpu_vs { float, i32, float, i32, i32 } @both(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 217 bb:
 218   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
 219   %v = fadd float %arg3, 1.000000e+00
 220   %s = add i32 %arg2, 2
       ; Float and integer fields are interleaved in the returned struct.
 221   %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
 222   %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
 223   %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
 224   %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
 225   %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
 226   ret { float, i32, float, i32, i32 } %a4
 227 }
 228
 229 ; GCN-LABEL: {{^}}structure_literal:
 230 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
 231
 232 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 233 ; GCN-DAG: s_mov_b32 s0, 2
 234 ; GCN-DAG: s_mov_b32 s1, 3
 235 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
 236 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
 237 ; GCN-DAG: s_waitcnt expcnt(0)
     ; Vertex shader that exports %arg3 and then returns a constant nested
     ; struct { { 1.0, 2 }, { 3, <2.0, 4.0> } }. The checks above expect the
     ; export first, then (in any order) the float literals in v0-v2, the
     ; integer literals in s0/s1, and an s_waitcnt expcnt(0).
 238 define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal(ptr addrspace(4) inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 239 bb:
 240   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
 241   ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
 242 }
 243
 244 ; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
 245 ; GCN: codeLenInByte = 0{{$}}
     ; Pixel shader with no arguments whose only instruction returns an undef
     ; float. The check above expects the reported code length to be exactly 0
     ; (the trailing end-of-line anchor rejects values such as 04 or 0x...).
     ; NOTE(review): going by the function name, this guards the size accounted
     ; to the return-to-epilog pseudo instruction - confirm against the backend.
 246 define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
 247   ret float undef
 248 }
 249
 250 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 ; export intrinsic called by @vgpr, @vgpr_literal, @both and @structure_literal
 251
     ; Attribute groups. #0 is used by the vertex-shader tests, the intrinsic
     ; declaration/calls and @ret_return_to_epilog_pseudo_size. #1-#4 additionally
     ; set "InitialPSInputAddr" and are used by the remaining pixel-shader tests.
 252 attributes #0 = { nounwind }
 253 attributes #1 = { nounwind "InitialPSInputAddr"="0" } ; @vgpr_ps_addr0, @ps_input_ena_no_inputs, @ps_input_ena_pos_w
 254 attributes #2 = { nounwind "InitialPSInputAddr"="1" } ; @vgpr_ps_addr1
 255 attributes #3 = { nounwind "InitialPSInputAddr"="119" } ; @vgpr_ps_addr119
 256 attributes #4 = { nounwind "InitialPSInputAddr"="418" } ; @vgpr_ps_addr418