test/CodeGen/AMDGPU/ret.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
   3
   4 ; GCN-LABEL: {{^}}vgpr:
     ; Vertex shader: exports %arg3 in all four channels, then returns
     ; { %arg3 + 1.0, %arg3 }. The expected output copies v0 into v1, waits on the
     ; export counter, and only then writes the sum into v0 (the register the
     ; export reads). The output must not contain s_endpgm.
   5 ; GCN-DAG: v_mov_b32_e32 v1, v0
   6 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
   7 ; GCN: s_waitcnt expcnt(0)
   8 ; GCN: v_add_f32_e32 v0, 1.0, v1
   9 ; GCN-NOT: s_endpgm
  10 define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
  11 bb:
  12   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  13   %x = fadd float %arg3, 1.000000e+00
  14   %a = insertvalue { float, float } undef, float %x, 0
  15   %b = insertvalue { float, float } %a, float %arg3, 1
  16   ret { float, float } %b
  17 }
  18
  19 ; GCN-LABEL: {{^}}vgpr_literal:
     ; Vertex shader: exports %arg3 in all four channels, then returns the constant
     ; struct { 1.0, 2.0, 4.0, -1.0 }. After the export, the checks expect one
     ; v_mov_b32 per constant into v0-v3 and an export-counter wait, in any order,
     ; and no s_endpgm.
  20 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
  21
  22 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
  23 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
  24 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
  25 ; GCN-DAG: v_mov_b32_e32 v3, -1.0
  26 ; GCN-DAG: s_waitcnt expcnt(0)
  27 ; GCN-NOT: s_endpgm
  28 define amdgpu_vs { float, float, float, float } @vgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
  29 bb:
  30   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
  31   ret { float, float, float, float } { float 1.000000e+00, float 2.000000e+00, float 4.000000e+00, float -1.000000e+00 }
  32 }
  33
  34 ; GCN: .long 165580
  35 ; GCN-NEXT: .long 562
  36 ; GCN-NEXT: .long 165584
  37 ; GCN-NEXT: .long 562
  38 ; GCN-LABEL: {{^}}vgpr_ps_addr0:
     ; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0"). It returns
     ; both lanes of %arg4, lane 0 of %arg7, lane 0 of %arg8, and %arg12.
     ; The .long pairs checked before the label are (0x286CC, 562) and
     ; (0x286D0, 562); presumably these are the PS input enable/address registers -
     ; TODO confirm against the AMDGPU register definitions. 562 = 0x232 has bits
     ; 1, 4, 5 and 9 set, which lines up with %arg4, %arg7, %arg8 and %arg12 when
     ; %arg3 is counted as bit 0.
     ; No moves into v0-v2 are expected; only v3 (from v4) and v4 (from v6) are copied.
  39 ; GCN-NOT: v_mov_b32_e32 v0
  40 ; GCN-NOT: v_mov_b32_e32 v1
  41 ; GCN-NOT: v_mov_b32_e32 v2
  42 ; GCN: v_mov_b32_e32 v3, v4
  43 ; GCN: v_mov_b32_e32 v4, v6
  44 ; GCN-NOT: s_endpgm
  45 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr0([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  46 bb:
  47   %i0 = extractelement <2 x i32> %arg4, i32 0
  48   %i1 = extractelement <2 x i32> %arg4, i32 1
  49   %i2 = extractelement <2 x i32> %arg7, i32 0
  50   %i3 = extractelement <2 x i32> %arg8, i32 0
  51   %f0 = bitcast i32 %i0 to float
  52   %f1 = bitcast i32 %i1 to float
  53   %f2 = bitcast i32 %i2 to float
  54   %f3 = bitcast i32 %i3 to float
  55   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
  56   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
  57   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
  58   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
  59   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
  60   ret { float, float, float, float, float } %r4
  61 }
  62
  63 ; GCN: .long 165580
  64 ; GCN-NEXT: .long 1
  65 ; GCN-NEXT: .long 165584
  66 ; GCN-NEXT: .long 1
  67 ; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
     ; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0") that reads
     ; none of its arguments and returns the constant 1.0 in v0.
     ; Both checked words (after 0x286CC and after 0x286D0) are expected to be 1,
     ; not 0, even though no input is used.
     ; NOTE(review): presumably at least one input has to stay enabled - confirm.
  68 ; GCN: v_mov_b32_e32 v0, 1.0
  69 ; GCN-NOT: s_endpgm
  70 define amdgpu_ps float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  71 bb:
  72   ret float 1.000000e+00
  73 }
  74
  75 ; GCN: .long 165580
  76 ; GCN-NEXT: .long 2081
  77 ; GCN-NEXT: .long 165584
  78 ; GCN-NEXT: .long 2081
  79 ; GCN-LABEL: {{^}}ps_input_ena_pos_w:
     ; Pixel shader using attribute group #1 ("InitialPSInputAddr"="0"). It returns
     ; { %arg14, %arg8 reinterpreted as <2 x float> }.
     ; Both checked words are expected to be 2081 = 0x821 (bits 0, 5 and 11). Bits 5
     ; and 11 line up with %arg8 and %arg14 when %arg3 is counted as bit 0; bit 0 is
     ; set although %arg3 is unused.
     ; NOTE(review): judging by the test name, %arg14 is the position W input and
     ; bit 0 is forced on alongside it - confirm.
     ; Expected copies: v0 from v4, v1 from v2, v2 from v3, in any order.
  80 ; GCN-DAG: v_mov_b32_e32 v0, v4
  81 ; GCN-DAG: v_mov_b32_e32 v1, v2
  82 ; GCN-DAG: v_mov_b32_e32 v2, v3
  83 ; GCN-NOT: s_endpgm
  84 define amdgpu_ps { float, <2 x float> } @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #1 {
  85 bb:
  86   %f = bitcast <2 x i32> %arg8 to <2 x float>
  87   %s = insertvalue { float, <2 x float> } undef, float %arg14, 0
  88   %s1 = insertvalue { float, <2 x float> } %s, <2 x float> %f, 1
  89   ret { float, <2 x float> } %s1
  90 }
  91
  92 ; GCN: .long 165580
  93 ; GCN-NEXT: .long 562
  94 ; GCN-NEXT: .long 165584
  95 ; GCN-NEXT: .long 563
  96 ; GCN-LABEL: {{^}}vgpr_ps_addr1:
     ; Same body as vgpr_ps_addr0, but with attribute group #2
     ; ("InitialPSInputAddr"="1"). The word after 0x286CC is still expected to be
     ; 562, while the word after 0x286D0 becomes 563, i.e. 562 | 1.
     ; With bit 0 added, every used input is expected two registers higher than in
     ; vgpr_ps_addr0, so all five results need a copy:
     ; v0<-v2, v1<-v3, v2<-v4, v3<-v6, v4<-v8.
  97 ; GCN-DAG: v_mov_b32_e32 v0, v2
  98 ; GCN-DAG: v_mov_b32_e32 v1, v3
  99 ; GCN: v_mov_b32_e32 v2, v4
 100 ; GCN-DAG: v_mov_b32_e32 v3, v6
 101 ; GCN-DAG: v_mov_b32_e32 v4, v8
 102 ; GCN-NOT: s_endpgm
 103 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr1([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #2 {
 104 bb:
 105   %i0 = extractelement <2 x i32> %arg4, i32 0
 106   %i1 = extractelement <2 x i32> %arg4, i32 1
 107   %i2 = extractelement <2 x i32> %arg7, i32 0
 108   %i3 = extractelement <2 x i32> %arg8, i32 0
 109   %f0 = bitcast i32 %i0 to float
 110   %f1 = bitcast i32 %i1 to float
 111   %f2 = bitcast i32 %i2 to float
 112   %f3 = bitcast i32 %i3 to float
 113   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 114   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 115   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 116   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 117   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 118   ret { float, float, float, float, float } %r4
 119 }
 120
 121 ; GCN: .long 165580
 122 ; GCN-NEXT: .long 562
 123 ; GCN-NEXT: .long 165584
 124 ; GCN-NEXT: .long 631
 125 ; GCN-LABEL: {{^}}vgpr_ps_addr119:
     ; Same body as vgpr_ps_addr0, but with attribute group #3
     ; ("InitialPSInputAddr"="119"). The word after 0x286CC is still expected to be
     ; 562, while the word after 0x286D0 becomes 631, i.e. 562 | 119.
     ; The extra address bits push the used inputs to higher registers, so the
     ; expected copies are v0<-v2, v1<-v3, v2<-v6, v3<-v8, v4<-v12 (any order).
 126 ; GCN-DAG: v_mov_b32_e32 v0, v2
 127 ; GCN-DAG: v_mov_b32_e32 v1, v3
 128 ; GCN-DAG: v_mov_b32_e32 v2, v6
 129 ; GCN-DAG: v_mov_b32_e32 v3, v8
 130 ; GCN-DAG: v_mov_b32_e32 v4, v12
 131 ; GCN-NOT: s_endpgm
 132 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr119([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #3 {
 133 bb:
 134   %i0 = extractelement <2 x i32> %arg4, i32 0
 135   %i1 = extractelement <2 x i32> %arg4, i32 1
 136   %i2 = extractelement <2 x i32> %arg7, i32 0
 137   %i3 = extractelement <2 x i32> %arg8, i32 0
 138   %f0 = bitcast i32 %i0 to float
 139   %f1 = bitcast i32 %i1 to float
 140   %f2 = bitcast i32 %i2 to float
 141   %f3 = bitcast i32 %i3 to float
 142   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 143   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 144   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 145   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 146   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 147   ret { float, float, float, float, float } %r4
 148 }
 149
 150 ; GCN: .long 165580
 151 ; GCN-NEXT: .long 562
 152 ; GCN-NEXT: .long 165584
 153 ; GCN-NEXT: .long 946
 154 ; GCN-LABEL: {{^}}vgpr_ps_addr418:
     ; Same body as vgpr_ps_addr0, but with attribute group #4
     ; ("InitialPSInputAddr"="418"). The word after 0x286CC is still expected to be
     ; 562, while the word after 0x286D0 becomes 946, i.e. 562 | 418.
     ; 418 has no bit below bit 1, so the first three results are expected to need
     ; no moves into v0-v2; only v3 (from v4) and v4 (from v8) are copied.
 155 ; GCN-NOT: v_mov_b32_e32 v0
 156 ; GCN-NOT: v_mov_b32_e32 v1
 157 ; GCN-NOT: v_mov_b32_e32 v2
 158 ; GCN: v_mov_b32_e32 v3, v4
 159 ; GCN: v_mov_b32_e32 v4, v8
 160 ; GCN-NOT: s_endpgm
 161 define amdgpu_ps { float, float, float, float, float } @vgpr_ps_addr418([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, <2 x i32> %arg3, <2 x i32> %arg4, <2 x i32> %arg5, <3 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <2 x i32> %arg9, float %arg10, float %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18) #4 {
 162 bb:
 163   %i0 = extractelement <2 x i32> %arg4, i32 0
 164   %i1 = extractelement <2 x i32> %arg4, i32 1
 165   %i2 = extractelement <2 x i32> %arg7, i32 0
 166   %i3 = extractelement <2 x i32> %arg8, i32 0
 167   %f0 = bitcast i32 %i0 to float
 168   %f1 = bitcast i32 %i1 to float
 169   %f2 = bitcast i32 %i2 to float
 170   %f3 = bitcast i32 %i3 to float
 171   %r0 = insertvalue { float, float, float, float, float } undef, float %f0, 0
 172   %r1 = insertvalue { float, float, float, float, float } %r0, float %f1, 1
 173   %r2 = insertvalue { float, float, float, float, float } %r1, float %f2, 2
 174   %r3 = insertvalue { float, float, float, float, float } %r2, float %f3, 3
 175   %r4 = insertvalue { float, float, float, float, float } %r3, float %arg12, 4
 176   ret { float, float, float, float, float } %r4
 177 }
 178
 179 ; GCN-LABEL: {{^}}sgpr:
     ; Vertex shader returning three scalar results: { %arg2 + 2, %arg1, %arg2 }.
     ; The aggregate is built as a chain %a -> %b -> %c so that all three elements
     ; are defined; previously %c was derived from %a, which left %b dead and
     ; element 1 undef (and therefore unchecked).
     ; Expected output: s0 = s3 + 2, s1 copied from s2, s2 copied from s3, in any
     ; order, and no s_endpgm. These are the same scalar copies the "both" test
     ; expects for the identical argument list.
     ; GCN-DAG: s_mov_b32 s1, s2
 180 ; GCN-DAG: s_mov_b32 s2, s3
 181 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 182 ; GCN-NOT: s_endpgm
 183 define amdgpu_vs { i32, i32, i32 } @sgpr([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 184 bb:
 185   %x = add i32 %arg2, 2
 186   %a = insertvalue { i32, i32, i32 } undef, i32 %x, 0
 187   %b = insertvalue { i32, i32, i32 } %a, i32 %arg1, 1
 188   %c = insertvalue { i32, i32, i32 } %b, i32 %arg2, 2
 189   ret { i32, i32, i32 } %c
 190 }
 191
 192 ; GCN-LABEL: {{^}}sgpr_literal:
     ; Vertex shader returning the constant struct { 5, 6, 7, 8 }. The checks
     ; expect each constant to be moved straight into s0-s3, with no redundant
     ; s0-to-s0 copy after the first move, and no s_endpgm.
     ; NOTE(review): %x below is computed but never used; it looks like a leftover
     ; from the sgpr test - confirm whether it is intentional.
 193 ; GCN: s_mov_b32 s0, 5
 194 ; GCN-NOT: s_mov_b32 s0, s0
 195 ; GCN-DAG: s_mov_b32 s1, 6
 196 ; GCN-DAG: s_mov_b32 s2, 7
 197 ; GCN-DAG: s_mov_b32 s3, 8
 198 ; GCN-NOT: s_endpgm
 199 define amdgpu_vs { i32, i32, i32, i32 } @sgpr_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 200 bb:
 201   %x = add i32 %arg2, 2
 202   ret { i32, i32, i32, i32 } { i32 5, i32 6, i32 7, i32 8 }
 203 }
 204
 205 ; GCN-LABEL: {{^}}both:
     ; Vertex shader mixing float and integer results in one struct:
     ; { %arg3 + 1.0, %arg2 + 2, %arg3, %arg1, %arg2 }, returned after an export of
     ; %arg3. The checks expect the float elements in v0/v1 (v1 copied from v0, sum
     ; written to v0) and the integer elements in s0-s2 (s0 = s3 + 2, s1 from s2,
     ; s2 from s3), together with the export and its counter wait, in any order.
 206 ; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
 207 ; GCN-DAG: v_mov_b32_e32 v1, v0
 208 ; GCN-DAG: s_mov_b32 s1, s2
 209 ; GCN-DAG: s_waitcnt expcnt(0)
 210 ; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 211 ; GCN-DAG: s_add_{{i|u}}32 s0, s3, 2
 212 ; GCN-DAG: s_mov_b32 s2, s3
 213 ; GCN-NOT: s_endpgm
 214 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 215 bb:
 216   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
 217   %v = fadd float %arg3, 1.000000e+00
 218   %s = add i32 %arg2, 2
 219   %a0 = insertvalue { float, i32, float, i32, i32 } undef, float %v, 0
 220   %a1 = insertvalue { float, i32, float, i32, i32 } %a0, i32 %s, 1
 221   %a2 = insertvalue { float, i32, float, i32, i32 } %a1, float %arg3, 2
 222   %a3 = insertvalue { float, i32, float, i32, i32 } %a2, i32 %arg1, 3
 223   %a4 = insertvalue { float, i32, float, i32, i32 } %a3, i32 %arg2, 4
 224   ret { float, i32, float, i32, i32 } %a4
 225 }
 226
 227 ; GCN-LABEL: {{^}}structure_literal:
     ; Vertex shader: exports %arg3 in all four channels, then returns the nested
     ; constant { { 1.0, 2 }, { 3, <2.0, 4.0> } }. After the export, the checks
     ; expect the float leaves in v0-v2 and the integer leaves in s0-s1, plus the
     ; export-counter wait, in any order. As in the other value-returning shaders
     ; in this file, the output must not contain s_endpgm.
 228 ; GCN: exp mrt0 v0, v0, v0, v0 done vm
 229
 230 ; GCN-DAG: v_mov_b32_e32 v0, 1.0
 231 ; GCN-DAG: s_mov_b32 s0, 2
 232 ; GCN-DAG: s_mov_b32 s1, 3
 233 ; GCN-DAG: v_mov_b32_e32 v1, 2.0
 234 ; GCN-DAG: v_mov_b32_e32 v2, 4.0
 235 ; GCN-DAG: s_waitcnt expcnt(0)
     ; GCN-NOT: s_endpgm
 236 define amdgpu_vs { { float, i32 }, { i32, <2 x float> } } @structure_literal([9 x <16 x i8>] addrspace(4)* inreg %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 237 bb:
 238   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg3, float %arg3, float %arg3, float %arg3, i1 true, i1 true) #0
 239   ret { { float, i32 }, { i32, <2 x float> } } { { float, i32 } { float 1.000000e+00, i32 2 }, { i32, <2 x float> } { i32 3, <2 x float> <float 2.000000e+00, float 4.000000e+00> } }
 240 }
 241
 242 ; GCN-LABEL: {{^}}ret_return_to_epilog_pseudo_size:
     ; Pixel shader with no arguments that returns an undef float. The check pins
     ; the reported code length to exactly 0 bytes.
     ; NOTE(review): going by the test name, this guards the size accounted for the
     ; return-to-epilog pseudo instruction - confirm.
 243 ; GCN: codeLenInByte = 0{{$}}
 244 define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
 245   ret float undef
 246 }
 247
 248 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
     ; Export intrinsic used by the vgpr, vgpr_literal, both and structure_literal
     ; tests. Every call in this file passes (0, 15, four floats, true, true) and
     ; is expected to appear as "exp mrt0 ... done vm" in the output.
 249
 250 attributes #0 = { nounwind }
     ; Groups #1-#4 add the "InitialPSInputAddr" string attribute with the values
     ; 0, 1, 119 and 418. They are used by the pixel-shader tests, where the checked
     ; word after 0x286D0 equals the word after 0x286CC OR'ed with this value
     ; (562, 563, 631 and 946 in the vgpr_ps_addr* tests).
 251 attributes #1 = { nounwind "InitialPSInputAddr"="0" }
 252 attributes #2 = { nounwind "InitialPSInputAddr"="1" }
 253 attributes #3 = { nounwind "InitialPSInputAddr"="119" }
 254 attributes #4 = { nounwind "InitialPSInputAddr"="418" }