test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
     ; The kernels below call llvm.amdgcn.div.scale.f32 or .f64 and store field 0
     ; of the returned pair; their check lines pin the expected loads and the
     ; operand order of the emitted v_div_scale instruction.
   2
     ; Intrinsics referenced by the kernels. Attribute group #1 is defined at the
     ; end of the file (nounwind readnone speculatable).
   3 declare i32 @llvm.amdgcn.workitem.id.x() #1
   4 declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
   5 declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
   6 declare float @llvm.fabs.f32(float) #1
   7
   8 ; SI-LABEL: {{^}}test_div_scale_f32_1:
   9 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  10 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
  11 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
  12 ; SI: buffer_store_dword [[RESULT0]]
  13 ; SI: s_endpgm
     ; Both inputs are volatile loads of adjacent floats (in[tid] and in[tid + 1]).
     ; With the i1 argument false, the checks above expect the v_div_scale_f32
     ; source operands in the order B, B, A.
  14 define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  15   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  16   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  17   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 ; one float past %gep.0 (checked as offset:4)
  18
  19   %a = load volatile float, float addrspace(1)* %gep.0, align 4
  20   %b = load volatile float, float addrspace(1)* %gep.1, align 4
  21
  22   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
  23   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
  24   store float %result0, float addrspace(1)* %out, align 4
  25   ret void
  26 }
  27
  28 ; SI-LABEL: {{^}}test_div_scale_f32_2:
  29 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  30 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
  31 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
  32 ; SI: buffer_store_dword [[RESULT0]]
  33 ; SI: s_endpgm
     ; Same inputs as test_div_scale_f32_1 (volatile loads of in[tid] and
     ; in[tid + 1]) but with the i1 argument true; the checks above then expect
     ; the v_div_scale_f32 source operands in the order A, B, A.
  34 define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
  35   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  36   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
  37   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 ; one float past %gep.0 (checked as offset:4)
  38
  39   %a = load volatile float, float addrspace(1)* %gep.0, align 4
  40   %b = load volatile float, float addrspace(1)* %gep.1, align 4
  41
  42   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
  43   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
  44   store float %result0, float addrspace(1)* %out, align 4
  45   ret void
  46 }
  47
  48 ; SI-LABEL: {{^}}test_div_scale_f64_1:
  49 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  50 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
  51 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
  52 ; SI: buffer_store_dwordx2 [[RESULT0]]
  53 ; SI: s_endpgm
     ; Double-precision variant: volatile loads of adjacent doubles (in[tid] and
     ; in[tid + 1]), i1 argument false. The checks above expect the
     ; v_div_scale_f64 source operands in the order B, B, A.
     ; The %aptr parameter is not referenced in the body.
  54 define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  55   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  56   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  57   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 ; one double past %gep.0 (checked as offset:8)
  58
  59   %a = load volatile double, double addrspace(1)* %gep.0, align 8
  60   %b = load volatile double, double addrspace(1)* %gep.1, align 8
  61
  62   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
  63   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
  64   store double %result0, double addrspace(1)* %out, align 8
  65   ret void
  66 }
  67
  68 ; SI-LABEL: {{^}}test_div_scale_f64_2:
  69 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
  70 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
  71 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
  72 ; SI: buffer_store_dwordx2 [[RESULT0]]
  73 ; SI: s_endpgm
     ; Double-precision variant with the i1 argument true: volatile loads of
     ; adjacent doubles (in[tid] and in[tid + 1]). The checks above expect the
     ; v_div_scale_f64 source operands in the order A, B, A.
     ; The %aptr parameter is not referenced in the body.
  74 define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
  75   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  76   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  77   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 ; one double past %gep.0 (checked as offset:8)
  78
  79   %a = load volatile double, double addrspace(1)* %gep.0, align 8
  80   %b = load volatile double, double addrspace(1)* %gep.1, align 8
  81
  82   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
  83   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
  84   store double %result0, double addrspace(1)* %out, align 8
  85   ret void
  86 }
  87
  88 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_1:
  89 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
  90 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
  91 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
  92 ; SI: buffer_store_dword [[RESULT0]]
  93 ; SI: s_endpgm
     ; First intrinsic operand (%a) is a kernel argument, matched above as an
     ; s register; the second (%b) is loaded from in[tid], matched as a v register.
     ; With the i1 argument false the expected source operand order is B, B, A.
  94 define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
  95   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
  96   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
  97
  98   %b = load float, float addrspace(1)* %gep, align 4
  99
 100   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
 101   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 102   store float %result0, float addrspace(1)* %out, align 4
 103   ret void
 104 }
 105
 106 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_num_2:
 107 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
 108 ; SI-DAG: s_load_dword [[A:s[0-9]+]]
 109 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
 110 ; SI: buffer_store_dword [[RESULT0]]
 111 ; SI: s_endpgm
     ; First intrinsic operand (%a) is a kernel argument, matched above as an
     ; s register; the second (%b) is loaded from in[tid], matched as a v register.
     ; With the i1 argument true the expected source operand order is A, B, A.
 112 define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
 113   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 114   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 115
 116   %b = load float, float addrspace(1)* %gep, align 4
 117
 118   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
 119   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 120   store float %result0, float addrspace(1)* %out, align 4
 121   ret void
 122 }
 123
 124 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_1:
 125 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
 126 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
 127 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
 128 ; SI: buffer_store_dword [[RESULT0]]
 129 ; SI: s_endpgm
     ; Second intrinsic operand (%b) is a kernel argument, matched above as an
     ; s register; the first (%a) is loaded from in[tid], matched as a v register.
     ; With the i1 argument false the expected source operand order is B, B, A.
 130 define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
 131   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 132   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 133
 134   %a = load float, float addrspace(1)* %gep, align 4
 135
 136   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
 137   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 138   store float %result0, float addrspace(1)* %out, align 4
 139   ret void
 140 }
 141
 142 ; SI-LABEL: {{^}}test_div_scale_f32_scalar_den_2:
 143 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
 144 ; SI-DAG: s_load_dword [[B:s[0-9]+]]
 145 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
 146 ; SI: buffer_store_dword [[RESULT0]]
 147 ; SI: s_endpgm
     ; Second intrinsic operand (%b) is a kernel argument, matched above as an
     ; s register; the first (%a) is loaded from in[tid], matched as a v register.
     ; With the i1 argument true the expected source operand order is A, B, A.
 148 define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
 149   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 150   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 151
 152   %a = load float, float addrspace(1)* %gep, align 4
 153
 154   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
 155   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 156   store float %result0, float addrspace(1)* %out, align 4
 157   ret void
 158 }
 159
 160 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_1:
 161 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
 162 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
 163 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
 164 ; SI: buffer_store_dwordx2 [[RESULT0]]
 165 ; SI: s_endpgm
     ; Double variant: %a is a kernel argument (checked as an s_load_dwordx2 at
     ; offset 0xd into an s register pair); %b is loaded from in[tid] into a
     ; v register pair. With the i1 argument false the expected source operand
     ; order is B, B, A.
 166 define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
 167   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 168   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 169
 170   %b = load double, double addrspace(1)* %gep, align 8
 171
 172   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
 173   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 174   store double %result0, double addrspace(1)* %out, align 8
 175   ret void
 176 }
 177
 178 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_num_2:
 179 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
 180 ; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
 181 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
 182 ; SI: buffer_store_dwordx2 [[RESULT0]]
 183 ; SI: s_endpgm
     ; Double variant: %a is a kernel argument (checked as an s_load_dwordx2 at
     ; offset 0xd into an s register pair); %b is loaded from in[tid] into a
     ; v register pair. With the i1 argument true the expected source operand
     ; order is A, B, A.
 184 define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
 185   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 186   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 187
 188   %b = load double, double addrspace(1)* %gep, align 8
 189
 190   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
 191   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 192   store double %result0, double addrspace(1)* %out, align 8
 193   ret void
 194 }
 195
 196 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_1:
 197 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 198 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
 199 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
 200 ; SI: buffer_store_dwordx2 [[RESULT0]]
 201 ; SI: s_endpgm
     ; Double variant: %b is a kernel argument (checked as an s_load_dwordx2 at
     ; offset 0xd into an s register pair); %a is loaded from in[tid] into a
     ; v register pair. With the i1 argument false the expected source operand
     ; order is B, B, A.
 202 define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
 203   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 204   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 205
 206   %a = load double, double addrspace(1)* %gep, align 8
 207
 208   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
 209   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 210   store double %result0, double addrspace(1)* %out, align 8
 211   ret void
 212 }
 213
 214 ; SI-LABEL: {{^}}test_div_scale_f64_scalar_den_2:
 215 ; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 216 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
 217 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
 218 ; SI: buffer_store_dwordx2 [[RESULT0]]
 219 ; SI: s_endpgm
     ; Double variant: %b is a kernel argument (checked as an s_load_dwordx2 at
     ; offset 0xd into an s register pair); %a is loaded from in[tid] into a
     ; v register pair. With the i1 argument true the expected source operand
     ; order is A, B, A.
 220 define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
 221   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 222   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 223
 224   %a = load double, double addrspace(1)* %gep, align 8
 225
 226   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
 227   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 228   store double %result0, double addrspace(1)* %out, align 8
 229   ret void
 230 }
 231
 232 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_1:
 233 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
 234 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
 235 ; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
 236 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
 237 ; SI: buffer_store_dword [[RESULT0]]
 238 ; SI: s_endpgm
     ; Both intrinsic operands are kernel arguments, separated by unnamed
     ; [8 x i32] arguments; the checks above expect them at offsets 0x13 and 0x1c.
     ; With the i1 argument false, A is expected to be copied to a v register
     ; (VA) and the source operand order is B, B, VA.
 239 define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
 240   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
 241   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 242   store float %result0, float addrspace(1)* %out, align 4
 243   ret void
 244 }
 245
 246 ; SI-LABEL: {{^}}test_div_scale_f32_all_scalar_2:
 247 ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
 248 ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0x1c
 249 ; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
 250 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
 251 ; SI: buffer_store_dword [[RESULT0]]
 252 ; SI: s_endpgm
     ; Both intrinsic operands are kernel arguments, separated by unnamed
     ; [8 x i32] arguments; the checks above expect them at offsets 0x13 and 0x1c.
     ; With the i1 argument true, B is expected to be copied to a v register
     ; (VB) and the source operand order is A, VB, A.
 253 define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind {
 254   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
 255   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 256   store float %result0, float addrspace(1)* %out, align 4
 257   ret void
 258 }
 259
 260 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_1:
 261 ; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x13
 262 ; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x1d
 263 ; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
 264 ; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
 265 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
 266 ; SI: buffer_store_dwordx2 [[RESULT0]]
 267 ; SI: s_endpgm
     ; Double variant with both operands as kernel arguments (checked at offsets
     ; 0x13 and 0x1d). With the i1 argument false, both halves of A are expected
     ; to be copied to v registers and the source operand order is
     ; B, B, v[VA_LO:VA_HI].
 268 define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
 269   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
 270   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 271   store double %result0, double addrspace(1)* %out, align 8
 272   ret void
 273 }
 274
 275 ; SI-LABEL: {{^}}test_div_scale_f64_all_scalar_2:
 276 ; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0x13
 277 ; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0x1d
 278 ; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
 279 ; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
 280 ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
 281 ; SI: buffer_store_dwordx2 [[RESULT0]]
 282 ; SI: s_endpgm
     ; Double variant with both operands as kernel arguments (checked at offsets
     ; 0x13 and 0x1d). With the i1 argument true, both halves of B are expected
     ; to be copied to v registers and the source operand order is
     ; A, v[VB_LO:VB_HI], A.
 283 define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind {
 284   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
 285   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 286   store double %result0, double addrspace(1)* %out, align 8
 287   ret void
 288 }
 289
 290 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
 291 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 292 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
 293 ; SI: buffer_store_dword [[RESULT0]]
 294 ; SI: s_endpgm
     ; The first intrinsic operand is the constant 1.0 and the second is the
     ; value loaded from in[tid]. The checks above expect 1.0 to appear directly
     ; as the last source operand: A, A, 1.0.
 295 define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
 296   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 297   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
 298   %a = load float, float addrspace(1)* %gep.0, align 4
 299
 300   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
 301   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 302   store float %result0, float addrspace(1)* %out, align 4
 303   ret void
 304 }
 305
 306 ; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
 307 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 308 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
 309 ; SI: buffer_store_dword [[RESULT0]]
 310 ; SI: s_endpgm
     ; The first intrinsic operand is the value loaded from in[tid] and the
     ; second is the constant 2.0. The checks above expect 2.0 to appear directly
     ; as the first two source operands: 2.0, 2.0, A.
 311 define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
 312   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 313   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
 314   %a = load float, float addrspace(1)* %gep.0, align 4
 315
 316   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
 317   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 318   store float %result0, float addrspace(1)* %out, align 4
 319   ret void
 320 }
 321
 322 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
 323 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
 324 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 325 ; SI: v_and_b32_e32 [[ABS_A:v[0-9]+]], 0x7fffffff, [[A]]
 326 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]]
 327 ; SI: buffer_store_dword [[RESULT0]]
 328 ; SI: s_endpgm
     ; llvm.fabs.f32 is applied to the first intrinsic operand. The checks above
     ; expect the fabs as a separate v_and_b32 with mask 0x7fffffff whose result
     ; (ABS_A) is then used as a plain register operand: B, B, ABS_A.
 329 define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
 330   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 331   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
 332   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 ; one float past %gep.0 (checked as offset:4)
 333
 334   %a = load volatile float, float addrspace(1)* %gep.0, align 4
 335   %b = load volatile float, float addrspace(1)* %gep.1, align 4
 336
 337   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 338
 339   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
 340   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 341   store float %result0, float addrspace(1)* %out, align 4
 342   ret void
 343 }
 344
 345 ; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
 346 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
 347 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
 348 ; SI: v_and_b32_e32 [[ABS_B:v[0-9]+]], 0x7fffffff, [[B]]
 349 ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]]
 350 ; SI: buffer_store_dword [[RESULT0]]
 351 ; SI: s_endpgm
     ; llvm.fabs.f32 is applied to the second intrinsic operand. The checks above
     ; expect the fabs as a separate v_and_b32 with mask 0x7fffffff whose result
     ; (ABS_B) is then used as a plain register operand: ABS_B, ABS_B, A.
 352 define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
 353   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 354   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
 355   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 ; one float past %gep.0 (checked as offset:4)
 356
 357   %a = load volatile float, float addrspace(1)* %gep.0, align 4
 358   %b = load volatile float, float addrspace(1)* %gep.1, align 4
 359
 360   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 361
 362   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
 363   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 364   store float %result0, float addrspace(1)* %out, align 4
 365   ret void
 366 }
 367
 368 ; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val:
 369 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
 370 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]]
     ; Calls the intrinsic with (8.0, undef, false). The checks above expect the
     ; constant 0x41000000 (the bit pattern of 8.0f) to be materialized in an
     ; s register K and the source operands to be K, any v register, K.
 371 define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 {
 372   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
 373   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 374   store float %result0, float addrspace(1)* %out, align 4
 375   ret void
 376 }
 377
 378 ; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val:
 379 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000
 380 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}}
     ; Calls the intrinsic with (undef, 8.0, false). The checks above expect the
     ; constant 0x41000000 (the bit pattern of 8.0f) to be materialized in an
     ; s register K and the source operands to be K, K, any v register.
 381 define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 {
 382   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
 383   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 384   store float %result0, float addrspace(1)* %out, align 4
 385   ret void
 386 }
 387
 388 ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val:
 389 ; SI-NOT: v0
 390 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0
     ; Calls the intrinsic with both value operands undef. The checks above
     ; require that the text "v0" does not appear between the label and the
     ; v_div_scale_f32 line, and that the instruction's source operands are
     ; exactly s0, s0, v0.
 391 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 {
 392   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
 393   %result0 = extractvalue { float, i1 } %result, 0 ; only the float field is read; the i1 field is unused
 394   store float %result0, float addrspace(1)* %out, align 4
 395   ret void
 396 }
 397
 398 ; SI-LABEL: {{^}}test_div_scale_f64_val_undef_val:
 399 ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
 400 ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000
 401 ; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, v[0:1], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}
     ; Double variant of the (8.0, undef, false) case. The checks above expect
     ; the two halves of double 8.0 (low dword 0, high dword 0x40200000) to be
     ; materialized in s registers and the source operands to be
     ; s[K_LO:K_HI], v[0:1], s[K_LO:K_HI].
 402 define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 {
 403   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false)
 404   %result0 = extractvalue { double, i1 } %result, 0 ; only the double field is read; the i1 field is unused
 405   store double %result0, double addrspace(1)* %out, align 8
 406   ret void
 407 }
 408
 409 attributes #0 = { nounwind }
     ; #0 above is attached to the kernels that pass undef operands; #1 below is
     ; attached to the intrinsic declarations at the top of the file.
 410 attributes #1 = { nounwind readnone speculatable }