clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

   1 // REQUIRES: amdgpu-registered-target
   2 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu tonga -S -emit-llvm -o - %s | FileCheck %s
   3 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -S -emit-llvm -o - %s | FileCheck %s
   4 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck %s
   5 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck %s
   6
   7 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
     // The pragma above turns on the cl_khr_fp16 extension; the tests in this
     // file take and store `half` values.
   8
     // Short unsigned aliases used by the tests below: `ulong` by the
     // s_memrealtime/s_memtime tests, `uint` by the perm/groupstaticsize tests.
   9 typedef unsigned long ulong;
  10 typedef unsigned int  uint;
  11
  12 // CHECK-LABEL: @test_div_fixup_f16
  13 // CHECK: call half @llvm.amdgcn.div.fixup.f16
     // Calls the half-precision div_fixup builtin with (a, b, c) and stores the
     // result through `out`. The directives above expect the emitted IR to
     // contain a call to llvm.amdgcn.div.fixup.f16 returning half.
  14 void test_div_fixup_f16(global half* out, half a, half b, half c)
  15 {
  16   *out = __builtin_amdgcn_div_fixuph(a, b, c);
  17 }
  18
  19 // CHECK-LABEL: @test_rcp_f16
  20 // CHECK: call half @llvm.amdgcn.rcp.f16
     // Calls the half-precision rcp builtin on `a` and stores the result
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.rcp.f16 returning half.
  21 void test_rcp_f16(global half* out, half a)
  22 {
  23   *out = __builtin_amdgcn_rcph(a);
  24 }
  25
  26 // CHECK-LABEL: @test_sqrt_f16
  27 // CHECK: call half @llvm.sqrt.f16
     // Calls the half-precision amdgcn sqrt builtin on `a` and stores the
     // result through `out`. Note that the expected intrinsic above is
     // llvm.sqrt.f16, i.e. its name carries no `amdgcn` component.
  28 void test_sqrt_f16(global half* out, half a)
  29 {
  30   *out = __builtin_amdgcn_sqrth(a);
  31 }
  32
  33 // CHECK-LABEL: @test_rsq_f16
  34 // CHECK: call half @llvm.amdgcn.rsq.f16
     // Calls the half-precision rsq builtin on `a` and stores the result
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.rsq.f16 returning half.
  35 void test_rsq_f16(global half* out, half a)
  36 {
  37   *out = __builtin_amdgcn_rsqh(a);
  38 }
  39
  40 // CHECK-LABEL: @test_sin_f16
  41 // CHECK: call half @llvm.amdgcn.sin.f16
     // Calls __builtin_amdgcn_sinh on `a` and stores the result through `out`.
     // The directives above expect a call to llvm.amdgcn.sin.f16, so the
     // trailing `h` in the builtin name pairs with the f16 intrinsic here.
  42 void test_sin_f16(global half* out, half a)
  43 {
  44   *out = __builtin_amdgcn_sinh(a);
  45 }
  46
  47 // CHECK-LABEL: @test_cos_f16
  48 // CHECK: call half @llvm.amdgcn.cos.f16
     // Calls __builtin_amdgcn_cosh on `a` and stores the result through `out`.
     // The directives above expect a call to llvm.amdgcn.cos.f16, so the
     // trailing `h` in the builtin name pairs with the f16 intrinsic here.
  49 void test_cos_f16(global half* out, half a)
  50 {
  51   *out = __builtin_amdgcn_cosh(a);
  52 }
  53
  54 // CHECK-LABEL: @test_ldexp_f16
  55 // CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
  56 // CHECK: call half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
     // Calls the half-precision ldexp builtin with a half value and an `int`
     // exponent, storing the result through `out`. The directives above expect
     // a trunc from i32 whose result is passed as the i16 operand of
     // llvm.ldexp.f16.i16, with `%a` as the half operand.
  57 void test_ldexp_f16(global half* out, half a, int b)
  58 {
  59   *out = __builtin_amdgcn_ldexph(a, b);
  60 }
  61
  62 // CHECK-LABEL: @test_frexp_mant_f16
  63 // CHECK: call half @llvm.amdgcn.frexp.mant.f16
     // Calls the half-precision frexp_mant builtin on `a` and stores the result
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.frexp.mant.f16 returning half.
  64 void test_frexp_mant_f16(global half* out, half a)
  65 {
  66   *out = __builtin_amdgcn_frexp_manth(a);
  67 }
  68
  69 // CHECK-LABEL: @test_frexp_exp_f16
  70 // CHECK: call i16 @llvm.amdgcn.frexp.exp.i16.f16
     // Calls the half-precision frexp_exp builtin on `a` and stores the result
     // through `out`, which is a `short` pointer in this test. The directives
     // above expect a call to llvm.amdgcn.frexp.exp.i16.f16 returning i16.
  71 void test_frexp_exp_f16(global short* out, half a)
  72 {
  73   *out = __builtin_amdgcn_frexp_exph(a);
  74 }
  75
  76 // CHECK-LABEL: @test_fract_f16
  77 // CHECK: call half @llvm.amdgcn.fract.f16
     // Calls the half-precision fract builtin on `a` and stores the result
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.fract.f16 returning half.
  78 void test_fract_f16(global half* out, half a)
  79 {
  80   *out = __builtin_amdgcn_fracth(a);
  81 }
  82
  83 // CHECK-LABEL: @test_class_f16
  84 // CHECK: call i1 @llvm.amdgcn.class.f16
     // Calls the half-precision class builtin with value `a` and the `int`
     // argument `b`, storing the result through `out`. The directives above
     // expect a call to llvm.amdgcn.class.f16 returning i1.
     // NOTE(review): the i1 result is stored through a `half` pointer, so an
     // implicit conversion is involved; only the call itself is matched.
     // Confirm the destination type is intentional.
  85 void test_class_f16(global half* out, half a, int b)
  86 {
  87   *out = __builtin_amdgcn_classh(a, b);
  88 }
  89
  90 // CHECK-LABEL: @test_s_memrealtime
  91 // CHECK: call i64 @llvm.amdgcn.s.memrealtime()
     // Calls the argument-less s_memrealtime builtin and stores its result
     // through the `ulong` pointer `out`. The directives above expect a call
     // to llvm.amdgcn.s.memrealtime taking no operands and returning i64.
  92 void test_s_memrealtime(global ulong* out)
  93 {
  94   *out = __builtin_amdgcn_s_memrealtime();
  95 }
  96
  97 // CHECK-LABEL: @test_s_dcache_wb()
  98 // CHECK: call void @llvm.amdgcn.s.dcache.wb()
     // Calls the s_dcache_wb builtin, which takes no arguments here and whose
     // result is not used. The directives above expect a void call to
     // llvm.amdgcn.s.dcache.wb; the label also matches the empty parameter
     // list of the emitted function.
  99 void test_s_dcache_wb()
 100 {
 101   __builtin_amdgcn_s_dcache_wb();
 102 }
 103
 104 // CHECK-LABEL: @test_mov_dpp
 105 // CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
     // Calls the mov_dpp builtin with `src` and the constants (0, 0, 0, false),
     // storing the result through `out`. The directives above expect the call
     // to appear as llvm.amdgcn.update.dpp.i32 with `poison` as the first
     // operand, `%src` as the second, and the four constants unchanged.
 106 void test_mov_dpp(global int* out, int src)
 107 {
 108   *out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
 109 }
 110
 111 // CHECK-LABEL: @test_update_dpp
 112 // CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
     // Calls the update_dpp builtin with `arg1`, `arg2` and the constants
     // (0, 0, 0, false), storing the result through `out`. The directives
     // above expect llvm.amdgcn.update.dpp.i32 to receive `%arg1` and `%arg2`
     // as its first two operands, followed by the same constants.
 113 void test_update_dpp(global int* out, int arg1, int arg2)
 114 {
 115   *out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
 116 }
 117
 118 // CHECK-LABEL: @test_ds_faddf
 119 // CHECK: call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
     // Calls the ds_faddf builtin on the `local` pointer `out` with `src` and
     // the constants (0, 0, false), then stores the returned value back
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.ds.fadd.f32 whose pointer operand is in addrspace(3).
     // The label spells out the full function name so it cannot match a
     // differently-suffixed symbol by prefix.
 120 void test_ds_faddf(local float *out, float src) {
 121   *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
 122 }
 123
 124 // CHECK-LABEL: @test_ds_fminf
 125 // CHECK: call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
     // Calls the ds_fminf builtin on the `local` pointer `out` with `src` and
     // the constants (0, 0, false), then stores the returned value back
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.ds.fmin.f32 whose pointer operand is in addrspace(3).
     // The label spells out the full function name so it cannot match a
     // differently-suffixed symbol by prefix.
 126 void test_ds_fminf(local float *out, float src) {
 127   *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
 128 }
 129
 130 // CHECK-LABEL: @test_ds_fmaxf
 131 // CHECK: call float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
     // Calls the ds_fmaxf builtin on the `local` pointer `out` with `src` and
     // the constants (0, 0, false), then stores the returned value back
     // through `out`. The directives above expect a call to
     // llvm.amdgcn.ds.fmax.f32 whose pointer operand is in addrspace(3).
     // The label spells out the full function name so it cannot match a
     // differently-suffixed symbol by prefix.
 132 void test_ds_fmaxf(local float *out, float src) {
 133   *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
 134 }
 135
 136 // CHECK-LABEL: @test_s_memtime
 137 // CHECK: call i64 @llvm.amdgcn.s.memtime()
     // Calls the argument-less s_memtime builtin and stores its result through
     // the `ulong` pointer `out`. The directives above expect a call to
     // llvm.amdgcn.s.memtime taking no operands and returning i64.
 138 void test_s_memtime(global ulong* out)
 139 {
 140   *out = __builtin_amdgcn_s_memtime();
 141 }
 142
 143 // CHECK-LABEL: @test_perm
 144 // CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
     // Calls the perm builtin with the three `uint` arguments (a, b, s) and
     // stores the result through `out`. The directives above expect
     // llvm.amdgcn.perm to receive `%a`, `%b` and `%s` in that order.
 145 void test_perm(global uint* out, uint a, uint b, uint s)
 146 {
 147   *out = __builtin_amdgcn_perm(a, b, s);
 148 }
 149
 150 // CHECK-LABEL: @test_groupstaticsize
 151 // CHECK: call i32 @llvm.amdgcn.groupstaticsize()
     // Calls the argument-less groupstaticsize builtin and stores its result
     // through the `uint` pointer `out`. The directives above expect a call to
     // llvm.amdgcn.groupstaticsize taking no operands and returning i32.
 152 void test_groupstaticsize(global uint* out)
 153 {
 154   *out = __builtin_amdgcn_groupstaticsize();
 155 }