clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx12.cl

   1 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 \
   2 // RUN:   %s -emit-llvm -o - | FileCheck %s
   3
   4 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 \
   5 // RUN:   -S -o - %s | FileCheck -check-prefix=GFX12 %s
   6
   7 // REQUIRES: amdgpu-registered-target
   8
   9 typedef half  __attribute__((ext_vector_type(2))) half2;   // two-element vector of half; used by the *_v2f16 builtin tests in this file
  10 typedef short __attribute__((ext_vector_type(2))) short2;  // two-element vector of short; operand/result type of the *_v2bf16 builtin tests in this file
  11
  12 // CHECK-LABEL: test_local_add_2bf16
  13 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
  14 // CHECK-NEXT: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
  15 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
  16
  17 // GFX12-LABEL:  test_local_add_2bf16
  18 // GFX12: ds_pk_add_rtn_bf16
     // Returning form of the packed-bf16 atomic add on a __local pointer: passes
     // addr and x to __builtin_amdgcn_ds_atomic_fadd_v2bf16 and returns the
     // builtin's result.
     // The IR expectations above look for the operand being bitcast from
     // <2 x i16> to <2 x bfloat>, immediately followed by an agent-scope monotonic
     // atomicrmw fadd on addrspace(3) with align 4, immediately followed by a
     // bitcast of the result back to <2 x i16>.
     // The ISA expectation above is the instruction ds_pk_add_rtn_bf16.
  19 short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
  20   return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
  21 }
  22
  23 // CHECK-LABEL: test_local_add_2bf16_noret
  24 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
  25 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4
  26 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
  27
  28 // GFX12-LABEL:  test_local_add_2bf16_noret
  29 // GFX12: ds_pk_add_bf16
     // Non-returning form: the same __builtin_amdgcn_ds_atomic_fadd_v2bf16 call is
     // used as a statement, so its result is discarded and the function is void.
     // The IR expectations above still include the operand bitcast, the
     // agent-scope monotonic atomicrmw fadd on addrspace(3), and the bitcast of
     // the result back to <2 x i16>.
     // The ISA expectation above is ds_pk_add_bf16 (mnemonic without "rtn").
     // NOTE(review): the atomicrmw expectation here is a plain match, whereas
     // test_local_add_2bf16 requires it on the line right after the operand
     // bitcast -- confirm whether the looser match is intentional.
  30 void test_local_add_2bf16_noret(__local short2 *addr, short2 x) {
  31   __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
  32 }
  33
  34 // CHECK-LABEL: test_local_add_2f16
  35 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
  36 // GFX12-LABEL:  test_local_add_2f16
  37 // GFX12: ds_pk_add_rtn_f16
     // Returning form of the packed-f16 atomic add on a __local pointer: passes
     // addr and x to __builtin_amdgcn_ds_atomic_fadd_v2f16 and returns the
     // builtin's result.
     // The single IR expectation above is a monotonic atomicrmw fadd of a
     // <2 x half> value on addrspace(3) with align 4; the ISA expectation is
     // ds_pk_add_rtn_f16.
     // NOTE(review): the IR pattern spells no syncscope, unlike the bf16 __local
     // tests in this file -- confirm that this difference is intended.
  38 half2 test_local_add_2f16(__local half2 *addr, half2 x) {
  39   return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
  40 }
  41
  42 // CHECK-LABEL: test_local_add_2f16_noret
  43 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
  44 // GFX12-LABEL:  test_local_add_2f16_noret
  45 // GFX12: ds_pk_add_f16
     // Non-returning form: __builtin_amdgcn_ds_atomic_fadd_v2f16 is called as a
     // statement on a __local pointer and its result is discarded.
     // The single IR expectation above is a monotonic atomicrmw fadd of a
     // <2 x half> value on addrspace(3) with align 4; the ISA expectation is
     // ds_pk_add_f16 (mnemonic without "rtn").
     // NOTE(review): the IR pattern spells no syncscope, unlike the bf16 __local
     // tests in this file -- confirm that this difference is intended.
  46 void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
  47   __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
  48 }
  49
  50 // CHECK-LABEL: test_flat_add_2f16
  51 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  52
  53 // GFX12-LABEL:  test_flat_add_2f16
  54 // GFX12: flat_atomic_pk_add_f16
     // Packed-f16 atomic add through a __generic pointer: passes addr and x to
     // __builtin_amdgcn_flat_atomic_fadd_v2f16 and returns the builtin's result.
     // The IR expectation above is an agent-scope monotonic atomicrmw fadd of a
     // <2 x half> value on an unqualified ptr with align 4, with
     // !amdgpu.no.fine.grained.memory metadata ending the line.
     // The ISA expectation above is flat_atomic_pk_add_f16.
  55 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
  56   return __builtin_amdgcn_flat_atomic_fadd_v2f16(addr, x);
  57 }
  58
  59 // CHECK-LABEL: test_flat_add_2bf16
  60 // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
  61 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  62 // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
  63
  64 // GFX12-LABEL:  test_flat_add_2bf16
  65 // GFX12: flat_atomic_pk_add_bf16
     // Packed-bf16 atomic add through a __generic pointer: passes addr and x to
     // __builtin_amdgcn_flat_atomic_fadd_v2bf16 and returns the builtin's result.
     // The IR expectations above look, in order, for the operand being bitcast
     // from <2 x i16> to <2 x bfloat>, an agent-scope monotonic atomicrmw fadd on
     // an unqualified ptr (align 4, with !amdgpu.no.fine.grained.memory metadata
     // ending the line), and the result being bitcast back to <2 x i16>.
     // The ISA expectation above is flat_atomic_pk_add_bf16.
  66 short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
  67   return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x);
  68 }
  69
  70 // CHECK-LABEL: test_global_add_half2
  71 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  72
  73 // GFX12-LABEL:  test_global_add_half2
  74 // GFX12:  global_atomic_pk_add_f16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN
     // Packed-f16 atomic add on a __global pointer whose result is consumed: the
     // value returned by __builtin_amdgcn_global_atomic_fadd_v2f16 is stored
     // through the local pointer rtn.
     // The IR expectation above is an agent-scope monotonic atomicrmw fadd of a
     // <2 x half> value on addrspace(1) with align 4 and
     // !amdgpu.no.fine.grained.memory metadata; the ISA expectation is
     // global_atomic_pk_add_f16 writing v2 with th:TH_ATOMIC_RETURN.
     // NOTE(review): rtn is declared but never initialized before the store
     // through it, which is undefined behavior in general. Presumably this is
     // tolerated because the test only inspects generated code -- confirm before
     // reusing the pattern; changing it would likely alter the matched output.
  75 void test_global_add_half2(__global half2 *addr, half2 x) {
  76   half2 *rtn;
  77   *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
  78 }
  79
  80 // CHECK-LABEL: test_global_add_half2_noret
  81 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  82
  83 // GFX12-LABEL:  test_global_add_half2_noret
  84 // GFX12:  global_atomic_pk_add_f16 v[0:1], v2, off
     // Non-returning form: __builtin_amdgcn_global_atomic_fadd_v2f16 is called as
     // a statement on a __global pointer and its result is discarded.
     // The IR expectation above is an agent-scope monotonic atomicrmw fadd of a
     // <2 x half> value on addrspace(1) with align 4 and
     // !amdgpu.no.fine.grained.memory metadata.
     // The ISA expectation above names literal registers (v[0:1] for the address,
     // v2 for the data) and has no th:TH_ATOMIC_RETURN suffix in the pattern.
  85 void test_global_add_half2_noret(__global half2 *addr, half2 x) {
  86   __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
  87 }
  88
  89 // CHECK-LABEL: test_global_add_2bf16
  90 // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
  91 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
  92 // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
  93
  94
  95 // GFX12-LABEL:  test_global_add_2bf16
  96 // GFX12: global_atomic_pk_add_bf16 v2, v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off th:TH_ATOMIC_RETURN
     // Packed-bf16 atomic add on a __global pointer whose result is consumed: the
     // value returned by __builtin_amdgcn_global_atomic_fadd_v2bf16 is stored
     // through the local pointer rtn.
     // The IR expectations above look, in order, for the operand being bitcast
     // from <2 x i16> to <2 x bfloat>, an agent-scope monotonic atomicrmw fadd on
     // addrspace(1) (align 4, with !amdgpu.no.fine.grained.memory metadata), and
     // the result being bitcast back to <2 x i16>. The ISA expectation is
     // global_atomic_pk_add_bf16 writing v2 with th:TH_ATOMIC_RETURN.
     // NOTE(review): rtn is declared but never initialized before the store
     // through it, which is undefined behavior in general. Presumably this is
     // tolerated because the test only inspects generated code -- confirm before
     // reusing the pattern; changing it would likely alter the matched output.
  97 void test_global_add_2bf16(__global short2 *addr, short2 x) {
  98   short2 *rtn;
  99   *rtn = __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
 100 }
 101
 102 // CHECK-LABEL: test_global_add_2bf16_noret
 103 // CHECK: [[BC:%.+]] = bitcast <2 x i16> %{{.+}} to <2 x bfloat>
 104 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
 105 // CHECK: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
 106
 107 // GFX12-LABEL:  test_global_add_2bf16_noret
 108 // GFX12: global_atomic_pk_add_bf16 v[0:1], v2, off
     // Non-returning form: __builtin_amdgcn_global_atomic_fadd_v2bf16 is called as
     // a statement on a __global pointer and its result is discarded.
     // The IR expectations above still include the operand bitcast to
     // <2 x bfloat>, the agent-scope monotonic atomicrmw fadd on addrspace(1)
     // (align 4, with !amdgpu.no.fine.grained.memory metadata), and the bitcast
     // of the result back to <2 x i16>.
     // The ISA expectation above names literal registers (v[0:1] for the address,
     // v2 for the data) and has no th:TH_ATOMIC_RETURN suffix in the pattern.
 109 void test_global_add_2bf16_noret(__global short2 *addr, short2 x) {
 110   __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
 111 }