1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
4 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Non-kernel function (the define carries no calling-convention keyword):
; both operands of llvm.amdgcn.trig.preop.f64 are the function's own
; arguments. Every check block below expects exactly one v_trig_preop_f64
; that reads v[0:1] and v2 and writes v[0:1].
8 define double @v_trig_preop_f64(double %a, i32 %b) {
9 ; GCN-LABEL: v_trig_preop_f64:
11 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 ; GFX10PLUS-LABEL: v_trig_preop_f64:
17 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
19 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
20 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
; Same shape as a plain (non-kernel) function, but the i32 operand of the
; intrinsic call is the constant 7; the %b parameter is not referenced by the
; call. Every check block below expects the literal 7 to appear directly as the
; last operand of v_trig_preop_f64.
24 define double @v_trig_preop_f64_imm(double %a, i32 %b) {
25 ; GCN-LABEL: v_trig_preop_f64_imm:
27 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
29 ; GCN-NEXT: s_setpc_b64 s[30:31]
31 ; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
33 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
35 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
36 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
; amdgpu_kernel variant: the intrinsic operands are the kernel arguments, and
; the result is written with a volatile store through an undef pointer.
; Each check block expects s_load instructions to produce the operands.
; The hawaii, fiji and gfx900 check blocks expect the i32 to be copied into v0
; (v_mov_b32) before v_trig_preop_f64, whereas the gfx1010 and gfx1100 check
; blocks expect it to be consumed directly from an SGPR.
; NOTE(review): presumably this reflects a per-target limit on how many scalar
; operands the instruction may read - confirm against the ISA documentation.
; The hawaii checks use offset 0x2 for the i32 load where the others use 0x8.
40 define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
41 ; CI-LABEL: s_trig_preop_f64:
43 ; CI-NEXT: s_load_dword s2, s[4:5], 0x2
44 ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
45 ; CI-NEXT: s_waitcnt lgkmcnt(0)
46 ; CI-NEXT: v_mov_b32_e32 v0, s2
47 ; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
48 ; CI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
49 ; CI-NEXT: s_waitcnt vmcnt(0)
52 ; VI-LABEL: s_trig_preop_f64:
54 ; VI-NEXT: s_load_dword s2, s[4:5], 0x8
55 ; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
56 ; VI-NEXT: s_waitcnt lgkmcnt(0)
57 ; VI-NEXT: v_mov_b32_e32 v0, s2
58 ; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
59 ; VI-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
60 ; VI-NEXT: s_waitcnt vmcnt(0)
63 ; GFX9-LABEL: s_trig_preop_f64:
65 ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
66 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
67 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
68 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
69 ; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
70 ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
71 ; GFX9-NEXT: s_waitcnt vmcnt(0)
74 ; GFX10-LABEL: s_trig_preop_f64:
76 ; GFX10-NEXT: s_clause 0x1
77 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
78 ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
79 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
80 ; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
81 ; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
82 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
83 ; GFX10-NEXT: s_endpgm
85 ; GFX11-LABEL: s_trig_preop_f64:
87 ; GFX11-NEXT: s_clause 0x1
88 ; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
89 ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
90 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
91 ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[2:3], s0
92 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
93 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
94 ; GFX11-NEXT: s_endpgm
95 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
96 store volatile double %result, ptr undef
; amdgpu_kernel variant with the constant 7 as the i32 operand; the %b kernel
; argument is not referenced by the call. The result is written with a
; volatile store through an undef pointer.
; The hawaii, fiji and gfx900 runs share one check block here, while gfx1010
; and gfx1100 each have their own. In all of them only the double is loaded
; (one 64-bit s_load) and v_trig_preop_f64 takes the loaded SGPR pair together
; with the literal 7.
100 define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
101 ; GCN-LABEL: s_trig_preop_f64_imm:
103 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
104 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
105 ; GCN-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
106 ; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
107 ; GCN-NEXT: s_waitcnt vmcnt(0)
110 ; GFX10-LABEL: s_trig_preop_f64_imm:
112 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
113 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
114 ; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
115 ; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
116 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
117 ; GFX10-NEXT: s_endpgm
119 ; GFX11-LABEL: s_trig_preop_f64_imm:
121 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
122 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
123 ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
124 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
125 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
126 ; GFX11-NEXT: s_endpgm
127 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
128 store volatile double %result, ptr undef
; Declaration of the intrinsic called by every function in this test: it takes
; a double and an i32 and returns a double. It carries attribute group #0.
132 declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
; Attribute group #0, referenced by the declaration of
; @llvm.amdgcn.trig.preop.f64.
134 attributes #0 = { nounwind readnone speculatable }