1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,CI %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,VI %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; Non-kernel function: passes both of its arguments (double %a, i32 %b)
; straight to the llvm.amdgcn.trig.preop.f64 intrinsic.
; The assertions below expect a single v_trig_preop_f64 whose sources are the
; incoming vector registers v[0:1] and v2, writing v[0:1], with identical
; output under both prefix groups used by the run lines.
8 define double @v_trig_preop_f64(double %a, i32 %b) {
9 ; GCN-LABEL: v_trig_preop_f64:
11 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
13 ; GCN-NEXT: s_setpc_b64 s[30:31]
15 ; GFX10PLUS-LABEL: v_trig_preop_f64:
17 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], v2
19 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
20 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
; Non-kernel function: calls llvm.amdgcn.trig.preop.f64 with %a and the
; constant i32 7 as the second operand; the %b parameter is not used by the
; call.
; The assertions below expect the constant to appear directly as the literal
; operand 7 of v_trig_preop_f64, with no separate register move, under both
; prefix groups used by the run lines.
24 define double @v_trig_preop_f64_imm(double %a, i32 %b) {
25 ; GCN-LABEL: v_trig_preop_f64_imm:
27 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28 ; GCN-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
29 ; GCN-NEXT: s_setpc_b64 s[30:31]
31 ; GFX10PLUS-LABEL: v_trig_preop_f64_imm:
33 ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34 ; GFX10PLUS-NEXT: v_trig_preop_f64 v[0:1], v[0:1], 7
35 ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
36 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
; Kernel variant: %a and %b are kernel arguments, which the expected output
; fetches with scalar loads (s_load_*) into s[0:1] and s2 before the
; intrinsic call. The result is consumed by a volatile store to an undef
; pointer.
; Differences visible in the per-target assertions below:
;  - hawaii, fiji and gfx900 copy s2 into v0 with v_mov_b32 and use v0 as the
;    second operand; gfx1010 and gfx1100 use s2 directly.
;  - hawaii and fiji store the 64-bit result as two flat_store_dword
;    instructions (the second address is formed with s_add_u32/s_addc_u32
;    by 4); gfx900 and newer use one 64-bit flat store.
;  - gfx1100 loads its arguments through s[4:5]; the other targets use
;    s[8:9].
40 define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
41 ; CI-LABEL: s_trig_preop_f64:
43 ; CI-NEXT: s_load_dword s2, s[8:9], 0x2
44 ; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
45 ; CI-NEXT: s_waitcnt lgkmcnt(0)
46 ; CI-NEXT: v_mov_b32_e32 v0, s2
47 ; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
48 ; CI-NEXT: s_add_u32 s0, s0, 4
49 ; CI-NEXT: s_addc_u32 s1, s1, 0
50 ; CI-NEXT: v_mov_b32_e32 v3, s1
51 ; CI-NEXT: v_mov_b32_e32 v2, s0
52 ; CI-NEXT: flat_store_dword v[0:1], v0
53 ; CI-NEXT: s_waitcnt vmcnt(0)
54 ; CI-NEXT: flat_store_dword v[2:3], v1
55 ; CI-NEXT: s_waitcnt vmcnt(0)
58 ; VI-LABEL: s_trig_preop_f64:
60 ; VI-NEXT: s_load_dword s2, s[8:9], 0x8
61 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
62 ; VI-NEXT: s_waitcnt lgkmcnt(0)
63 ; VI-NEXT: v_mov_b32_e32 v0, s2
64 ; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
65 ; VI-NEXT: s_add_u32 s0, s0, 4
66 ; VI-NEXT: s_addc_u32 s1, s1, 0
67 ; VI-NEXT: v_mov_b32_e32 v3, s1
68 ; VI-NEXT: v_mov_b32_e32 v2, s0
69 ; VI-NEXT: flat_store_dword v[0:1], v0
70 ; VI-NEXT: s_waitcnt vmcnt(0)
71 ; VI-NEXT: flat_store_dword v[2:3], v1
72 ; VI-NEXT: s_waitcnt vmcnt(0)
75 ; GFX9-LABEL: s_trig_preop_f64:
77 ; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
78 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
79 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
80 ; GFX9-NEXT: v_mov_b32_e32 v0, s2
81 ; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
82 ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
83 ; GFX9-NEXT: s_waitcnt vmcnt(0)
86 ; GFX10-LABEL: s_trig_preop_f64:
88 ; GFX10-NEXT: s_clause 0x1
89 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
90 ; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8
91 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
92 ; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
93 ; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
94 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
95 ; GFX10-NEXT: s_endpgm
97 ; GFX11-LABEL: s_trig_preop_f64:
99 ; GFX11-NEXT: s_clause 0x1
100 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
101 ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
102 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
103 ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
104 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
105 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
106 ; GFX11-NEXT: s_endpgm
107 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b)
108 store volatile double %result, ptr undef
; Kernel variant with a constant second operand: the intrinsic is called with
; kernel argument %a and the constant i32 7; %b is not used by the call. The
; result is consumed by a volatile store to an undef pointer.
; In the assertions below only the 64-bit argument is loaded (into s[0:1]) and
; every target uses the literal 7 directly as the second operand of
; v_trig_preop_f64. As in the visible checks, hawaii and fiji store the
; result with two flat_store_dword instructions while gfx900 and newer use
; one 64-bit flat store, and gfx1100 loads through s[4:5] instead of s[8:9].
112 define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
113 ; CI-LABEL: s_trig_preop_f64_imm:
115 ; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
116 ; CI-NEXT: s_waitcnt lgkmcnt(0)
117 ; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
118 ; CI-NEXT: s_add_u32 s0, s0, 4
119 ; CI-NEXT: s_addc_u32 s1, s1, 0
120 ; CI-NEXT: v_mov_b32_e32 v3, s1
121 ; CI-NEXT: v_mov_b32_e32 v2, s0
122 ; CI-NEXT: flat_store_dword v[0:1], v0
123 ; CI-NEXT: s_waitcnt vmcnt(0)
124 ; CI-NEXT: flat_store_dword v[2:3], v1
125 ; CI-NEXT: s_waitcnt vmcnt(0)
128 ; VI-LABEL: s_trig_preop_f64_imm:
130 ; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
131 ; VI-NEXT: s_waitcnt lgkmcnt(0)
132 ; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
133 ; VI-NEXT: s_add_u32 s0, s0, 4
134 ; VI-NEXT: s_addc_u32 s1, s1, 0
135 ; VI-NEXT: v_mov_b32_e32 v3, s1
136 ; VI-NEXT: v_mov_b32_e32 v2, s0
137 ; VI-NEXT: flat_store_dword v[0:1], v0
138 ; VI-NEXT: s_waitcnt vmcnt(0)
139 ; VI-NEXT: flat_store_dword v[2:3], v1
140 ; VI-NEXT: s_waitcnt vmcnt(0)
143 ; GFX9-LABEL: s_trig_preop_f64_imm:
145 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
146 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
147 ; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
148 ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
149 ; GFX9-NEXT: s_waitcnt vmcnt(0)
150 ; GFX9-NEXT: s_endpgm
152 ; GFX10-LABEL: s_trig_preop_f64_imm:
154 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
155 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
156 ; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
157 ; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
158 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
159 ; GFX10-NEXT: s_endpgm
161 ; GFX11-LABEL: s_trig_preop_f64_imm:
163 ; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
164 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
165 ; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
166 ; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
167 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
168 ; GFX11-NEXT: s_endpgm
169 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7)
170 store volatile double %result, ptr undef
; Declaration of the intrinsic under test: it takes a double and an i32 and
; returns a double. Attribute group #0 marks it nounwind, readnone and
; speculatable.
174 declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
176 attributes #0 = { nounwind readnone speculatable }