1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
2 # RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
3 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
4 # RUN: llc -mtriple=amdgcn -mcpu=fiji -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
5 # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes=si-peephole-sdwa -o - %s | FileCheck -check-prefix=SDWA %s
# Test add_f16_u32_preserve.
# Input: two dwords are loaded; V_AND_B32 65535, V_LSHRREV_B32 16, V_BFE_U32 8,8
# and V_LSHRREV_B32 24 extract fields from them. V_ADD_F16_e64 and V_MUL_F32_e64
# consume those fields, their results are shifted left (by 8 and by 16) and
# merged with V_OR_B32_e64 before being stored.
# Expected: the two shifts and the V_OR_B32_e64 are gone. The checks require a
# V_MUL_F32_sdwa followed by a V_ADD_F16_sdwa, both reading the loaded dwords
# directly, with the V_MUL_F32_sdwa result attached to the V_ADD_F16_sdwa as an
# implicit operand tied to def 0. The field-extraction instructions still appear
# in the expected output.
# NOTE(review): the trailing immediates on the *_sdwa instructions are presumably
# dst_sel / dst_unused / src_sel encodings (2 = UNUSED_PRESERVE) - confirm against
# the AMDGPU SDWA enum definitions.
7 name: add_f16_u32_preserve
8 tracksRegLiveness: true
10 - { id: 0, class: vreg_64 }
11 - { id: 1, class: vreg_64 }
12 - { id: 2, class: sreg_64 }
13 - { id: 3, class: vgpr_32 }
14 - { id: 4, class: vgpr_32 }
15 - { id: 5, class: vgpr_32 }
16 - { id: 6, class: vgpr_32 }
17 - { id: 7, class: vgpr_32 }
18 - { id: 8, class: vgpr_32 }
19 - { id: 9, class: vgpr_32 }
20 - { id: 10, class: vgpr_32 }
21 - { id: 11, class: vgpr_32 }
22 - { id: 12, class: vgpr_32 }
23 - { id: 13, class: vgpr_32 }
26 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
28 ; SDWA-LABEL: name: add_f16_u32_preserve
29 ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
31 ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
32 ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
33 ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
34 ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
35 ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
36 ; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
37 ; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
38 ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
39 ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
40 ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
41 ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
42 ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
43 ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
44 ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
45 %2 = COPY $sgpr30_sgpr31
46 %1 = COPY $vgpr2_vgpr3
47 %0 = COPY $vgpr0_vgpr1
48 %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
49 %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
51 %5 = V_AND_B32_e32 65535, %3, implicit $exec
52 %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
53 %7 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
54 %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
56 %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
57 %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
58 %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
59 %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
61 %13 = V_OR_B32_e64 %10, %12, implicit $exec
63 FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
64 $sgpr30_sgpr31 = COPY %2
65 S_SETPC_B64_return $sgpr30_sgpr31
# Test sdwa_preserve_keep.
# Input: already contains a V_MOV_B32_sdwa whose implicit tied-def operand is
# %11, the result of V_AND_B32_e64 of the first loaded dword with the constant 255.
# Expected: the checks require the same instruction sequence as the input; in
# particular the V_MOV_B32_sdwa must stay tied to the V_AND_B32_e64 result.
# NOTE(review): presumably the AND must be kept because the 255 mask is narrower
# than the part of the tied register that the SDWA instruction preserves -
# confirm against the pass implementation.
68 name: sdwa_preserve_keep
69 tracksRegLiveness: true
71 - { id: 0, class: vreg_64 }
72 - { id: 1, class: vreg_64 }
73 - { id: 2, class: sreg_64 }
74 - { id: 3, class: vgpr_32 }
75 - { id: 4, class: vgpr_32 }
76 - { id: 5, class: sreg_32_xm0_xexec }
77 - { id: 6, class: vgpr_32 }
78 - { id: 7, class: vgpr_32 }
79 - { id: 8, class: sreg_32_xm0 }
80 - { id: 9, class: vgpr_32 }
81 - { id: 10, class: sreg_32_xm0 }
82 - { id: 11, class: vgpr_32 }
83 - { id: 17, class: vgpr_32 }
86 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
88 ; SDWA-LABEL: name: sdwa_preserve_keep
89 ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
91 ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
92 ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
93 ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
94 ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
95 ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
96 ; SDWA-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 8, [[FLAT_LOAD_DWORD]], implicit $exec
97 ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 255
98 ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_]], implicit $exec
99 ; SDWA-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[FLAT_LOAD_DWORD1]], 0, 5, 2, 4, implicit $exec, implicit [[V_AND_B32_e64_]](tied-def 0)
100 ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_MOV_B32_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
101 ; SDWA-NEXT: S_ENDPGM 0
102 %2 = COPY $sgpr30_sgpr31
103 %1 = COPY $vgpr2_vgpr3
104 %0 = COPY $vgpr0_vgpr1
105 %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
106 %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
108 %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
109 %10:sreg_32_xm0 = S_MOV_B32 255
110 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
111 %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
112 FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
# Test sdwa_preserve_remove.
# Input: identical in shape to sdwa_preserve_keep, except that the V_AND_B32_e64
# feeding the tied-def operand of V_MOV_B32_sdwa masks with 65535 instead of 255.
# Expected: the checks require the V_MOV_B32_sdwa tied operand to be rewritten to
# the loaded dword itself (the V_AND_B32_e64 source) rather than the
# V_AND_B32_e64 result. The V_AND_B32_e64 and its S_MOV_B32 still appear in the
# expected output.
# NOTE(review): presumably the 65535 mask covers exactly the bits preserved from
# the tied register, making the AND redundant for this use - confirm against the
# pass implementation.
117 name: sdwa_preserve_remove
118 tracksRegLiveness: true
120 - { id: 0, class: vreg_64 }
121 - { id: 1, class: vreg_64 }
122 - { id: 2, class: sreg_64 }
123 - { id: 3, class: vgpr_32 }
124 - { id: 4, class: vgpr_32 }
125 - { id: 5, class: sreg_32_xm0_xexec }
126 - { id: 6, class: vgpr_32 }
127 - { id: 7, class: vgpr_32 }
128 - { id: 8, class: sreg_32_xm0 }
129 - { id: 9, class: vgpr_32 }
130 - { id: 10, class: sreg_32_xm0 }
131 - { id: 11, class: vgpr_32 }
132 - { id: 17, class: vgpr_32 }
135 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
137 ; SDWA-LABEL: name: sdwa_preserve_remove
138 ; SDWA: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
140 ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
141 ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
142 ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
143 ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
144 ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
145 ; SDWA-NEXT: [[V_LSHRREV_B16_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B16_e64 8, [[FLAT_LOAD_DWORD]], implicit $exec
146 ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 65535
147 ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[FLAT_LOAD_DWORD]], killed [[S_MOV_B32_]], implicit $exec
148 ; SDWA-NEXT: [[V_MOV_B32_sdwa:%[0-9]+]]:vgpr_32 = V_MOV_B32_sdwa 0, [[FLAT_LOAD_DWORD1]], 0, 5, 2, 4, implicit $exec, implicit [[FLAT_LOAD_DWORD]](tied-def 0)
149 ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_MOV_B32_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
150 ; SDWA-NEXT: S_ENDPGM 0
151 %2 = COPY $sgpr30_sgpr31
152 %1 = COPY $vgpr2_vgpr3
153 %0 = COPY $vgpr0_vgpr1
154 %3 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
155 %4 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
157 %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
158 %10:sreg_32_xm0 = S_MOV_B32 65535
159 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
160 %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
161 FLAT_STORE_DWORD %0, %17, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
# Test add_f16_u32_preserve_different_bb.
# Input: the same instruction sequence as add_f16_u32_preserve (field extraction,
# V_ADD_F16_e64 / V_MUL_F32_e64, two left shifts, V_OR_B32_e64, store), but the
# checks show the function spans several basic blocks (successors %bb.1, %bb.2).
# Expected: the field-extraction instructions are checked first; V_MUL_F32_sdwa
# is checked after the "successors: %bb.2" line; V_ADD_F16_sdwa, carrying the
# V_MUL_F32_sdwa result as an implicit tied-def operand, is checked last,
# immediately before the store and return.
166 name: add_f16_u32_preserve_different_bb
167 tracksRegLiveness: true
169 ; SDWA-LABEL: name: add_f16_u32_preserve_different_bb
171 ; SDWA-NEXT: successors: %bb.1(0x80000000)
172 ; SDWA-NEXT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
174 ; SDWA-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
175 ; SDWA-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
176 ; SDWA-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
177 ; SDWA-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
178 ; SDWA-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
179 ; SDWA-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[FLAT_LOAD_DWORD]], implicit $exec
180 ; SDWA-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, [[FLAT_LOAD_DWORD1]], implicit $exec
181 ; SDWA-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[FLAT_LOAD_DWORD]], 8, 8, implicit $exec
182 ; SDWA-NEXT: [[V_LSHRREV_B32_e32_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e32 24, [[FLAT_LOAD_DWORD1]], implicit $exec
185 ; SDWA-NEXT: successors: %bb.2(0x80000000)
187 ; SDWA-NEXT: [[V_MUL_F32_sdwa:%[0-9]+]]:vgpr_32 = V_MUL_F32_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 5, 0, 1, 3, implicit $mode, implicit $exec
190 ; SDWA-NEXT: [[V_ADD_F16_sdwa:%[0-9]+]]:vgpr_32 = V_ADD_F16_sdwa 0, [[FLAT_LOAD_DWORD]], 0, [[FLAT_LOAD_DWORD1]], 0, 0, 1, 2, 4, 5, implicit $mode, implicit $exec, implicit [[V_MUL_F32_sdwa]](tied-def 0)
191 ; SDWA-NEXT: FLAT_STORE_DWORD [[COPY2]], [[V_ADD_F16_sdwa]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
192 ; SDWA-NEXT: $sgpr30_sgpr31 = COPY [[COPY]]
193 ; SDWA-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
195 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
197 %2:sreg_64 = COPY $sgpr30_sgpr31
198 %1:vreg_64 = COPY $vgpr2_vgpr3
199 %0:vreg_64 = COPY $vgpr0_vgpr1
200 %3:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
201 %4:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32))
203 %5:vgpr_32 = V_AND_B32_e32 65535, %3, implicit $exec
204 %6:vgpr_32 = V_LSHRREV_B32_e64 16, %4, implicit $exec
205 %7:vgpr_32 = V_BFE_U32_e64 %3, 8, 8, implicit $exec
206 %8:vgpr_32 = V_LSHRREV_B32_e32 24, %4, implicit $exec
208 %9:vgpr_32 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $mode, implicit $exec
209 %10:vgpr_32 = V_LSHLREV_B16_e64 8, %9, implicit $exec
212 %11:vgpr_32 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $mode, implicit $exec
213 %12:vgpr_32 = V_LSHLREV_B32_e64 16, %11, implicit $exec
216 %13:vgpr_32 = V_OR_B32_e64 %10, %12, implicit $exec
218 FLAT_STORE_DWORD %0, %13, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32))
219 $sgpr30_sgpr31 = COPY %2
220 S_SETPC_B64_return $sgpr30_sgpr31
223 # Should not add kill flag to reused ops in SDWAInst
# Test multiuse_kill.
# Input: %5 = V_AND_B32_e64 of the constant 65535 with `killed %1`, and %5 is
# used by two separate V_OR_B32_e64 instructions.
# Expected: both V_OR_B32_e64 instructions become V_OR_B32_sdwa reading %1
# directly. Because %1 now has several uses, the checks require that it carries
# no `killed` flag on the V_AND_B32_e64 or on either V_OR_B32_sdwa.
227 tracksRegLiveness: true
231 ; SDWA-LABEL: name: multiuse_kill
232 ; SDWA: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
233 ; SDWA-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
234 ; SDWA-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
235 ; SDWA-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF]], implicit $exec
236 ; SDWA-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
237 ; SDWA-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed [[S_MOV_B32_]], [[DEF1]], implicit $exec
238 ; SDWA-NEXT: [[V_OR_B32_sdwa:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_]], 0, 6, 0, 4, 6, implicit $exec
239 ; SDWA-NEXT: [[V_LSHLREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, killed [[DEF2]], implicit $exec
240 ; SDWA-NEXT: [[V_OR_B32_sdwa1:%[0-9]+]]:vgpr_32 = V_OR_B32_sdwa 0, [[DEF1]], 0, [[V_LSHLREV_B32_e64_1]], 0, 6, 0, 4, 6, implicit $exec
241 ; SDWA-NEXT: S_ENDPGM 0
242 %0:vgpr_32 = IMPLICIT_DEF
243 %1:vgpr_32 = IMPLICIT_DEF
244 %2:vgpr_32 = IMPLICIT_DEF
245 %3:vgpr_32 = V_LSHLREV_B32_e64 16, killed %0, implicit $exec
246 %4:sreg_32 = S_MOV_B32 65535
247 %5:vgpr_32 = V_AND_B32_e64 killed %4, killed %1, implicit $exec
248 %6:vgpr_32 = V_OR_B32_e64 %5, killed %3, implicit $exec
249 %7:vgpr_32 = V_LSHLREV_B32_e64 16, killed %2, implicit $exec
250 %8:vgpr_32 = V_OR_B32_e64 %5, killed %7, implicit $exec