1 # RUN: llc -march=amdgcn -mcpu=fiji -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
2 # RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-peephole-sdwa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDWA %s
# Test 1 (add_f16_u32_preserve): the body loads two dwords, extracts
#   - the low 16 bits of the first  (V_AND_B32 with 65535),
#   - the high 16 bits of the second (V_LSHRREV_B32 by 16),
#   - byte 1 of the first            (V_BFE_U32 offset 8, width 8),
#   - byte 3 of the second           (V_LSHRREV_B32 by 24),
# then combines an f16 add (shifted left by 8) and an f32 mul (shifted left
# by 16) with V_OR_B32 and stores the result.
#
# The checks expect the extracts and shifts to be folded into operand/result
# selectors of two SDWA instructions: the multiply writes WORD_1 with
# UNUSED_PAD, and the add writes BYTE_1 with UNUSED_PRESERVE.
#
# The add's destination is matched with the already-captured RES variable
# (not a fresh capture) so the test actually verifies that the preserving
# add writes into the very register the multiply produced, and that this
# same register is what gets stored.
4 # SDWA-LABEL: {{^}}add_f16_u32_preserve
6 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
7 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
9 # SDWA: v_mul_f32_sdwa [[RES:v[0-9]+]], [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_3
10 # SDWA: v_add_f16_sdwa [[RES]], [[FIRST]], [[SECOND]] dst_sel:BYTE_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 src1_sel:WORD_1
12 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[RES]]
15 name: add_f16_u32_preserve
16 tracksRegLiveness: true
18 - { id: 0, class: vreg_64 }
19 - { id: 1, class: vreg_64 }
20 - { id: 2, class: sreg_64 }
21 - { id: 3, class: vgpr_32 }
22 - { id: 4, class: vgpr_32 }
23 - { id: 5, class: vgpr_32 }
24 - { id: 6, class: vgpr_32 }
25 - { id: 7, class: vgpr_32 }
26 - { id: 8, class: vgpr_32 }
27 - { id: 9, class: vgpr_32 }
28 - { id: 10, class: vgpr_32 }
29 - { id: 11, class: vgpr_32 }
30 - { id: 12, class: vgpr_32 }
31 - { id: 13, class: vgpr_32 }
34 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
36 %2 = COPY $sgpr30_sgpr31
37 %1 = COPY $vgpr2_vgpr3
38 %0 = COPY $vgpr0_vgpr1
39 %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
40 %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
42 %5 = V_AND_B32_e32 65535, %3, implicit $exec
43 %6 = V_LSHRREV_B32_e64 16, %4, implicit $exec
44 %7 = V_BFE_U32 %3, 8, 8, implicit $exec
45 %8 = V_LSHRREV_B32_e32 24, %4, implicit $exec
47 %9 = V_ADD_F16_e64 0, %5, 0, %6, 0, 0, implicit $exec
48 %10 = V_LSHLREV_B16_e64 8, %9, implicit $exec
49 %11 = V_MUL_F32_e64 0, %7, 0, %8, 0, 0, implicit $exec
50 %12 = V_LSHLREV_B32_e64 16, %11, implicit $exec
52 %13 = V_OR_B32_e64 %10, %12, implicit $exec
54 FLAT_STORE_DWORD %0, %13, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
55 $sgpr30_sgpr31 = COPY %2
56 S_SETPC_B64_return $sgpr30_sgpr31
# Test 2 (sdwa_preserve_keep): the input already contains a V_MOV_B32_sdwa
# whose implicit use of %11 is tied to its result (tied-def 0); %11 is the
# first loaded dword masked with 255 via V_AND_B32.
#
# The checks expect the 0xff AND to remain in the output, and the
# v_mov_b32_sdwa (dst_sel WORD_1, UNUSED_PRESERVE, src0_sel WORD_0) to write
# into the register holding the AND result, which is then stored.
#
# NOTE(review): presumably the AND must be kept because a 0xff mask covers
# less than the low word that the WORD_1 preserve leaves untouched -- confirm
# against the peephole pass implementation.
# NOTE(review): %9 (V_LSHRREV_B16) has no user among the visible
# instructions, and registers 5-8 are declared but never referenced here.
59 # SDWA-LABEL: sdwa_preserve_keep
60 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
61 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
63 # SDWA: v_and_b32_e32 [[AND:v[0-9]+]], 0xff, [[FIRST]]
64 # SDWA: v_mov_b32_sdwa [[AND]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
66 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[AND]]
68 name: sdwa_preserve_keep
69 tracksRegLiveness: true
71 - { id: 0, class: vreg_64 }
72 - { id: 1, class: vreg_64 }
73 - { id: 2, class: sreg_64 }
74 - { id: 3, class: vgpr_32 }
75 - { id: 4, class: vgpr_32 }
76 - { id: 5, class: sreg_32_xm0_xexec }
77 - { id: 6, class: vgpr_32 }
78 - { id: 7, class: vgpr_32 }
79 - { id: 8, class: sreg_32_xm0 }
80 - { id: 9, class: vgpr_32 }
81 - { id: 10, class: sreg_32_xm0 }
82 - { id: 11, class: vgpr_32 }
83 - { id: 17, class: vgpr_32 }
86 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
88 %2 = COPY $sgpr30_sgpr31
89 %1 = COPY $vgpr2_vgpr3
90 %0 = COPY $vgpr0_vgpr1
91 %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
92 %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
94 %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
95 %10:sreg_32_xm0 = S_MOV_B32 255
96 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
97 %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
98 FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)
# Test 3 (sdwa_preserve_remove): same input shape as sdwa_preserve_keep,
# except that the mask fed to V_AND_B32 is 65535 instead of 255.
#
# The checks expect the v_mov_b32_sdwa (dst_sel WORD_1, UNUSED_PRESERVE,
# src0_sel WORD_0) to write directly into the register of the first load,
# and that same register to be stored -- i.e. the AND result is no longer
# the preserved value.
#
# NOTE(review): there is no negative check for v_and_b32 in this test, so
# the removal of the AND is only implied by the register reuse above.
# NOTE(review): %9 (V_LSHRREV_B16) has no user among the visible
# instructions, and registers 5-8 are declared but never referenced here.
103 # SDWA-LABEL: sdwa_preserve_remove
104 # SDWA: flat_load_dword [[FIRST:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
105 # SDWA: flat_load_dword [[SECOND:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]
107 # SDWA: v_mov_b32_sdwa [[FIRST]], [[SECOND]] dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0
109 # SDWA: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], [[FIRST]]
111 name: sdwa_preserve_remove
112 tracksRegLiveness: true
114 - { id: 0, class: vreg_64 }
115 - { id: 1, class: vreg_64 }
116 - { id: 2, class: sreg_64 }
117 - { id: 3, class: vgpr_32 }
118 - { id: 4, class: vgpr_32 }
119 - { id: 5, class: sreg_32_xm0_xexec }
120 - { id: 6, class: vgpr_32 }
121 - { id: 7, class: vgpr_32 }
122 - { id: 8, class: sreg_32_xm0 }
123 - { id: 9, class: vgpr_32 }
124 - { id: 10, class: sreg_32_xm0 }
125 - { id: 11, class: vgpr_32 }
126 - { id: 17, class: vgpr_32 }
129 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $sgpr30_sgpr31
131 %2 = COPY $sgpr30_sgpr31
132 %1 = COPY $vgpr2_vgpr3
133 %0 = COPY $vgpr0_vgpr1
134 %3 = FLAT_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
135 %4 = FLAT_LOAD_DWORD %1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4)
137 %9:vgpr_32 = V_LSHRREV_B16_e64 8, %3, implicit $exec
138 %10:sreg_32_xm0 = S_MOV_B32 65535
139 %11:vgpr_32 = V_AND_B32_e64 %3, killed %10, implicit $exec
140 %17:vgpr_32 = V_MOV_B32_sdwa 0, %4, 0, 5, 2, 4, implicit $exec, implicit %11(tied-def 0)
141 FLAT_STORE_DWORD %0, %17, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4)