Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / MC / AMDGPU / gfx11_asm_vopd_features.s
blobc442800eb06dbf20133deae9e7c11426ec5d8214
1 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX11 %s
// Test driver: assembles this file for gfx1100 with -show-encoding and pipes the
// printed result to FileCheck, which matches it against the expected-encoding
// comment that follows each instruction below.
3 //===----------------------------------------------------------------------===//
4 // A VOPD instruction can use one or more literals,
5 // provided that they are identical.
// In each expected encoding below the literal value appears exactly once,
// as the last four bytes, least-significant byte first.
6 //===----------------------------------------------------------------------===//
8 // LITERAL: only the Y component uses a literal (0x24681357, as its first source).
10 v_dual_mul_f32 v11, v1, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
11 // GFX11: encoding: [0x01,0x05,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
13 // LITERAL*2: X and Y both name the same literal; still a single trailing dword.
15 v_dual_mul_f32 v11, 0x24681357, v2 :: v_dual_mul_f32 v10, 0x24681357, v5
16 // GFX11: encoding: [0xff,0x04,0xc6,0xc8,0xff,0x0a,0x0a,0x0b,0x57,0x13,0x68,0x24]
18 // LITERAL*2 (this is an unclear case because literals have different size, but SP3 accepts this code)
// 0xfe0b is expected as the 32-bit value 0x0000fe0b (bytes 0x0b,0xfe,0x00,0x00).
20 v_dual_add_f32 v6, 0xfe0b, v5 :: v_dual_dot2acc_f32_f16 v255, 0xfe0b, v4
21 // GFX11: encoding: [0xff,0x0a,0x18,0xc9,0xff,0x08,0xfe,0x06,0x0b,0xfe,0x00,0x00]
23 // LITERAL + KIMM: X takes 0xaf123456 as its first source and Y takes the same
// value as the trailing constant of v_dual_fmaak_f32; the expected encoding
// carries it once, as the last four bytes.
25 v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456
26 // GFX11: encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
28 // KIMM + LITERAL: X carries 0xa0172923 as the fmamk constant, Y as its first source.
30 v_dual_fmamk_f32 v122, v74, 0xa0172923, v161 :: v_dual_lshlrev_b32 v247, 0xa0172923, v99
31 // GFX11: encoding: [0x4a,0x43,0xa3,0xc8,0xff,0xc6,0xf6,0x7a,0x23,0x29,0x17,0xa0]
33 // KIMM + LITERAL (this is an unclear case because literals have different size, but SP3 accepts this code)
// 0xfe0b is expected as the trailing bytes 0x0b,0xfe,0x00,0x00.
35 v_dual_fmamk_f32 v122, v74, 0xfe0b, v162 :: v_dual_dot2acc_f32_f16 v247, 0xfe0b, v99
36 // GFX11: encoding: [0x4a,0x45,0x99,0xc8,0xff,0xc6,0xf6,0x7a,0x0b,0xfe,0x00,0x00]
38 // KIMM*2: both components are fmamk; 0xdeadbeef is written as the first source
// and as the constant of each, yet only one trailing dword is expected.
40 v_dual_fmamk_f32 v122, 0xdeadbeef, 0xdeadbeef, v161 :: v_dual_fmamk_f32 v123, 0xdeadbeef, 0xdeadbeef, v162
41 // GFX11: encoding: [0xff,0x42,0x85,0xc8,0xff,0x44,0x7b,0x7a,0xef,0xbe,0xad,0xde]
43 //===----------------------------------------------------------------------===//
44 // A VOPD instruction can use 2 scalar operands,
45 // but implicit VCC must be counted in.
46 //===----------------------------------------------------------------------===//
48 // 2 different SGPRs: s1 feeds X and s4 feeds Y; no literal, so 8 bytes are expected.
50 v_dual_mul_f32 v0, s1, v2 :: v_dual_mul_f32 v3, s4, v5
51 // GFX11: encoding: [0x01,0x04,0xc6,0xc8,0x04,0x0a,0x02,0x00]
53 // SGPR + LITERAL: fmaak reads s74 and the constant 2.741 (trailing bytes
// 0x8b,0x6c,0x2f,0x40). Checked with fmaak as the X component, then as Y.
55 v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, v160, v98
56 // GFX11: encoding: [0x4a,0x42,0x65,0xc8,0xa0,0xc5,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
58 v_dual_mov_b32 v247, v160 :: v_dual_fmaak_f32 v122, s74, v161, 2.741
59 // GFX11: encoding: [0xa0,0x01,0x02,0xca,0x4a,0x42,0x7b,0xf7,0x8b,0x6c,0x2f,0x40]
61 // SGPR*2 + LITERAL: the same SGPR (s74) is read by both components.
63 v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_and_b32 v247, s74, v98
64 // GFX11: encoding: [0x4a,0x42,0x65,0xc8,0x4a,0xc4,0xf6,0x7a,0x8b,0x6c,0x2f,0x40]
66 // SGPR + LITERAL*2: both components use the constant 2.741; only X reads an SGPR.
68 v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, v6, 2.741, v1
69 // GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x06,0x03,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
71 // SGPR*2 + LITERAL*2: s74 and 2.741 are each used by both components.
73 v_dual_fmaak_f32 v122, s74, v161, 2.741 :: v_dual_fmamk_f32 v3, s74, 2.741, v1
74 // GFX11: encoding: [0x4a,0x42,0x45,0xc8,0x4a,0x02,0x02,0x7a,0x8b,0x6c,0x2f,0x40]
76 // LITERAL + VCC: v_dual_cndmask_b32 names no condition operand here;
// presumably it is the implicit VCC reader this case label refers to - confirm.
78 v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, v2, v3
79 // GFX11: encoding: [0x00,0x43,0x53,0xc8,0x02,0x07,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
81 // LITERAL*2 + VCC: the cndmask first source is the same 2.741 as the fmaak constant.
83 v_dual_fmaak_f32 v122, v0, v161, 2.741 :: v_dual_cndmask_b32 v1, 2.741, v3
84 // GFX11: encoding: [0x00,0x43,0x53,0xc8,0xff,0x06,0x00,0x7a,0x8b,0x6c,0x2f,0x40]
86 // LITERAL*2 + VCC*2: cndmask in both components, sharing the literal 0xbabe
// (expected trailing bytes 0xbe,0xba,0x00,0x00).
88 v_dual_cndmask_b32 v255, 0xbabe, v2 :: v_dual_cndmask_b32 v6, 0xbabe, v3
89 // GFX11: encoding: [0xff,0x04,0x52,0xca,0xff,0x06,0x06,0xff,0xbe,0xba,0x00,0x00]
91 // SGPR*2 + VCC: s105 is read by both components; no literal, so 8 bytes are expected.
93 v_dual_add_f32 v255, s105, v2 :: v_dual_cndmask_b32 v6, s105, v3
94 // GFX11: encoding: [0x69,0x04,0x12,0xc9,0x69,0x06,0x06,0xff]
96 // SGPR*2 + VCC*2: cndmask in both components, each reading s1.
98 v_dual_cndmask_b32 v255, s1, v2 :: v_dual_cndmask_b32 v6, s1, v3
99 // GFX11: encoding: [0x01,0x04,0x52,0xca,0x01,0x06,0x06,0xff]
101 // VCC*2: X names vcc_lo explicitly as a source; Y is a cndmask with VGPR sources only.
103 v_dual_add_f32 v255, vcc_lo, v2 :: v_dual_cndmask_b32 v6, v1, v3
104 // GFX11: encoding: [0x6a,0x04,0x12,0xc9,0x01,0x07,0x06,0xff]
106 //===----------------------------------------------------------------------===//
107 // One dst register must be even and the other odd.
108 //===----------------------------------------------------------------------===//
// Both assignments are exercised: X even / Y odd (v0, v1), then X odd / Y even
// (v1, v0). The two expected encodings differ only in their final byte.
110 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21
111 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]
113 v_dual_mul_f32 v1, v10, v20 :: v_dual_mul_f32 v0, v11, v21
114 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x01]
116 //===----------------------------------------------------------------------===//
117 // srcX0 and srcY0 must use different VGPR banks.
118 //===----------------------------------------------------------------------===//
// srcX0 stays at v10 while srcY0 steps through v11, v12, v13, v15, v16, v17;
// only the fifth byte of the expected encoding changes between cases.
// NOTE(review): v14 is skipped - presumably it falls in the same bank as v10
// (bank = register index mod 4?) and would be rejected; confirm against the ISA guide.
120 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21
121 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]
123 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v12, v21
124 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0c,0x2b,0x00,0x00]
126 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v13, v21
127 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0d,0x2b,0x00,0x00]
129 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v15, v21
130 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0f,0x2b,0x00,0x00]
132 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v16, v21
133 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x10,0x2b,0x00,0x00]
135 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v17, v21
136 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x11,0x2b,0x00,0x00]
138 //===----------------------------------------------------------------------===//
139 // srcX1 and srcY1 must use different VGPR banks.
140 //===----------------------------------------------------------------------===//
// srcX1 stays at v20 while srcY1 steps through v21, v22, v23, v25, v26, v27;
// only the sixth byte of the expected encoding changes between cases.
// NOTE(review): v24 is skipped - presumably it falls in the same bank as v20
// (bank = register index mod 4?) and would be rejected; confirm against the ISA guide.
142 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v21
143 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2b,0x00,0x00]
145 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v22
146 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2d,0x00,0x00]
148 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v23
149 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x2f,0x00,0x00]
151 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v25
152 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x33,0x00,0x00]
154 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v26
155 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x35,0x00,0x00]
157 v_dual_mul_f32 v0, v10, v20 :: v_dual_mul_f32 v1, v11, v27
158 // GFX11: encoding: [0x0a,0x29,0xc6,0xc8,0x0b,0x37,0x00,0x00]
160 //===----------------------------------------------------------------------===//
161 // srcX2 and srcY2 must use different VGPR banks.
162 //===----------------------------------------------------------------------===//
// Three pairings, each checked in both orders:
//   fmamk :: fmamk - the last operands are v0/v1, then v1/v0;
//   fmac  :: fmamk - fmamk's last operand equals its own dst (v7, then v6);
//   fmamk :: fmac  - fmamk's last operand equals its own dst (v5, then v6).
// NOTE(review): fmac names no third source; presumably its dst doubles as the
// accumulator (src2), so the fmac pairs compare X's and Y's dst-as-src2
// registers, which have opposite parity in every case here - confirm.
164 v_dual_fmamk_f32 v6, v1, 0xaf123456, v0 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v1
165 // GFX11: encoding: [0x01,0x01,0x84,0xc8,0x02,0x03,0x04,0x06,0x56,0x34,0x12,0xaf]
167 v_dual_fmamk_f32 v6, v1, 0xaf123456, v1 :: v_dual_fmamk_f32 v5, v2, 0xaf123456, v0
168 // GFX11: encoding: [0x01,0x03,0x84,0xc8,0x02,0x01,0x04,0x06,0x56,0x34,0x12,0xaf]
170 v_dual_fmac_f32 v6, v1, v2 :: v_dual_fmamk_f32 v7, v2, 0xaf123456, v7
171 // GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0f,0x06,0x06,0x56,0x34,0x12,0xaf]
173 v_dual_fmac_f32 v7, v1, v2 :: v_dual_fmamk_f32 v6, v2, 0xaf123456, v6
174 // GFX11: encoding: [0x01,0x05,0x04,0xc8,0x02,0x0d,0x06,0x07,0x56,0x34,0x12,0xaf]
176 v_dual_fmamk_f32 v5, v1, 0xaf123456, v5 :: v_dual_fmac_f32 v6, v2, v3
177 // GFX11: encoding: [0x01,0x0b,0x80,0xc8,0x02,0x07,0x06,0x05,0x56,0x34,0x12,0xaf]
179 v_dual_fmamk_f32 v6, v1, 0xaf123456, v6 :: v_dual_fmac_f32 v5, v2, v3
180 // GFX11: encoding: [0x01,0x0d,0x80,0xc8,0x02,0x07,0x04,0x06,0x56,0x34,0x12,0xaf]