1 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
2 # RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64,-wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
4 # GFX1032: v_cmp_lt_f32_e32 vcc_lo, s2, v4
5 # GFX1064: v_cmp_lt_f32_e32 vcc, s2, v4
8 # GFX1032: v_cmp_ge_i32_e64 s2, s0, v2
9 # GFX1064: v_cmp_ge_i32_e64 s[2:3], s0, v2
10 0x02,0x00,0x86,0xd4,0x00,0x04,0x02,0x00
12 # GFX1032: v_cmp_ge_i32_sdwa vcc_lo, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
13 # GFX1064: v_cmp_ge_i32_sdwa vcc, v0, v2 src0_sel:WORD_1 src1_sel:DWORD
14 0xf9,0x04,0x0c,0x7d,0x00,0x00,0x05,0x06
16 # GFX1032: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
17 # GFX1064: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
18 0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06
20 # GFX1032: v_cmp_class_f32_e32 vcc_lo, s0, v0
21 # GFX1064: v_cmp_class_f32_e32 vcc, s0, v0
24 # GFX1032: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
25 # GFX1064: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
26 0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06
28 # GFX1032: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
29 # GFX1064: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
30 0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06
32 # GFX1032: v_cndmask_b32_e32 v5, 0, v2, vcc_lo
33 # GFX1064: v_cndmask_b32_e32 v5, 0, v2, vcc ;
36 # GFX1032: v_cndmask_b32_e32 v1, v2, v3, vcc_lo
37 # GFX1064: v_cndmask_b32_e32 v1, v2, v3, vcc ;
40 # GFX1032: v_cndmask_b32_sdwa v5, v1, v2, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
41 # GFX1064: v_cndmask_b32_sdwa v5, v1, v2, vcc dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:DWORD src1_sel:DWORD
42 0xf9,0x04,0x0a,0x02,0x01,0x16,0x06,0x06
44 # GFX1032: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
45 # GFX1064: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
46 0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0x00
48 # GFX1032: v_add_co_u32_e64 v2, vcc_lo, s0, v2
49 # GFX1064: v_add_co_u32_e64 v2, vcc, s0, v2
50 0x02,0x6a,0x0f,0xd7,0x00,0x04,0x02,0x00
52 # GFX1032: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
53 # GFX1064: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
56 # GFX1032: v_sub_co_u32_e64 v2, vcc_lo, s0, v2
57 # GFX1064: v_sub_co_u32_e64 v2, vcc, s0, v2
58 0x02,0x6a,0x10,0xd7,0x00,0x04,0x02,0x00
60 # GFX1032: v_subrev_co_u32_e64 v2, vcc_lo, s0, v2
61 # GFX1064: v_subrev_co_u32_e64 v2, vcc, s0, v2
62 0x02,0x6a,0x19,0xd7,0x00,0x04,0x02,0x00
64 # GFX1032: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
65 # GFX1064: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ;
68 # GFX1032: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
69 # GFX1064: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ;
72 # GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
73 # GFX1064: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
74 0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06
76 # GFX1032: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
77 # GFX1064: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
78 0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06
80 # GFX1032: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
81 # GFX1064: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
82 0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06
84 # GFX1032: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
85 # GFX1064: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
86 0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e
88 # GFX1032: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
89 # GFX1064: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
90 0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0x00
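92 # FIXME: Disassembles as the invalid v_subrev_u16_dpp, which apparently shares this encoding but does not exist on GFX10, instead of v_add_co_ci_u32_dpp; the checks below are disabled (lowercase prefixes, commented-out bytes) until this is fixed.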
94 # gfx1032: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
95 # gfx1064: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
96 # 0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00
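98 # FIXME: Disassembles as v_mul_lo_u16_dpp instead of v_sub_co_ci_u32_dpp; the checks below are disabled (lowercase prefixes, commented-out bytes) until this is fixed.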
100 # gfx1032: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
101 # gfx1064: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
102 # 0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00
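104 # FIXME: Disassembles as v_lshlrev_b16_dpp instead of v_subrev_co_ci_u32_dpp; the checks below are disabled (lowercase prefixes, commented-out bytes) until this is fixed.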
106 # gfx1032: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
107 # gfx1064: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
108 # 0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00
110 # GFX1032: v_add_co_u32_e64 v0, s0, v0, v2
111 # GFX1064: v_add_co_u32_e64 v0, s[0:1], v0, v2
112 0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00
114 # GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
115 # GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
116 0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00
118 # GFX1032: v_sub_co_u32_e64 v0, s0, v0, v2
119 # GFX1064: v_sub_co_u32_e64 v0, s[0:1], v0, v2
120 0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00
122 # GFX1032: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
123 # GFX1064: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
124 0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00
126 # GFX1032: v_subrev_co_u32_e64 v0, s0, v0, v2
127 # GFX1064: v_subrev_co_u32_e64 v0, s[0:1], v0, v2
128 0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00
130 # GFX1032: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
131 # GFX1064: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
132 0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00
134 # GFX1032: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
135 # GFX1064: v_add_co_ci_u32_e64 v4, vcc, v1, v5, s[2:3]
136 0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00
138 # GFX1032: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
139 # GFX1064: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ;
140 0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01
142 # GFX1032: v_div_scale_f32 v2, s2, v0, v0, v2
143 # GFX1064: v_div_scale_f32 v2, s[2:3], v0, v0, v2
144 0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04
146 # GFX1032: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
147 # GFX1064: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
148 0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04
150 # GFX1032: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
151 # GFX1064: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
152 0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04
154 # GFX1032: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
155 # GFX1064: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
156 0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04
158 # GFX1032: v_cmpx_neq_f32_e32 v0, v1
159 # GFX1064: v_cmpx_neq_f32_e32 v0, v1
162 # GFX1032: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
163 # GFX1064: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
164 0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06
166 # GFX1032: v_cmpx_class_f32_e64 v0, 1
167 # GFX1064: v_cmpx_class_f32_e64 v0, 1
168 0x00,0x00,0x98,0xd4,0x00,0x03,0x01,0x00
170 # GFX1032: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
171 # GFX1064: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
172 0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86