Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / MC / AMDGPU / vop3p.s
bloba84ada261abe89f057864033e71f8fff54b74256
1 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck -check-prefix=GFX9 %s
2 //
3 // Test op_sel/op_sel_hi
4 //
// Each case is one input instruction followed by the expected printed form
// and encoding. All cases use the same two-source v_pk_add_u16 v1, v2, v3 so
// that only the modifier bits vary between expected encodings.
//
// What the expected lines in this section show:
//  - op_sel:[0,0] and op_sel_hi:[1,1] are not printed back and encode exactly
//    like the bare instruction, i.e. they behave as the default values.
//  - A non-default op_sel changes only the second encoding byte:
//    [1,0] -> 0x48, [0,1] -> 0x50, [1,1] -> 0x58 (bare form is 0x40).
//  - A non-default op_sel_hi changes only the last encoding byte:
//    [0,1] -> 0x10, [1,0] -> 0x08, [0,0] -> 0x00 (bare form is 0x18).
//
// Default-valued modifiers, alone and combined, must round-trip to the bare form.
6 v_pk_add_u16 v1, v2, v3
7 // GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18]
9 v_pk_add_u16 v1, v2, v3 op_sel:[0,0]
10 // GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18]
12 v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1]
13 // GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18]
15 v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
16 // GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18]
// op_sel_hi cleared for both sources; an explicit default op_sel is still dropped.
18 v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0]
19 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x00]
21 v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
22 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x00]
// Non-default op_sel on its own, one source at a time and then both.
24 v_pk_add_u16 v1, v2, v3 op_sel:[1,0]
25 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x18]
27 v_pk_add_u16 v1, v2, v3 op_sel:[0,1]
28 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x18]
30 v_pk_add_u16 v1, v2, v3 op_sel:[1,1]
31 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x8a,0xd3,0x02,0x07,0x02,0x18]
// Non-default op_sel_hi on its own, one source at a time.
33 v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1]
34 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x10]
36 v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0]
37 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x08]
// Both modifiers given together; op_sel_hi:[1,1] is dropped from the printed
// form while any other op_sel_hi value is kept.
39 v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
40 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x8a,0xd3,0x02,0x07,0x02,0x18]
42 v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
43 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x08]
45 v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
46 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x10]
48 v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
49 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x10]
51 v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
52 // GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x08]
55 // Test src2 op_sel/op_sel_hi
// These cases use the three-source v_pk_fma_f16 v8, v0, s0, v1, so every
// modifier list has three elements.
//
// What the expected lines in this section show:
//  - op_sel:[0,0,0], op_sel_hi:[1,1,1], neg_lo:[0,0,0] and neg_hi:[0,0,0] are
//    not printed back and leave the encoding identical to the bare form.
//  - op_sel_hi:[0,0,0] changes two bytes: the second byte goes 0x40 -> 0x00
//    and the last byte goes 0x1c -> 0x04.
//  - With op_sel:[0,0,1] op_sel_hi:[0,0,1] the second byte becomes 0x60 while
//    the last byte stays 0x04, so the third-element bits of both modifiers
//    land in the second byte.
//    NOTE(review): presumably 0x20 is op_sel[2] and 0x40 is op_sel_hi[2];
//    confirm against the VOP3P encoding definition.
58 v_pk_fma_f16 v8, v0, s0, v1
59 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
61 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,0] neg_hi:[0,0,0]
62 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
64 v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1] neg_lo:[0,0,0] neg_hi:[0,0,0]
65 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
67 v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[1,1,1]
68 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x1c]
// Non-default values that involve the third (src2) element.
70 v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,0] op_sel_hi:[0,0,0]
71 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel_hi:[0,0,0] ; encoding: [0x08,0x00,0x8e,0xd3,0x00,0x01,0x04,0x04]
73 v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1]
74 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 op_sel:[0,0,1] op_sel_hi:[0,0,1] ; encoding: [0x08,0x60,0x8e,0xd3,0x00,0x01,0x04,0x04]
77 // Test neg_lo/neg_hi
// All cases use v_pk_fma_f16 v8, v0, s0, v1, whose unmodified encoding has
// 0x40 as its second byte and 0x1c as its last byte.
//
// What the expected lines in this section show:
//  - neg_lo changes only the last byte, one distinct bit per element:
//    [1,0,0] -> 0x3c, [0,1,0] -> 0x5c, [0,0,1] -> 0x9c, [1,1,1] -> 0xfc.
//  - neg_hi changes only the second byte, one distinct bit per element:
//    [1,0,0] -> 0x41, [0,1,0] -> 0x42, [0,0,1] -> 0x44, [1,1,1] -> 0x47.
//  - Both modifiers are printed back verbatim whenever any element is 1.
//
// All elements set, for each modifier alone and for both together.
80 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1]
81 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0xfc]
83 v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1]
84 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0x1c]
86 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1]
87 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,1,1] neg_hi:[1,1,1] ; encoding: [0x08,0x47,0x8e,0xd3,0x00,0x01,0x04,0xfc]
// neg_lo, one element at a time.
89 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0]
90 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[1,0,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x3c]
92 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0]
93 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,1,0] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x5c]
95 v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1]
96 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_lo:[0,0,1] ; encoding: [0x08,0x40,0x8e,0xd3,0x00,0x01,0x04,0x9c]
// neg_hi, one element at a time.
98 v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0]
99 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[1,0,0] ; encoding: [0x08,0x41,0x8e,0xd3,0x00,0x01,0x04,0x1c]
101 v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0]
102 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,1,0] ; encoding: [0x08,0x42,0x8e,0xd3,0x00,0x01,0x04,0x1c]
104 v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1]
105 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 neg_hi:[0,0,1] ; encoding: [0x08,0x44,0x8e,0xd3,0x00,0x01,0x04,0x1c]
108 // Test clamp
// The clamp modifier is exercised on one three-source and two two-source
// packed instructions. In every expected encoding the second byte is 0xc0,
// versus 0x40 for the same instructions without clamp elsewhere in this
// file, and the modifier is printed back as a trailing "clamp".
109 v_pk_fma_f16 v8, v0, s0, v1 clamp
110 // GFX9: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x8e,0xd3,0x00,0x01,0x04,0x1c]
112 v_pk_add_u16 v1, v2, v3 clamp
113 // GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0xc0,0x8a,0xd3,0x02,0x07,0x02,0x18]
115 v_pk_min_i16 v0, v1, v2 clamp
116 // GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x88,0xd3,0x01,0x05,0x02,0x18]
119 // Instruction tests:
// One plain, modifier-free case per packed instruction. The two-source cases
// all use v0, v1, v2, so their expected encodings differ only in the third
// byte, which takes the values 0x81-0x88, 0x8a, 0x8c-0x8d and 0x8f-0x92 here.
// v_pk_fma_f16 (third byte 0x8e) is the only three-source case; it takes an
// additional v3 operand and its last two bytes are 0x0e,0x1c instead of
// 0x02,0x18.
// NOTE(review): third-byte values 0x89 and 0x8b have no case in the visible
// lines; presumably they belong to instructions not covered here - confirm.
122 v_pk_mul_lo_u16 v0, v1, v2
123 // GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x81,0xd3,0x01,0x05,0x02,0x18]
125 v_pk_add_i16 v0, v1, v2
126 // GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x82,0xd3,0x01,0x05,0x02,0x18]
128 v_pk_sub_i16 v0, v1, v2
129 // GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x83,0xd3,0x01,0x05,0x02,0x18]
131 v_pk_lshlrev_b16 v0, v1, v2
132 // GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x40,0x84,0xd3,0x01,0x05,0x02,0x18]
134 v_pk_lshrrev_b16 v0, v1, v2
135 // GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x40,0x85,0xd3,0x01,0x05,0x02,0x18]
137 v_pk_ashrrev_i16 v0, v1, v2
138 // GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x86,0xd3,0x01,0x05,0x02,0x18]
140 v_pk_max_i16 v0, v1, v2
141 // GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x87,0xd3,0x01,0x05,0x02,0x18]
143 v_pk_min_i16 v0, v1, v2
144 // GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x88,0xd3,0x01,0x05,0x02,0x18]
146 v_pk_add_u16 v0, v1, v2
147 // GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18]
149 v_pk_max_u16 v0, v1, v2
150 // GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18]
152 v_pk_min_u16 v0, v1, v2
153 // GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18]
155 v_pk_fma_f16 v0, v1, v2, v3
156 // GFX9: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c]
158 v_pk_add_f16 v0, v1, v2
159 // GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18]
161 v_pk_mul_f16 v0, v1, v2
162 // GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x90,0xd3,0x01,0x05,0x02,0x18]
164 v_pk_min_f16 v0, v1, v2
165 // GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x91,0xd3,0x01,0x05,0x02,0x18]
167 v_pk_max_f16 v0, v1, v2
168 // GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x92,0xd3,0x01,0x05,0x02,0x18]