1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - -amdgpu-codegenprepare-mul24=0 < %s | FileCheck -check-prefix=GFX9 %s
; Sign-extends the four i8 arguments to i48, multiplies them pairwise
; (%mul0 = X*Y, %mul1 = Z*W), multiplies the two products (%mul2), and
; truncates that result to i16.
; The expected assembly uses v_mul_i32_i24_sdwa with sext BYTE_0 operands for
; the two inner multiplies and v_mul_i32_i24_e32 for the outer multiply.
4 define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) {
5 ; GFX9-LABEL: num_sign_bits_mul_i48_0:
7 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8 ; GFX9-NEXT: v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
9 ; GFX9-NEXT: v_mul_i32_i24_sdwa v1, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
10 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
11 ; GFX9-NEXT: s_setpc_b64 s[30:31]
12 %A = sext i8 %X to i48
13 %B = sext i8 %Y to i48
14 %C = sext i8 %Z to i48
15 %D = sext i8 %W to i48
16 %mul0 = mul i48 %A, %B
17 %mul1 = mul i48 %C, %D
18 %mul2 = mul i48 %mul0, %mul1
19 %trunc = trunc i48 %mul2 to i16
; Same i8 -> i48 sign-extended multiply tree as num_sign_bits_mul_i48_0, but
; the outer product is arithmetically shifted right by 24 before being
; truncated to i16, so bits above the low 24 of the product are consumed.
; The expected assembly computes the outer product with both
; v_mul_hi_i32_i24_e32 and v_mul_i32_i24_e32, then applies a 64-bit
; v_lshrrev_b64 by 24 to the register pair.
23 define i16 @num_sign_bits_mul_i48_1(i8 %X, i8 %Y, i8 %Z, i8 %W) {
24 ; GFX9-LABEL: num_sign_bits_mul_i48_1:
26 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27 ; GFX9-NEXT: v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
28 ; GFX9-NEXT: v_mul_i32_i24_sdwa v2, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
29 ; GFX9-NEXT: v_mul_hi_i32_i24_e32 v1, v0, v2
30 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v2
31 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 24, v[0:1]
32 ; GFX9-NEXT: s_setpc_b64 s[30:31]
33 %A = sext i8 %X to i48
34 %B = sext i8 %Y to i48
35 %C = sext i8 %Z to i48
36 %D = sext i8 %W to i48
37 %mul0 = mul i48 %A, %B
38 %mul1 = mul i48 %C, %D
39 %mul2 = mul i48 %mul0, %mul1
40 %ashr = ashr i48 %mul2, 24
41 %trunc = trunc i48 %ashr to i16
; Each i32 argument is shifted left by 7 and then arithmetically right by 7
; (the expected assembly shows this as v_bfe_i32 with offset 0, width 25).
; The four results are multiplied pairwise (%mul0, %mul1) and the two
; products are multiplied together (%mul2).
; The expected assembly uses v_mul_lo_u32 for all three multiplies; no 24-bit
; multiply instruction appears in the checks for this function.
45 define i32 @num_sign_bits_mul_i32_7(i32 %x, i32 %y, i32 %z, i32 %w) {
46 ; GFX9-LABEL: num_sign_bits_mul_i32_7:
48 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 25
50 ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 25
51 ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 25
52 ; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 25
53 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
54 ; GFX9-NEXT: v_mul_lo_u32 v1, v2, v3
55 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
56 ; GFX9-NEXT: s_setpc_b64 s[30:31]
57 %x.shl = shl i32 %x, 7
58 %x.bits = ashr i32 %x.shl, 7
60 %y.shl = shl i32 %y, 7
61 %y.bits = ashr i32 %y.shl, 7
63 %z.shl = shl i32 %z, 7
64 %z.bits = ashr i32 %z.shl, 7
66 %w.shl = shl i32 %w, 7
67 %w.bits = ashr i32 %w.shl, 7
69 %mul0 = mul i32 %x.bits, %y.bits
70 %mul1 = mul i32 %z.bits, %w.bits
71 %mul2 = mul i32 %mul0, %mul1
; Each i32 argument is shifted left by 8 and then arithmetically right by 8,
; then the values are multiplied pairwise (%mul0, %mul1) and the two products
; are multiplied together (%mul2).
; The expected assembly uses v_mul_i32_i24_e32 directly on the incoming
; registers for the two inner multiplies (no v_bfe_i32 appears in the checks)
; and v_mul_lo_u32 for the outer multiply.
75 define i32 @num_sign_bits_mul_i32_8(i32 %x, i32 %y, i32 %z, i32 %w) {
76 ; GFX9-LABEL: num_sign_bits_mul_i32_8:
78 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
80 ; GFX9-NEXT: v_mul_i32_i24_e32 v1, v2, v3
81 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
82 ; GFX9-NEXT: s_setpc_b64 s[30:31]
83 %x.shl = shl i32 %x, 8
84 %x.bits = ashr i32 %x.shl, 8
86 %y.shl = shl i32 %y, 8
87 %y.bits = ashr i32 %y.shl, 8
89 %z.shl = shl i32 %z, 8
90 %z.bits = ashr i32 %z.shl, 8
92 %w.shl = shl i32 %w, 8
93 %w.bits = ashr i32 %w.shl, 8
95 %mul0 = mul i32 %x.bits, %y.bits
96 %mul1 = mul i32 %z.bits, %w.bits
97 %mul2 = mul i32 %mul0, %mul1
; Each i32 argument is shifted left by 9 and then arithmetically right by 9
; (the expected assembly shows this as v_bfe_i32 with offset 0, width 23).
; The four results are multiplied pairwise (%mul0, %mul1) and the two
; products are multiplied together (%mul2).
; The expected assembly uses v_mul_i32_i24_e32 for the two inner multiplies
; and v_mul_lo_u32 for the outer multiply.
101 define i32 @num_sign_bits_mul_i32_9(i32 %x, i32 %y, i32 %z, i32 %w) {
102 ; GFX9-LABEL: num_sign_bits_mul_i32_9:
104 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 23
106 ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 23
107 ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 23
108 ; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 23
109 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
110 ; GFX9-NEXT: v_mul_i32_i24_e32 v1, v2, v3
111 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
112 ; GFX9-NEXT: s_setpc_b64 s[30:31]
113 %x.shl = shl i32 %x, 9
114 %x.bits = ashr i32 %x.shl, 9
116 %y.shl = shl i32 %y, 9
117 %y.bits = ashr i32 %y.shl, 9
119 %z.shl = shl i32 %z, 9
120 %z.bits = ashr i32 %z.shl, 9
122 %w.shl = shl i32 %w, 9
123 %w.bits = ashr i32 %w.shl, 9
125 %mul0 = mul i32 %x.bits, %y.bits
126 %mul1 = mul i32 %z.bits, %w.bits
127 %mul2 = mul i32 %mul0, %mul1
; Each i32 argument is shifted left by 10 and then arithmetically right by 10
; (the expected assembly shows this as v_bfe_i32 with offset 0, width 22).
; The four results are multiplied pairwise (%mul0, %mul1) and the two
; products are multiplied together (%mul2).
; The expected assembly uses v_mul_i32_i24_e32 for the two inner multiplies
; and v_mul_lo_u32 for the outer multiply.
131 define i32 @num_sign_bits_mul_i32_10(i32 %x, i32 %y, i32 %z, i32 %w) {
132 ; GFX9-LABEL: num_sign_bits_mul_i32_10:
134 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 22
136 ; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 22
137 ; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 22
138 ; GFX9-NEXT: v_bfe_i32 v3, v3, 0, 22
139 ; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v1
140 ; GFX9-NEXT: v_mul_i32_i24_e32 v1, v2, v3
141 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
142 ; GFX9-NEXT: s_setpc_b64 s[30:31]
143 %x.shl = shl i32 %x, 10
144 %x.bits = ashr i32 %x.shl, 10
146 %y.shl = shl i32 %y, 10
147 %y.bits = ashr i32 %y.shl, 10
149 %z.shl = shl i32 %z, 10
150 %z.bits = ashr i32 %z.shl, 10
152 %w.shl = shl i32 %w, 10
153 %w.bits = ashr i32 %w.shl, 10
155 %mul0 = mul i32 %x.bits, %y.bits
156 %mul1 = mul i32 %z.bits, %w.bits
157 %mul2 = mul i32 %mul0, %mul1
; Calls the llvm.amdgcn.mul.i24 intrinsic with the constant operands 0 and -7.
; The expected assembly contains no multiply instruction: it only moves the
; constant 0 into v0.
161 define i32 @known_bits_mul24() {
162 ; GFX9-LABEL: known_bits_mul24:
164 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
167 %r0 = call i32 @llvm.amdgcn.mul.i24(i32 0, i32 -7)
; Declaration of the AMDGPU mul.i24 intrinsic called by @known_bits_mul24:
; takes two i32 operands and yields an i32.
172 declare i32 @llvm.amdgcn.mul.i24(i32, i32)