1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
4 ; Reduce a 64-bit sub by a constant if we know the low 32 bits are all zero.
7 ; sub i64:x, K if computeTrailingZeros(K) >= 32
8 ; => build_pair (sub x.hi, K.hi), x.lo
10 define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_0(i64 inreg %reg) {
11 ; GFX9-LABEL: s_sub_i64_const_low_bits_known0_0:
13 ; GFX9-NEXT: s_add_i32 s1, s1, 0xfffc0000
14 ; GFX9-NEXT: ; return to shader part epilog
15 %sub = sub i64 %reg, 1125899906842624 ; (1 << 50): low 32 bits are 0 and K.hi = 0x40000, so only the high half is adjusted (by -0x40000 = 0xfffc0000)
19 define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_1(i64 inreg %reg) {
20 ; GFX9-LABEL: s_sub_i64_const_low_bits_known0_1:
22 ; GFX9-NEXT: s_add_i32 s1, s1, -1
23 ; GFX9-NEXT: ; return to shader part epilog
24 %sub = sub i64 %reg, 4294967296 ; (1 << 32): low 32 bits are 0 and K.hi = 1, so only the high half is adjusted (by -1)
28 define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_2(i64 inreg %reg) {
29 ; GFX9-LABEL: s_sub_i64_const_low_bits_known0_2:
31 ; GFX9-NEXT: s_add_i32 s1, s1, -2
32 ; GFX9-NEXT: ; return to shader part epilog
33 %sub = sub i64 %reg, 8589934592 ; (1 << 33): low 32 bits are 0 and K.hi = 2, so only the high half is adjusted (by -2)
37 define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_3(i64 inreg %reg) {
38 ; GFX9-LABEL: s_sub_i64_const_low_bits_known0_3:
40 ; GFX9-NEXT: s_add_i32 s1, s1, 0x80000000
41 ; GFX9-NEXT: ; return to shader part epilog
42 %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63): low 32 bits are 0 and K.hi = 0x80000000, which is its own negation in 32 bits
46 define amdgpu_ps i64 @s_sub_i64_const_low_bits_known0_4(i64 inreg %reg) {
47 ; GFX9-LABEL: s_sub_i64_const_low_bits_known0_4:
49 ; GFX9-NEXT: s_add_i32 s1, s1, 1
50 ; GFX9-NEXT: ; return to shader part epilog
51 %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000 = -(1 << 32): low 32 bits are 0 and K.hi = -1, so the high half is adjusted by +1
55 define i64 @v_sub_i64_const_low_bits_known0_0(i64 %reg) {
56 ; GFX9-LABEL: v_sub_i64_const_low_bits_known0_0:
58 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX9-NEXT: v_add_u32_e32 v1, 0xfffc0000, v1
60 ; GFX9-NEXT: s_setpc_b64 s[30:31]
61 %sub = sub i64 %reg, 1125899906842624 ; (1 << 50): low 32 bits are 0 and K.hi = 0x40000, so only the high half is adjusted (by -0x40000 = 0xfffc0000)
65 define i64 @v_sub_i64_const_low_bits_known0_1(i64 %reg) {
66 ; GFX9-LABEL: v_sub_i64_const_low_bits_known0_1:
68 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
70 ; GFX9-NEXT: s_setpc_b64 s[30:31]
71 %sub = sub i64 %reg, 4294967296 ; (1 << 32): low 32 bits are 0 and K.hi = 1, so only the high half is adjusted (by -1)
75 define i64 @v_sub_i64_const_low_bits_known0_2(i64 %reg) {
76 ; GFX9-LABEL: v_sub_i64_const_low_bits_known0_2:
78 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX9-NEXT: v_add_u32_e32 v1, -2, v1
80 ; GFX9-NEXT: s_setpc_b64 s[30:31]
81 %sub = sub i64 %reg, 8589934592 ; (1 << 33): low 32 bits are 0 and K.hi = 2, so only the high half is adjusted (by -2)
85 define i64 @v_sub_i64_const_low_bits_known0_3(i64 %reg) {
86 ; GFX9-LABEL: v_sub_i64_const_low_bits_known0_3:
88 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX9-NEXT: v_add_u32_e32 v1, 0x80000000, v1
90 ; GFX9-NEXT: s_setpc_b64 s[30:31]
91 %sub = sub i64 %reg, -9223372036854775808 ; (1 << 63): low 32 bits are 0 and K.hi = 0x80000000, which is its own negation in 32 bits
95 define i64 @v_sub_i64_const_low_bits_known0_4(i64 %reg) {
96 ; GFX9-LABEL: v_sub_i64_const_low_bits_known0_4:
98 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99 ; GFX9-NEXT: v_add_u32_e32 v1, 1, v1
100 ; GFX9-NEXT: s_setpc_b64 s[30:31]
101 %sub = sub i64 %reg, -4294967296 ; 0xffffffff00000000 = -(1 << 32): low 32 bits are 0 and K.hi = -1, so the high half is adjusted by +1
105 define amdgpu_ps i64 @s_sub_i64_const_high_bits_known0_0(i64 inreg %reg) {
106 ; GFX9-LABEL: s_sub_i64_const_high_bits_known0_0:
108 ; GFX9-NEXT: s_add_u32 s0, s0, 1
109 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
110 ; GFX9-NEXT: ; return to shader part epilog
111 %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff: high 32 bits are 0 but the low 32 bits are not, so both halves are still computed
115 define i64 @v_sub_i64_const_high_bits_known0_0(i64 %reg) {
116 ; GFX9-LABEL: v_sub_i64_const_high_bits_known0_0:
118 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
120 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
121 ; GFX9-NEXT: s_setpc_b64 s[30:31]
122 %sub = sub i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff: high 32 bits are 0 but the low 32 bits are not, so both halves are still computed
126 define <2 x i64> @v_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
127 ; GFX9-LABEL: v_sub_v2i64_splat_const_low_bits_known0_0:
129 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
131 ; GFX9-NEXT: v_add_u32_e32 v3, -1, v3
132 ; GFX9-NEXT: s_setpc_b64 s[30:31]
133 %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; splat of (1 << 32): only the high half of each element is adjusted (by -1)
137 define <2 x i64> @v_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
138 ; GFX9-LABEL: v_sub_v2i64_nonsplat_const_low_bits_known0_0:
140 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX9-NEXT: v_add_u32_e32 v1, -1, v1
142 ; GFX9-NEXT: v_add_u32_e32 v3, -2, v3
143 ; GFX9-NEXT: s_setpc_b64 s[30:31]
144 %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33): only the high halves are adjusted (by -1 and -2 respectively)
148 define amdgpu_ps <2 x i64> @s_sub_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
149 ; GFX9-LABEL: s_sub_v2i64_splat_const_low_bits_known0_0:
151 ; GFX9-NEXT: s_add_i32 s1, s1, -1
152 ; GFX9-NEXT: s_add_i32 s3, s3, -1
153 ; GFX9-NEXT: ; return to shader part epilog
154 %sub = sub <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; splat of (1 << 32): only the high half of each element is adjusted (by -1)
158 define amdgpu_ps <2 x i64> @s_sub_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
159 ; GFX9-LABEL: s_sub_v2i64_nonsplat_const_low_bits_known0_0:
161 ; GFX9-NEXT: s_add_i32 s1, s1, -1
162 ; GFX9-NEXT: s_add_i32 s3, s3, -2
163 ; GFX9-NEXT: ; return to shader part epilog
164 %sub = sub <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33): only the high halves are adjusted (by -1 and -2 respectively)
168 ; We could reduce this to use a 32-bit sub if we use computeKnownBits
169 define i64 @v_sub_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
170 ; GFX9-LABEL: v_sub_i64_variable_high_bits_known0_0:
172 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173 ; GFX9-NEXT: v_subrev_co_u32_e32 v0, vcc, 0, v0
174 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v2, vcc
175 ; GFX9-NEXT: s_setpc_b64 s[30:31]
176 %zext.offset.hi32 = zext i32 %offset.hi32 to i64
177 %in.high.bits = shl i64 %zext.offset.hi32, 32 ; shifting left by 32 leaves the low 32 bits of the subtrahend zero
178 %sub = sub i64 %reg, %in.high.bits ; non-constant subtrahend: the expected output still has a low-half subtract of 0 plus a borrow chain
182 ; We could reduce this to use a 32-bit sub if we use computeKnownBits
183 define amdgpu_ps i64 @s_sub_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
184 ; GFX9-LABEL: s_sub_i64_variable_high_bits_known0_0:
186 ; GFX9-NEXT: s_sub_u32 s0, s0, 0
187 ; GFX9-NEXT: s_subb_u32 s1, s1, s2
188 ; GFX9-NEXT: ; return to shader part epilog
189 %zext.offset.hi32 = zext i32 %offset.hi32 to i64
190 %in.high.bits = shl i64 %zext.offset.hi32, 32 ; shifting left by 32 leaves the low 32 bits of the subtrahend zero
191 %sub = sub i64 %reg, %in.high.bits ; non-constant subtrahend: the expected output still has a low-half subtract of 0 plus a borrow chain