1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - < %s | FileCheck --check-prefixes=GCN,GFX89 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - < %s | FileCheck --check-prefixes=GCN,GFX10 %s
6 ; Test vector bitfield extract.
; Shift-then-mask form: %value is first shifted right by 8. Both the GFX89 and
; the GFX10 expectations consist of a single v_bfe_u32 rather than a separate
; shift and mask; the GFX10 output only adds an s_waitcnt_vscnt before it.
; NOTE(review): the masking instruction is not visible in this excerpt; the
; "8, 5" operands presumably mean offset 8, width 5 -- confirm.
7 define i32 @v_srl_mask_i32(i32 %value) {
8 ; GFX89-LABEL: v_srl_mask_i32:
10 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5
12 ; GFX89-NEXT: s_setpc_b64 s[30:31]
14 ; GFX10-LABEL: v_srl_mask_i32:
16 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
18 ; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5
19 ; GFX10-NEXT: s_setpc_b64 s[30:31]
20 %1 = lshr i32 %value, 8
25 ; Test scalar bitfield extract.
; Shift-then-mask form on an inreg (SGPR) argument: %value is first shifted
; right by 8. All three RUN lines share the same expectation, a single
; s_bfe_u32 with the immediate 0x50008.
; NOTE(review): the masking instruction is not visible in this excerpt; the
; immediate presumably packs width 5 in the upper half and offset 8 in the
; lower half -- confirm against the ISA documentation.
26 define amdgpu_ps i32 @s_srl_mask_i32(i32 inreg %value) {
27 ; GCN-LABEL: s_srl_mask_i32:
29 ; GCN-NEXT: s_bfe_u32 s0, s0, 0x50008
30 ; GCN-NEXT: ; return to shader part epilog
31 %1 = lshr i32 %value, 8
36 ; Don't generate G_UBFX if the offset + width is too big.
; Negative test: %value is shifted right by 30, and the expected output on all
; three RUN lines is only a plain s_lshr_b32 by 30, with no bitfield-extract
; instruction.
; NOTE(review): the mask applied after the shift is not visible in this
; excerpt, so the exact width that makes offset + width "too big" cannot be
; confirmed from here.
37 define amdgpu_ps i32 @s_srl_big_mask_i32(i32 inreg %value) {
38 ; GCN-LABEL: s_srl_big_mask_i32:
40 ; GCN-NEXT: s_lshr_b32 s0, s0, 30
41 ; GCN-NEXT: ; return to shader part epilog
42 %1 = lshr i32 %value, 30
47 ; Test vector bitfield extract.
; Mask-then-shift form: %value is first masked with 7936 (31 << 8, i.e. five
; bits starting at bit 8). The expected output is the same single
; v_bfe_u32 v0, v0, 8, 5 as for the shift-then-mask variant v_srl_mask_i32.
; NOTE(review): the shift that follows the mask is not visible in this
; excerpt; presumably it is a logical shift right by 8 -- confirm.
48 define i32 @v_mask_srl_i32(i32 %value) {
49 ; GFX89-LABEL: v_mask_srl_i32:
51 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX89-NEXT: v_bfe_u32 v0, v0, 8, 5
53 ; GFX89-NEXT: s_setpc_b64 s[30:31]
55 ; GFX10-LABEL: v_mask_srl_i32:
57 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
59 ; GFX10-NEXT: v_bfe_u32 v0, v0, 8, 5
60 ; GFX10-NEXT: s_setpc_b64 s[30:31]
61 %1 = and i32 %value, 7936 ; 31 << 8
66 ; Test scalar bitfield extract.
; Mask-then-shift form on an inreg (SGPR) argument: %value is first masked with
; 7936 (31 << 8). The expected output is the same single s_bfe_u32 with
; immediate 0x50008 as for the shift-then-mask variant s_srl_mask_i32.
; NOTE(review): the shift that follows the mask is not visible in this
; excerpt; presumably it is a logical shift right by 8 -- confirm.
67 define amdgpu_ps i32 @s_mask_srl_i32(i32 inreg %value) {
68 ; GCN-LABEL: s_mask_srl_i32:
70 ; GCN-NEXT: s_bfe_u32 s0, s0, 0x50008
71 ; GCN-NEXT: ; return to shader part epilog
72 %1 = and i32 %value, 7936 ; 31 << 8
77 ; Test vector bitfield extract for 64-bits.
; Shift-then-mask form: the 64-bit %value is first shifted right by 25. The
; expected output is a three-instruction sequence on both GFX89 and GFX10:
; a full 64-bit shift right by 25 (v_lshrrev_b64), a zeroing of the high
; result register v1, and a 32-bit v_bfe_u32 on v0 with operands "0, 10".
; NOTE(review): the masking instruction is not visible in this excerpt;
; the "0, 10" operands presumably mean offset 0, width 10 -- confirm.
78 define i64 @v_srl_mask_i64(i64 %value) {
79 ; GFX89-LABEL: v_srl_mask_i64:
81 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82 ; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
83 ; GFX89-NEXT: v_mov_b32_e32 v1, 0
84 ; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10
85 ; GFX89-NEXT: s_setpc_b64 s[30:31]
87 ; GFX10-LABEL: v_srl_mask_i64:
89 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
91 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
92 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
93 ; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10
94 ; GFX10-NEXT: s_setpc_b64 s[30:31]
95 %1 = lshr i64 %value, 25
100 ; Test scalar bitfield extract for 64-bits.
; Shift-then-mask form on an inreg (SGPR) argument: the 64-bit %value is
; shifted right by 25 and then masked with 1023 (the low ten bits). All three
; RUN lines share the same expectation, a single s_bfe_u64 on s[0:1].
; The immediate 0xa0019 has 0xa (10) in its upper half and 0x19 (25) in its
; lower half, matching the mask width and shift amount used in the IR below.
101 define amdgpu_ps i64 @s_srl_mask_i64(i64 inreg %value) {
102 ; GCN-LABEL: s_srl_mask_i64:
104 ; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0xa0019
105 ; GCN-NEXT: ; return to shader part epilog
106 %1 = lshr i64 %value, 25
107 %2 = and i64 %1, 1023
111 ; Don't generate G_UBFX if the offset + width is too big.
; Negative test for 64 bits: %value is shifted right by 60, and the expected
; output contains no bitfield-extract instruction. Instead s1 is shifted right
; by 28 (60 - 32) into s0 and s1 is then set to 0.
; NOTE(review): this reading assumes s1 holds the upper 32 bits of %value; the
; mask applied after the shift is not visible in this excerpt -- confirm.
112 define amdgpu_ps i64 @s_srl_big_mask_i64(i64 inreg %value) {
113 ; GCN-LABEL: s_srl_big_mask_i64:
115 ; GCN-NEXT: s_lshr_b32 s0, s1, 28
116 ; GCN-NEXT: s_mov_b32 s1, 0
117 ; GCN-NEXT: ; return to shader part epilog
118 %1 = lshr i64 %value, 60
123 ; Test vector bitfield extract for 64-bits.
124 ; TODO: No need for a 64-bit shift instruction when the extracted value is
125 ; entirely contained within the upper or lower half.
; Mask-then-shift form: the 64-bit %value is first masked with 34326183936
; (1023 << 25, i.e. ten bits starting at bit 25). The expected output is the
; same sequence as for v_srl_mask_i64: a 64-bit v_lshrrev_b64 by 25, a zeroing
; of v1, and a v_bfe_u32 on v0 with operands "0, 10". The 64-bit shift in that
; sequence is what the TODO above refers to.
; NOTE(review): the shift that follows the mask is not visible in this
; excerpt; presumably it is a logical shift right by 25 -- confirm.
126 define i64 @v_mask_srl_i64(i64 %value) {
127 ; GFX89-LABEL: v_mask_srl_i64:
129 ; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX89-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
131 ; GFX89-NEXT: v_mov_b32_e32 v1, 0
132 ; GFX89-NEXT: v_bfe_u32 v0, v0, 0, 10
133 ; GFX89-NEXT: s_setpc_b64 s[30:31]
135 ; GFX10-LABEL: v_mask_srl_i64:
137 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
139 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 25, v[0:1]
140 ; GFX10-NEXT: v_mov_b32_e32 v1, 0
141 ; GFX10-NEXT: v_bfe_u32 v0, v0, 0, 10
142 ; GFX10-NEXT: s_setpc_b64 s[30:31]
143 %1 = and i64 %value, 34326183936 ; 1023 << 25
148 ; Test scalar bitfield extract for 64-bits.
149 define amdgpu_ps i64 @s_mask_srl_i64(i64 inreg %value) {
150 ; GCN-LABEL: s_mask_srl_i64:
152 ; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0xa0019
153 ; GCN-NEXT: ; return to shader part epilog
154 %1 = and i64 %value, 34326183936 ; 1023 << 25