1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Test: 2D image load whose result is only the <4 x float> value.
; The first immediate of the intrinsic call is 15, which the expected output
; prints as dmask:0xf. For both targets the checks expect s2..s9 to be copied
; down into s0..s7 for the resource operand, followed by a single image_load
; into v[0:3] addressed by v[0:1]. The gfx1010 output additionally carries
; dim:SQ_RSRC_IMG_2D.
5 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
6 ; GFX6-LABEL: load_2d_v4f32_xyzw:
8 ; GFX6-NEXT: s_mov_b32 s0, s2
9 ; GFX6-NEXT: s_mov_b32 s1, s3
10 ; GFX6-NEXT: s_mov_b32 s2, s4
11 ; GFX6-NEXT: s_mov_b32 s3, s5
12 ; GFX6-NEXT: s_mov_b32 s4, s6
13 ; GFX6-NEXT: s_mov_b32 s5, s7
14 ; GFX6-NEXT: s_mov_b32 s6, s8
15 ; GFX6-NEXT: s_mov_b32 s7, s9
16 ; GFX6-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
17 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18 ; GFX6-NEXT: ; return to shader part epilog
20 ; GFX10-LABEL: load_2d_v4f32_xyzw:
22 ; GFX10-NEXT: s_mov_b32 s0, s2
23 ; GFX10-NEXT: s_mov_b32 s1, s3
24 ; GFX10-NEXT: s_mov_b32 s2, s4
25 ; GFX10-NEXT: s_mov_b32 s3, s5
26 ; GFX10-NEXT: s_mov_b32 s4, s6
27 ; GFX10-NEXT: s_mov_b32 s5, s7
28 ; GFX10-NEXT: s_mov_b32 s6, s8
29 ; GFX10-NEXT: s_mov_b32 s7, s9
30 ; GFX10-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
31 ; GFX10-NEXT: s_waitcnt vmcnt(0)
32 ; GFX10-NEXT: ; return to shader part epilog
; Both trailing immediates are 0 here; the expected image_load lines above
; carry no tfe or lwe modifier.
33 %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
; Test: 2D image load through the { <4 x float>, i32 } overload with the
; second-to-last immediate set to 1. The expected image_load carries the tfe
; modifier and writes five registers, v[0:4]. The checks expect v0..v4 to be
; set to zero before the load, the incoming address values to be moved to
; v[5:6], and v4 to be stored to the %out pointer afterwards
; (buffer_store_dword on tahiti, global_store_dword on gfx1010).
37 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
38 ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe:
40 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
41 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
42 ; GFX6-NEXT: s_mov_b32 s0, s2
43 ; GFX6-NEXT: s_mov_b32 s1, s3
44 ; GFX6-NEXT: s_mov_b32 s2, s4
45 ; GFX6-NEXT: s_mov_b32 s3, s5
46 ; GFX6-NEXT: s_mov_b32 s4, s6
47 ; GFX6-NEXT: s_mov_b32 s5, s7
48 ; GFX6-NEXT: s_mov_b32 s6, s8
49 ; GFX6-NEXT: s_mov_b32 s7, s9
50 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
51 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
52 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
53 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
54 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
55 ; GFX6-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
56 ; GFX6-NEXT: s_mov_b32 s8, s10
57 ; GFX6-NEXT: s_mov_b32 s9, s11
58 ; GFX6-NEXT: s_mov_b32 s10, -1
59 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
60 ; GFX6-NEXT: s_waitcnt vmcnt(0)
61 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
62 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
63 ; GFX6-NEXT: ; return to shader part epilog
65 ; GFX10-LABEL: load_2d_v4f32_xyzw_tfe:
67 ; GFX10-NEXT: v_mov_b32_e32 v7, 0
68 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
69 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
70 ; GFX10-NEXT: s_mov_b32 s0, s2
71 ; GFX10-NEXT: s_mov_b32 s1, s3
72 ; GFX10-NEXT: v_mov_b32_e32 v8, v7
73 ; GFX10-NEXT: v_mov_b32_e32 v9, v7
74 ; GFX10-NEXT: v_mov_b32_e32 v10, v7
75 ; GFX10-NEXT: v_mov_b32_e32 v11, v7
76 ; GFX10-NEXT: s_mov_b32 s2, s4
77 ; GFX10-NEXT: s_mov_b32 s3, s5
78 ; GFX10-NEXT: s_mov_b32 s4, s6
79 ; GFX10-NEXT: s_mov_b32 s5, s7
80 ; GFX10-NEXT: s_mov_b32 s6, s8
81 ; GFX10-NEXT: s_mov_b32 s7, s9
82 ; GFX10-NEXT: v_mov_b32_e32 v0, v7
83 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
84 ; GFX10-NEXT: v_mov_b32_e32 v2, v9
85 ; GFX10-NEXT: v_mov_b32_e32 v3, v10
86 ; GFX10-NEXT: v_mov_b32_e32 v4, v11
87 ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
88 ; GFX10-NEXT: s_waitcnt vmcnt(0)
89 ; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
90 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
91 ; GFX10-NEXT: ; return to shader part epilog
; Second-to-last immediate is 1 (printed as "tfe" in the expected output).
92 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
; Split the aggregate: element 0 is the vector that is returned, element 1 is
; the i32 that is written to %out.
93 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
94 %v.err = extractvalue { <4 x float>, i32 } %v, 1
95 store i32 %v.err, i32 addrspace(1)* %out, align 4
96 ret <4 x float> %v.vec
; Test: same shape as the tfe-only variant, but the second-to-last immediate
; of the intrinsic call is 3. The expected image_load carries both the tfe and
; lwe modifiers and still writes five registers, v[0:4]. The checks expect
; v0..v4 to be zeroed before the load and v4 to be stored to the %out pointer
; afterwards (buffer_store_dword on tahiti, global_store_dword on gfx1010).
99 define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
100 ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
102 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
103 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
104 ; GFX6-NEXT: s_mov_b32 s0, s2
105 ; GFX6-NEXT: s_mov_b32 s1, s3
106 ; GFX6-NEXT: s_mov_b32 s2, s4
107 ; GFX6-NEXT: s_mov_b32 s3, s5
108 ; GFX6-NEXT: s_mov_b32 s4, s6
109 ; GFX6-NEXT: s_mov_b32 s5, s7
110 ; GFX6-NEXT: s_mov_b32 s6, s8
111 ; GFX6-NEXT: s_mov_b32 s7, s9
112 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
113 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
114 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
115 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
116 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
117 ; GFX6-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe lwe
118 ; GFX6-NEXT: s_mov_b32 s8, s10
119 ; GFX6-NEXT: s_mov_b32 s9, s11
120 ; GFX6-NEXT: s_mov_b32 s10, -1
121 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
122 ; GFX6-NEXT: s_waitcnt vmcnt(0)
123 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
124 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
125 ; GFX6-NEXT: ; return to shader part epilog
127 ; GFX10-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
129 ; GFX10-NEXT: v_mov_b32_e32 v7, 0
130 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
131 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
132 ; GFX10-NEXT: s_mov_b32 s0, s2
133 ; GFX10-NEXT: s_mov_b32 s1, s3
134 ; GFX10-NEXT: v_mov_b32_e32 v8, v7
135 ; GFX10-NEXT: v_mov_b32_e32 v9, v7
136 ; GFX10-NEXT: v_mov_b32_e32 v10, v7
137 ; GFX10-NEXT: v_mov_b32_e32 v11, v7
138 ; GFX10-NEXT: s_mov_b32 s2, s4
139 ; GFX10-NEXT: s_mov_b32 s3, s5
140 ; GFX10-NEXT: s_mov_b32 s4, s6
141 ; GFX10-NEXT: s_mov_b32 s5, s7
142 ; GFX10-NEXT: s_mov_b32 s6, s8
143 ; GFX10-NEXT: s_mov_b32 s7, s9
144 ; GFX10-NEXT: v_mov_b32_e32 v0, v7
145 ; GFX10-NEXT: v_mov_b32_e32 v1, v8
146 ; GFX10-NEXT: v_mov_b32_e32 v2, v9
147 ; GFX10-NEXT: v_mov_b32_e32 v3, v10
148 ; GFX10-NEXT: v_mov_b32_e32 v4, v11
149 ; GFX10-NEXT: image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
150 ; GFX10-NEXT: s_waitcnt vmcnt(0)
151 ; GFX10-NEXT: global_store_dword v7, v4, s[10:11]
152 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
153 ; GFX10-NEXT: ; return to shader part epilog
; Second-to-last immediate is 3 (printed as "tfe lwe" in the expected output).
154 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0)
; Split the aggregate: element 0 is the vector that is returned, element 1 is
; the i32 that is written to %out.
155 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
156 %v.err = extractvalue { <4 x float>, i32 } %v, 1
157 store i32 %v.err, i32 addrspace(1)* %out, align 4
158 ret <4 x float> %v.vec
; Declarations of the two image.load.2d overloads called in this file: one
; returning <4 x float> and one returning { <4 x float>, i32 }. The first and
; the last two parameters of each are immarg, so call sites must pass
; constants there. Both declarations use attribute group #0.
161 declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
162 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by the intrinsic declarations above.
164 attributes #0 = { nounwind readonly }