1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
; Plain 2D-array-MSAA image load (no tfe/lwe) that reads all four channels.
; %rsrc is the inreg <8 x i32> resource operand; %s, %t, %slice and %fragid
; are the four i32 address operands handed to the intrinsic.
;
; Expected code on both targets: s2..s9 are copied down into s0..s7, then a
; single image_load writes v[0:3] using address v[0:3], resource s[0:7] and
; dmask:0xf. The tahiti output carries the "da" modifier, whereas the gfx1010
; output carries dim:SQ_RSRC_IMG_2D_MSAA_ARRAY instead.
5 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
6 ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
8 ; GFX6-NEXT: s_mov_b32 s0, s2
9 ; GFX6-NEXT: s_mov_b32 s1, s3
10 ; GFX6-NEXT: s_mov_b32 s2, s4
11 ; GFX6-NEXT: s_mov_b32 s3, s5
12 ; GFX6-NEXT: s_mov_b32 s4, s6
13 ; GFX6-NEXT: s_mov_b32 s5, s7
14 ; GFX6-NEXT: s_mov_b32 s6, s8
15 ; GFX6-NEXT: s_mov_b32 s7, s9
16 ; GFX6-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
17 ; GFX6-NEXT: s_waitcnt vmcnt(0)
18 ; GFX6-NEXT: ; return to shader part epilog
20 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw:
22 ; GFX10-NEXT: s_mov_b32 s0, s2
23 ; GFX10-NEXT: s_mov_b32 s1, s3
24 ; GFX10-NEXT: s_mov_b32 s2, s4
25 ; GFX10-NEXT: s_mov_b32 s3, s5
26 ; GFX10-NEXT: s_mov_b32 s4, s6
27 ; GFX10-NEXT: s_mov_b32 s5, s7
28 ; GFX10-NEXT: s_mov_b32 s6, s8
29 ; GFX10-NEXT: s_mov_b32 s7, s9
30 ; GFX10-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
31 ; GFX10-NEXT: s_waitcnt vmcnt(0)
32 ; GFX10-NEXT: ; return to shader part epilog
; The leading immediate 15 lines up with dmask:0xf in the expected image_load;
; both trailing immediates are 0 in this variant.
33 %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
; tfe variant of the 2D-array-MSAA load. The intrinsic returns the aggregate
; { <4 x float>, i32 }: element 0 is returned as the shader result and
; element 1 is stored through the inreg addrspace(1) pointer %out.
;
; Expected code: the four incoming address VGPRs v0..v3 are moved to v5..v8,
; v0..v4 are filled from a zero constant before the load, and image_load
; writes five dwords v[0:4] with the tfe modifier. The fifth dword (v4) is
; then written out: buffer_store_dword with a descriptor built in s[8:11] on
; tahiti, global_store_dword through s[10:11] on gfx1010.
37 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
38 ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
40 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
41 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
42 ; GFX6-NEXT: s_mov_b32 s0, s2
43 ; GFX6-NEXT: s_mov_b32 s1, s3
44 ; GFX6-NEXT: s_mov_b32 s2, s4
45 ; GFX6-NEXT: s_mov_b32 s3, s5
46 ; GFX6-NEXT: s_mov_b32 s4, s6
47 ; GFX6-NEXT: s_mov_b32 s5, s7
48 ; GFX6-NEXT: s_mov_b32 s6, s8
49 ; GFX6-NEXT: s_mov_b32 s7, s9
50 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
51 ; GFX6-NEXT: v_mov_b32_e32 v7, v2
52 ; GFX6-NEXT: v_mov_b32_e32 v8, v3
53 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
54 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
55 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
56 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
57 ; GFX6-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
58 ; GFX6-NEXT: s_mov_b32 s8, s10
59 ; GFX6-NEXT: s_mov_b32 s9, s11
60 ; GFX6-NEXT: s_mov_b32 s10, -1
61 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
62 ; GFX6-NEXT: s_waitcnt vmcnt(0)
63 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
64 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
65 ; GFX6-NEXT: ; return to shader part epilog
67 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
69 ; GFX10-NEXT: v_mov_b32_e32 v9, 0
70 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
71 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
72 ; GFX10-NEXT: v_mov_b32_e32 v7, v2
73 ; GFX10-NEXT: v_mov_b32_e32 v8, v3
74 ; GFX10-NEXT: v_mov_b32_e32 v10, v9
75 ; GFX10-NEXT: v_mov_b32_e32 v11, v9
76 ; GFX10-NEXT: v_mov_b32_e32 v12, v9
77 ; GFX10-NEXT: v_mov_b32_e32 v13, v9
78 ; GFX10-NEXT: s_mov_b32 s0, s2
79 ; GFX10-NEXT: s_mov_b32 s1, s3
80 ; GFX10-NEXT: s_mov_b32 s2, s4
81 ; GFX10-NEXT: s_mov_b32 s3, s5
82 ; GFX10-NEXT: s_mov_b32 s4, s6
83 ; GFX10-NEXT: s_mov_b32 s5, s7
84 ; GFX10-NEXT: s_mov_b32 s6, s8
85 ; GFX10-NEXT: s_mov_b32 s7, s9
86 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
87 ; GFX10-NEXT: v_mov_b32_e32 v1, v10
88 ; GFX10-NEXT: v_mov_b32_e32 v2, v11
89 ; GFX10-NEXT: v_mov_b32_e32 v3, v12
90 ; GFX10-NEXT: v_mov_b32_e32 v4, v13
91 ; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
92 ; GFX10-NEXT: s_waitcnt vmcnt(0)
93 ; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
94 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
95 ; GFX10-NEXT: ; return to shader part epilog
; The second-to-last immediate is 1 here (it is 0 in
; load_2darraymsaa_v4f32_xyzw); the expected image_load gains "tfe".
96 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Split the aggregate: the vector is the return value, the i32 goes to %out.
97 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
98 %v.err = extractvalue { <4 x float>, i32 } %v, 1
99 store i32 %v.err, i32 addrspace(1)* %out, align 4
100 ret <4 x float> %v.vec
; tfe+lwe variant of the 2D-array-MSAA load. Same IR shape as
; load_2darraymsaa_v4f32_xyzw_tfe: the intrinsic returns
; { <4 x float>, i32 }, element 0 is returned and element 1 is stored to %out.
;
; Expected code matches the tfe-only test instruction for instruction, except
; that the image_load carries both the tfe and lwe modifiers.
103 define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
104 ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
106 ; GFX6-NEXT: v_mov_b32_e32 v5, v0
107 ; GFX6-NEXT: v_mov_b32_e32 v0, 0
108 ; GFX6-NEXT: s_mov_b32 s0, s2
109 ; GFX6-NEXT: s_mov_b32 s1, s3
110 ; GFX6-NEXT: s_mov_b32 s2, s4
111 ; GFX6-NEXT: s_mov_b32 s3, s5
112 ; GFX6-NEXT: s_mov_b32 s4, s6
113 ; GFX6-NEXT: s_mov_b32 s5, s7
114 ; GFX6-NEXT: s_mov_b32 s6, s8
115 ; GFX6-NEXT: s_mov_b32 s7, s9
116 ; GFX6-NEXT: v_mov_b32_e32 v6, v1
117 ; GFX6-NEXT: v_mov_b32_e32 v7, v2
118 ; GFX6-NEXT: v_mov_b32_e32 v8, v3
119 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
120 ; GFX6-NEXT: v_mov_b32_e32 v2, v0
121 ; GFX6-NEXT: v_mov_b32_e32 v3, v0
122 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
123 ; GFX6-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe lwe da
124 ; GFX6-NEXT: s_mov_b32 s8, s10
125 ; GFX6-NEXT: s_mov_b32 s9, s11
126 ; GFX6-NEXT: s_mov_b32 s10, -1
127 ; GFX6-NEXT: s_mov_b32 s11, 0xf000
128 ; GFX6-NEXT: s_waitcnt vmcnt(0)
129 ; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
130 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
131 ; GFX6-NEXT: ; return to shader part epilog
133 ; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
135 ; GFX10-NEXT: v_mov_b32_e32 v9, 0
136 ; GFX10-NEXT: v_mov_b32_e32 v5, v0
137 ; GFX10-NEXT: v_mov_b32_e32 v6, v1
138 ; GFX10-NEXT: v_mov_b32_e32 v7, v2
139 ; GFX10-NEXT: v_mov_b32_e32 v8, v3
140 ; GFX10-NEXT: v_mov_b32_e32 v10, v9
141 ; GFX10-NEXT: v_mov_b32_e32 v11, v9
142 ; GFX10-NEXT: v_mov_b32_e32 v12, v9
143 ; GFX10-NEXT: v_mov_b32_e32 v13, v9
144 ; GFX10-NEXT: s_mov_b32 s0, s2
145 ; GFX10-NEXT: s_mov_b32 s1, s3
146 ; GFX10-NEXT: s_mov_b32 s2, s4
147 ; GFX10-NEXT: s_mov_b32 s3, s5
148 ; GFX10-NEXT: s_mov_b32 s4, s6
149 ; GFX10-NEXT: s_mov_b32 s5, s7
150 ; GFX10-NEXT: s_mov_b32 s6, s8
151 ; GFX10-NEXT: s_mov_b32 s7, s9
152 ; GFX10-NEXT: v_mov_b32_e32 v0, v9
153 ; GFX10-NEXT: v_mov_b32_e32 v1, v10
154 ; GFX10-NEXT: v_mov_b32_e32 v2, v11
155 ; GFX10-NEXT: v_mov_b32_e32 v3, v12
156 ; GFX10-NEXT: v_mov_b32_e32 v4, v13
157 ; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
158 ; GFX10-NEXT: s_waitcnt vmcnt(0)
159 ; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
160 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
161 ; GFX10-NEXT: ; return to shader part epilog
; The second-to-last immediate is 3 here (1 in the tfe-only test); the
; expected image_load carries both "tfe" and "lwe".
162 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
; Split the aggregate: the vector is the return value, the i32 goes to %out.
163 %v.vec = extractvalue { <4 x float>, i32 } %v, 0
164 %v.err = extractvalue { <4 x float>, i32 } %v, 1
165 store i32 %v.err, i32 addrspace(1)* %out, align 4
166 ret <4 x float> %v.vec
; Intrinsic overloads exercised by the tests in this file: the form returning
; a bare <4 x float>, and the form returning { <4 x float>, i32 } that the
; tfe tests use. In both, the first operand and the last two operands are
; immarg; the middle operands are four i32 values followed by the <8 x i32>
; resource. Both declarations use attribute group #0.
169 declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
170 declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
; Attribute group shared by both intrinsic declarations.
172 attributes #0 = { nounwind readonly }