1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2 ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=NO-MSAA %s
3 ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=NO-MSAA %s
4 ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1150 < %s | FileCheck -check-prefixes=MSAA %s
6 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
7 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(
8 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] {
9 ; NO-MSAA-NEXT: main_body:
10 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
11 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
12 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
13 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
14 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
15 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
16 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
17 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
18 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
20 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(
21 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] {
22 ; MSAA-NEXT: main_body:
23 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
24 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
25 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
26 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
27 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
28 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
29 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
30 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
31 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
32 ; MSAA-NEXT: ret [4 x float] [[I7]]
35 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
36 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
37 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
38 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
39 %i4 = insertvalue [4 x float] undef, float %i, 0
40 %i5 = insertvalue [4 x float] %i4, float %i1, 1
41 %i6 = insertvalue [4 x float] %i5, float %i2, 2
42 %i7 = insertvalue [4 x float] %i6, float %i3, 3
46 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
47 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(
48 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
49 ; NO-MSAA-NEXT: main_body:
50 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
51 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
52 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
53 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
54 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
55 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
56 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
57 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
58 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
60 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(
61 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
62 ; MSAA-NEXT: main_body:
63 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
64 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
65 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
66 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
67 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
68 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
69 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
70 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
71 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
72 ; MSAA-NEXT: ret [4 x float] [[I7]]
75 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
76 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
77 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
78 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 2, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
79 %i4 = insertvalue [4 x float] undef, float %i, 0
80 %i5 = insertvalue [4 x float] %i4, float %i1, 1
81 %i6 = insertvalue [4 x float] %i5, float %i2, 2
82 %i7 = insertvalue [4 x float] %i6, float %i3, 3
86 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
87 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(
88 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
89 ; NO-MSAA-NEXT: main_body:
90 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
91 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
92 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
93 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
94 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
95 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
96 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
97 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
98 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
100 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(
101 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
102 ; MSAA-NEXT: main_body:
103 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
104 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
105 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
106 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
107 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
108 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
109 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
110 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
111 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
112 ; MSAA-NEXT: ret [4 x float] [[I7]]
115 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
116 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
117 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
118 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 4, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
119 %i4 = insertvalue [4 x float] undef, float %i, 0
120 %i5 = insertvalue [4 x float] %i4, float %i1, 1
121 %i6 = insertvalue [4 x float] %i5, float %i2, 2
122 %i7 = insertvalue [4 x float] %i6, float %i3, 3
126 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
127 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(
128 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
129 ; NO-MSAA-NEXT: main_body:
130 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
131 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
132 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
133 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
134 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
135 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
136 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
137 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
138 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
140 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(
141 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
142 ; MSAA-NEXT: main_body:
143 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
144 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
145 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
146 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
147 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
148 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
149 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
150 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
151 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
152 ; MSAA-NEXT: ret [4 x float] [[I7]]
155 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
156 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
157 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
158 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 8, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
159 %i4 = insertvalue [4 x float] undef, float %i, 0
160 %i5 = insertvalue [4 x float] %i4, float %i1, 1
161 %i6 = insertvalue [4 x float] %i5, float %i2, 2
162 %i7 = insertvalue [4 x float] %i6, float %i3, 3
166 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
167 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(
168 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
169 ; NO-MSAA-NEXT: main_body:
170 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
171 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
172 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
173 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
174 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
175 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
176 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
177 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
178 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
180 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(
181 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
182 ; MSAA-NEXT: main_body:
183 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
184 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
185 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
186 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
187 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
188 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
189 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
190 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
191 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
192 ; MSAA-NEXT: ret [4 x float] [[I7]]
195 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
196 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
197 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
198 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
199 %i4 = insertvalue [4 x float] undef, float %i, 0
200 %i5 = insertvalue [4 x float] %i4, float %i1, 1
201 %i6 = insertvalue [4 x float] %i5, float %i2, 2
202 %i7 = insertvalue [4 x float] %i6, float %i3, 3
206 ; Don't combine because the vaddr inputs are not identical.
207 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 %s0, i32 %t0, i32 %s1, i32 %t1, i32 %s2, i32 %t2, i32 %s3, i32 %t3) {
208 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(
209 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] {
210 ; NO-MSAA-NEXT: main_body:
211 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
212 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
213 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
214 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
215 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
216 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
217 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
218 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
219 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
221 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(
222 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] {
223 ; MSAA-NEXT: main_body:
224 ; MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
225 ; MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
226 ; MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
227 ; MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
228 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
229 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
230 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
231 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
232 ; MSAA-NEXT: ret [4 x float] [[I7]]
235 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s0, i32 %t0, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
236 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s1, i32 %t1, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
237 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s2, i32 %t2, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
238 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s3, i32 %t3, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
239 %i4 = insertvalue [4 x float] undef, float %i, 0
240 %i5 = insertvalue [4 x float] %i4, float %i1, 1
241 %i6 = insertvalue [4 x float] %i5, float %i2, 2
242 %i7 = insertvalue [4 x float] %i6, float %i3, 3
246 define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
247 ; NO-MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(
248 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
249 ; NO-MSAA-NEXT: main_body:
250 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
251 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
252 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
253 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
254 ; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
255 ; NO-MSAA-NEXT: [[I5:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
256 ; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
257 ; NO-MSAA-NEXT: [[I7:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
258 ; NO-MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0
259 ; NO-MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1
260 ; NO-MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2
261 ; NO-MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3
262 ; NO-MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4
263 ; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5
264 ; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6
265 ; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7
266 ; NO-MSAA-NEXT: ret [8 x float] [[I15]]
268 ; MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(
269 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
270 ; MSAA-NEXT: main_body:
271 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
272 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
273 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
274 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
275 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
276 ; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
277 ; MSAA-NEXT: [[I5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
278 ; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
279 ; MSAA-NEXT: [[I7:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
280 ; MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0
281 ; MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1
282 ; MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2
283 ; MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3
284 ; MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4
285 ; MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5
286 ; MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6
287 ; MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7
288 ; MSAA-NEXT: ret [8 x float] [[I15]]
291 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
292 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
293 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
294 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
295 %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
296 %i5 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
297 %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
298 %i7 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
299 %i8 = insertvalue [8 x float] undef, float %i, 0
300 %i9 = insertvalue [8 x float] %i8, float %i1, 1
301 %i10 = insertvalue [8 x float] %i9, float %i2, 2
302 %i11 = insertvalue [8 x float] %i10, float %i3, 3
303 %i12 = insertvalue [8 x float] %i11, float %i4, 4
304 %i13 = insertvalue [8 x float] %i12, float %i5, 5
305 %i14 = insertvalue [8 x float] %i13, float %i6, 6
306 %i15 = insertvalue [8 x float] %i14, float %i7, 7
310 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
311 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(
312 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
313 ; NO-MSAA-NEXT: main_body:
314 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
315 ; NO-MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
316 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
317 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1
318 ; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
319 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2
320 ; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
321 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3
322 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
324 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(
325 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
326 ; MSAA-NEXT: main_body:
327 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
328 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
329 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
330 ; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
331 ; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
332 ; MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
333 ; MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1
334 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2
335 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3
336 ; MSAA-NEXT: ret [4 x float] [[I7]]
339 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
340 %i1 = insertvalue [4 x float] undef, float %i, 0
341 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
342 %i3 = insertvalue [4 x float] %i1, float %i2, 1
343 %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
344 %i5 = insertvalue [4 x float] %i3, float %i4, 2
345 %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
346 %i7 = insertvalue [4 x float] %i5, float %i6, 3
350 define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
351 ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(
352 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
353 ; NO-MSAA-NEXT: main_body:
354 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
355 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
356 ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
357 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
358 ; NO-MSAA-NEXT: ret [2 x float] [[I3]]
360 ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(
361 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
362 ; MSAA-NEXT: main_body:
363 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
364 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
365 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
366 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
367 ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
368 ; MSAA-NEXT: ret [2 x float] [[I3]]
371 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
372 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
373 %i2 = insertvalue [2 x float] undef, float %i, 0
374 %i3 = insertvalue [2 x float] %i2, float %i1, 1
378 define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
379 ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(
380 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
381 ; NO-MSAA-NEXT: main_body:
382 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
383 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
384 ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
385 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
386 ; NO-MSAA-NEXT: ret [2 x float] [[I3]]
388 ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(
389 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
390 ; MSAA-NEXT: main_body:
391 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
392 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
393 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
394 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0
395 ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1
396 ; MSAA-NEXT: ret [2 x float] [[I3]]
399 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
400 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
401 %i2 = insertvalue [2 x float] undef, float %i, 0
402 %i3 = insertvalue [2 x float] %i2, float %i1, 1
406 ; Don't combine because it's not profitable: the resulting msaa loads would
407 ; have 8 vdata outputs.
408 define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
409 ; NO-MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(
410 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
411 ; NO-MSAA-NEXT: main_body:
412 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
413 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
414 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0
415 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
416 ; NO-MSAA-NEXT: ret [2 x <2 x float>] [[I5]]
418 ; MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(
419 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
420 ; MSAA-NEXT: main_body:
421 ; MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
422 ; MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
423 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0
424 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
425 ; MSAA-NEXT: ret [2 x <2 x float>] [[I5]]
428 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
429 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
430 %i4 = insertvalue [2 x <2 x float>] undef, <2 x float> %i, 0
431 %i5 = insertvalue [2 x <2 x float>] %i4, <2 x float> %i1, 1
432 ret [2 x <2 x float>] %i5
438 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
439 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(
440 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
441 ; NO-MSAA-NEXT: main_body:
442 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
443 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
444 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
445 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
446 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
447 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
448 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
449 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
450 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
452 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(
453 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
454 ; MSAA-NEXT: main_body:
455 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
456 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
457 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
458 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
459 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
460 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
461 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
462 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
463 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
464 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
465 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
466 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
467 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
468 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
469 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
470 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
471 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
472 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
473 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
474 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
475 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
476 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
477 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
480 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
481 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
482 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
483 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
484 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
485 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
486 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
487 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
488 ret [4 x <2 x float>] %i7
494 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
495 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(
496 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
497 ; NO-MSAA-NEXT: main_body:
498 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
499 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
500 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
501 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
502 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
503 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
504 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
505 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
506 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
508 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(
509 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
510 ; MSAA-NEXT: main_body:
511 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
512 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
513 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
514 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
515 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
516 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
517 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
518 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
519 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
520 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
521 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
522 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
523 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
524 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
525 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
526 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
527 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
528 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
529 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
530 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
531 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
532 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
533 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
536 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
537 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
538 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
539 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 5, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
540 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
541 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
542 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
543 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
544 ret [4 x <2 x float>] %i7
550 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
551 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(
552 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
553 ; NO-MSAA-NEXT: main_body:
554 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
555 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
556 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
557 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
558 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
559 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
560 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
561 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
562 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
564 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(
565 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
566 ; MSAA-NEXT: main_body:
567 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
568 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
569 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
570 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
571 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
572 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
573 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
574 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
575 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
576 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
577 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
578 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
579 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
580 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
581 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
582 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
583 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
584 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
585 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
586 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
587 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
588 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
589 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
592 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
593 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
594 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
595 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 6, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
596 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
597 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
598 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
599 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
600 ret [4 x <2 x float>] %i7
606 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
607 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(
608 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
609 ; NO-MSAA-NEXT: main_body:
610 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
611 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
612 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
613 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
614 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
615 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
616 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
617 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
618 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
620 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(
621 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
622 ; MSAA-NEXT: main_body:
623 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
624 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
625 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
626 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
627 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
628 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
629 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
630 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
631 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
632 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
633 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
634 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
635 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
636 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
637 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
638 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
639 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
640 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
641 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
642 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
643 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
644 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
645 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
648 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
649 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
650 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
651 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 9, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
652 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
653 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
654 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
655 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
656 ret [4 x <2 x float>] %i7
662 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
663 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(
664 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
665 ; NO-MSAA-NEXT: main_body:
666 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
667 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
668 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
669 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
670 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
671 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
672 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
673 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
674 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
676 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(
677 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
678 ; MSAA-NEXT: main_body:
679 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
680 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
681 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
682 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
683 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
684 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
685 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
686 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
687 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
688 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
689 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
690 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
691 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
692 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
693 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
694 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
695 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
696 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
697 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
698 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
699 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
700 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
701 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
704 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
705 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
706 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
707 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 10, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
708 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
709 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
710 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
711 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
712 ret [4 x <2 x float>] %i7
718 define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
719 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(
720 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
721 ; NO-MSAA-NEXT: main_body:
722 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
723 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
724 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
725 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
726 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
727 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
728 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
729 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
730 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
732 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(
733 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
734 ; MSAA-NEXT: main_body:
735 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
736 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
737 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
738 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
739 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
740 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
741 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
742 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
743 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
744 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
745 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
746 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
747 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
748 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
749 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
750 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
751 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
752 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
753 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
754 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
755 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
756 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
757 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
760 %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
761 %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
762 %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
763 %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32 12, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
764 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
765 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
766 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
767 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
768 ret [4 x <2 x float>] %i7
771 define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
772 ; NO-MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(
773 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
774 ; NO-MSAA-NEXT: main_body:
775 ; NO-MSAA-NEXT: [[I:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
776 ; NO-MSAA-NEXT: [[I1:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
777 ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0
778 ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1
779 ; NO-MSAA-NEXT: ret [2 x half] [[I3]]
781 ; MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(
782 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
783 ; MSAA-NEXT: main_body:
784 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
785 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x half> [[TMP0]], i64 0
786 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x half> [[TMP0]], i64 1
787 ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0
788 ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1
789 ; MSAA-NEXT: ret [2 x half] [[I3]]
792 %i = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
793 %i1 = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
794 %i2 = insertvalue [2 x half] undef, half %i, 0
795 %i3 = insertvalue [2 x half] %i2, half %i1, 1
799 define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
800 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(
801 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
802 ; NO-MSAA-NEXT: main_body:
803 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
804 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
805 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
806 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
807 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
808 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
809 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
810 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
811 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
813 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(
814 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
815 ; MSAA-NEXT: main_body:
816 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
817 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
818 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
819 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
820 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
821 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
822 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
823 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
824 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
825 ; MSAA-NEXT: ret [4 x float] [[I7]]
828 %i = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
829 %i1 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
830 %i2 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
831 %i3 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
832 %i4 = insertvalue [4 x float] undef, float %i, 0
833 %i5 = insertvalue [4 x float] %i4, float %i1, 1
834 %i6 = insertvalue [4 x float] %i5, float %i2, 2
835 %i7 = insertvalue [4 x float] %i6, float %i3, 3
842 define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
843 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(
844 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
845 ; NO-MSAA-NEXT: main_body:
846 ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
847 ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
848 ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
849 ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
850 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
851 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
852 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
853 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
854 ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
856 ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(
857 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] {
858 ; MSAA-NEXT: main_body:
859 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
860 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
861 ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
862 ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0
863 ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
864 ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1
865 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
866 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0
867 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
868 ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1
869 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
870 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0
871 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
872 ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1
873 ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
874 ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0
875 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
876 ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1
877 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0
878 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1
879 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2
880 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3
881 ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]]
884 %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
885 %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
886 %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
887 %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
888 %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0
889 %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1
890 %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2
891 %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3
892 ret [4 x <2 x float>] %i7
900 define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
901 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(
902 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
903 ; NO-MSAA-NEXT: main_body:
904 ; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
905 ; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
906 ; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
907 ; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
908 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
909 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
910 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
911 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
912 ; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
914 ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(
915 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
916 ; MSAA-NEXT: main_body:
917 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
918 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
919 ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
920 ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
921 ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0
922 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
923 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1
924 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
925 ; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2
926 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
927 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0
928 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
929 ; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1
930 ; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
931 ; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2
932 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
933 ; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0
934 ; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
935 ; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1
936 ; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
937 ; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2
938 ; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
939 ; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0
940 ; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
941 ; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1
942 ; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
943 ; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2
944 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
945 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
946 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
947 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
948 ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
951 %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
952 %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
953 %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
954 %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
955 %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
956 %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
957 %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
958 %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
959 ret [4 x <3 x float>] %i7
967 define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
968 ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(
969 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
970 ; NO-MSAA-NEXT: main_body:
971 ; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
972 ; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 5, <8 x i32> [[RSRC]], i32 0, i32 0)
973 ; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 6, <8 x i32> [[RSRC]], i32 0, i32 0)
974 ; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 [[S]], i32 [[T]], i32 7, <8 x i32> [[RSRC]], i32 0, i32 0)
975 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
976 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
977 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
978 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
979 ; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
981 ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(
982 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
983 ; MSAA-NEXT: main_body:
984 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
985 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 2, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
986 ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 4, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0)
987 ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
988 ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0
989 ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
990 ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1
991 ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
992 ; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2
993 ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
994 ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0
995 ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
996 ; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1
997 ; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
998 ; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2
999 ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
1000 ; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0
1001 ; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
1002 ; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1
1003 ; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
1004 ; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2
1005 ; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
1006 ; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0
1007 ; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
1008 ; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1
1009 ; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
1010 ; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2
1011 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0
1012 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1
1013 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2
1014 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3
1015 ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]]
1018 %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 4, <8 x i32> %rsrc, i32 0, i32 0)
1019 %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 5, <8 x i32> %rsrc, i32 0, i32 0)
1020 %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 6, <8 x i32> %rsrc, i32 0, i32 0)
1021 %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32 7, i32 %s, i32 %t, i32 7, <8 x i32> %rsrc, i32 0, i32 0)
1022 %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0
1023 %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1
1024 %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2
1025 %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3
1026 ret [4 x <3 x float>] %i7
1029 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, float %vdata, i32 %s, i32 %t, i32 %fragid) {
1030 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(
1031 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] {
1032 ; NO-MSAA-NEXT: main_body:
1033 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1034 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
1035 ; NO-MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
1036 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
1037 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
1038 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1039 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1040 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1041 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1042 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
1044 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(
1045 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] {
1046 ; MSAA-NEXT: main_body:
1047 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1048 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
1049 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
1050 ; MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0)
1051 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1052 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
1053 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
1054 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1055 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1056 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1057 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1058 ; MSAA-NEXT: ret [4 x float] [[I7]]
1061 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
1062 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
1063 call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float %vdata, i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
1064 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
1065 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
1066 %i4 = insertvalue [4 x float] undef, float %i, 0
1067 %i5 = insertvalue [4 x float] %i4, float %i1, 1
1068 %i6 = insertvalue [4 x float] %i5, float %i2, 2
1069 %i7 = insertvalue [4 x float] %i6, float %i3, 3
1073 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %cond) {
1074 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(
1075 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
1076 ; NO-MSAA-NEXT: main_body:
1077 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1078 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
1079 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
1080 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
1081 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1082 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1083 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1084 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1085 ; NO-MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1
1086 ; NO-MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]]
1087 ; NO-MSAA: if_equal:
1088 ; NO-MSAA-NEXT: [[I9:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1089 ; NO-MSAA-NEXT: [[I10:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
1090 ; NO-MSAA-NEXT: [[I11:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
1091 ; NO-MSAA-NEXT: [[I12:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
1092 ; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0
1093 ; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1
1094 ; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2
1095 ; NO-MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3
1096 ; NO-MSAA-NEXT: br label [[MERGE:%.*]]
1097 ; NO-MSAA: if_unequal:
1098 ; NO-MSAA-NEXT: [[I17:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1099 ; NO-MSAA-NEXT: [[I18:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0)
1100 ; NO-MSAA-NEXT: [[I19:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0)
1101 ; NO-MSAA-NEXT: [[I20:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0)
1102 ; NO-MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0
1103 ; NO-MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1
1104 ; NO-MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2
1105 ; NO-MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3
1106 ; NO-MSAA-NEXT: br label [[MERGE]]
1108 ; NO-MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ]
1109 ; NO-MSAA-NEXT: ret [4 x float] [[I25]]
1111 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(
1112 ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] {
1113 ; MSAA-NEXT: main_body:
1114 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1115 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
1116 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
1117 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2
1118 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
1119 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1120 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1121 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1122 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1123 ; MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1
1124 ; MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]]
1126 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1127 ; MSAA-NEXT: [[I9:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
1128 ; MSAA-NEXT: [[I10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
1129 ; MSAA-NEXT: [[I11:%.*]] = extractelement <4 x float> [[TMP1]], i64 2
1130 ; MSAA-NEXT: [[I12:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
1131 ; MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0
1132 ; MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1
1133 ; MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2
1134 ; MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3
1135 ; MSAA-NEXT: br label [[MERGE:%.*]]
1137 ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0)
1138 ; MSAA-NEXT: [[I17:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
1139 ; MSAA-NEXT: [[I18:%.*]] = extractelement <4 x float> [[TMP2]], i64 1
1140 ; MSAA-NEXT: [[I19:%.*]] = extractelement <4 x float> [[TMP2]], i64 2
1141 ; MSAA-NEXT: [[I20:%.*]] = extractelement <4 x float> [[TMP2]], i64 3
1142 ; MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0
1143 ; MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1
1144 ; MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2
1145 ; MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3
1146 ; MSAA-NEXT: br label [[MERGE]]
1148 ; MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ]
1149 ; MSAA-NEXT: ret [4 x float] [[I25]]
1152 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
1153 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
1154 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
1155 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
1156 %i4 = insertvalue [4 x float] undef, float %i, 0
1157 %i5 = insertvalue [4 x float] %i4, float %i1, 1
1158 %i6 = insertvalue [4 x float] %i5, float %i2, 2
1159 %i7 = insertvalue [4 x float] %i6, float %i3, 3
1160 %i8 = trunc i32 %cond to i1
1161 br i1 %i8, label %if_equal, label %if_unequal
1163 %i9 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
1164 %i10 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
1165 %i11 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
1166 %i12 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
1167 %i13 = insertvalue [4 x float] undef, float %i9, 0
1168 %i14 = insertvalue [4 x float] %i13, float %i10, 1
1169 %i15 = insertvalue [4 x float] %i14, float %i11, 2
1170 %i16 = insertvalue [4 x float] %i15, float %i12, 3
1173 %i17 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
1174 %i18 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0)
1175 %i19 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0)
1176 %i20 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0)
1177 %i21 = insertvalue [4 x float] undef, float %i17, 0
1178 %i22 = insertvalue [4 x float] %i21, float %i18, 1
1179 %i23 = insertvalue [4 x float] %i22, float %i19, 2
1180 %i24 = insertvalue [4 x float] %i23, float %i20, 3
1183 %i25 = phi [4 x float] [%i16, %if_equal], [%i24, %if_unequal]
1184 ret [4 x float] %i25
1187 define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> inreg %rsrc1, <8 x i32> inreg %rsrc2, i32 %s, i32 %t) {
1188 ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(
1189 ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
1190 ; NO-MSAA-NEXT: main_body:
1191 ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0)
1192 ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC1]], i32 0, i32 0)
1193 ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0)
1194 ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC2]], i32 0, i32 0)
1195 ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1196 ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1197 ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1198 ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1199 ; NO-MSAA-NEXT: ret [4 x float] [[I7]]
1201 ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(
1202 ; MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] {
1203 ; MSAA-NEXT: main_body:
1204 ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0)
1205 ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0
1206 ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1
1207 ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0)
1208 ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
1209 ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 1
1210 ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0
1211 ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1
1212 ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2
1213 ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3
1214 ; MSAA-NEXT: ret [4 x float] [[I7]]
1217 %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc1, i32 0, i32 0)
1218 %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc1, i32 0, i32 0)
1219 %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc2, i32 0, i32 0)
1220 %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc2, i32 0, i32 0)
1221 %i4 = insertvalue [4 x float] undef, float %i, 0
1222 %i5 = insertvalue [4 x float] %i4, float %i1, 1
1223 %i6 = insertvalue [4 x float] %i5, float %i2, 2
1224 %i7 = insertvalue [4 x float] %i6, float %i3, 3
1228 declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1229 declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1230 declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1232 declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1233 declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1234 declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1236 declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
1238 declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32)
1240 attributes #0 = { nounwind readonly willreturn }