[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / clang / test / CodeGen / PowerPC / builtins-ppc-pair-mma-types.c
bloba414a2827b2c4345a4659ff2435218a4f2cbc24c
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple powerpc64le-unknown-unknown -target-cpu pwr10 \
3 // RUN: -emit-llvm %s -o - | FileCheck %s
4 // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -target-cpu pwr10 \
5 // RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-BE
7 // CHECK-LABEL: @testVQLocal(
8 // CHECK-NEXT: entry:
9 // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
10 // CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
11 // CHECK-NEXT: [[VQP:%.*]] = alloca ptr, align 8
12 // CHECK-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
13 // CHECK-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
14 // CHECK-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
15 // CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
16 // CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
17 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
18 // CHECK-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8
19 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8
20 // CHECK-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64
21 // CHECK-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64
22 // CHECK-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
23 // CHECK-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64
24 // CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
25 // CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
26 // CHECK-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
27 // CHECK-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64
28 // CHECK-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
29 // CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8
30 // CHECK-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64
31 // CHECK-NEXT: ret void
33 // CHECK-BE-LABEL: @testVQLocal(
34 // CHECK-BE-NEXT: entry:
35 // CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
36 // CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
37 // CHECK-BE-NEXT: [[VQP:%.*]] = alloca ptr, align 8
38 // CHECK-BE-NEXT: [[VQ1:%.*]] = alloca <512 x i1>, align 64
39 // CHECK-BE-NEXT: [[VQ2:%.*]] = alloca <512 x i1>, align 64
40 // CHECK-BE-NEXT: [[VQ3:%.*]] = alloca <512 x i1>, align 64
41 // CHECK-BE-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
42 // CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
43 // CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
44 // CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VQP]], align 8
45 // CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VQP]], align 8
46 // CHECK-BE-NEXT: [[TMP3:%.*]] = load <512 x i1>, ptr [[TMP2]], align 64
47 // CHECK-BE-NEXT: store <512 x i1> [[TMP3]], ptr [[VQ1]], align 64
48 // CHECK-BE-NEXT: [[TMP4:%.*]] = call <512 x i1> @llvm.ppc.mma.xxsetaccz()
49 // CHECK-BE-NEXT: store <512 x i1> [[TMP4]], ptr [[VQ2]], align 64
50 // CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
51 // CHECK-BE-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
52 // CHECK-BE-NEXT: [[TMP7:%.*]] = call <512 x i1> @llvm.ppc.mma.xvi4ger8(<16 x i8> [[TMP5]], <16 x i8> [[TMP6]])
53 // CHECK-BE-NEXT: store <512 x i1> [[TMP7]], ptr [[VQ3]], align 64
54 // CHECK-BE-NEXT: [[TMP8:%.*]] = load <512 x i1>, ptr [[VQ3]], align 64
55 // CHECK-BE-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VQP]], align 8
56 // CHECK-BE-NEXT: store <512 x i1> [[TMP8]], ptr [[TMP9]], align 64
57 // CHECK-BE-NEXT: ret void
// Codegen-only test for __vector_quad locals: copy-in through a pointer, two
// MMA builtins that write locals, and copy-out through the same pointer.
// The RUN lines only emit LLVM IR and pipe it to FileCheck; the little-endian
// and big-endian expectations preceding this function are identical.
59 void testVQLocal(int *ptr, vector unsigned char vc) {
// Reinterpret the incoming int pointer as a pointer to an accumulator.
60 __vector_quad *vqp = (__vector_quad *)ptr;
// Copy-initialise a local from memory; expected as a <512 x i1> load followed
// by a store into vq1, both align 64.
61 __vector_quad vq1 = *vqp;
62 __vector_quad vq2;
// The builtin takes its destination by address; the expected IR is a call to
// @llvm.ppc.mma.xxsetaccz() whose result is stored into vq2.
63 __builtin_mma_xxsetaccz(&vq2);
64 __vector_quad vq3;
// vc is passed for both vector operands, so the expected IR loads it twice
// before calling @llvm.ppc.mma.xvi4ger8 and storing the result into vq3.
65 __builtin_mma_xvi4ger8(&vq3, vc, vc);
// Write vq3 back through the pointer (load from vq3, store via vqp).
66 *vqp = vq3;
69 // CHECK-LABEL: @testVPLocal(
70 // CHECK-NEXT: entry:
71 // CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
72 // CHECK-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
73 // CHECK-NEXT: [[VPP:%.*]] = alloca ptr, align 8
74 // CHECK-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
75 // CHECK-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
76 // CHECK-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
77 // CHECK-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
78 // CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
79 // CHECK-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
80 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
81 // CHECK-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8
82 // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8
83 // CHECK-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
84 // CHECK-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32
85 // CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
86 // CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
87 // CHECK-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
88 // CHECK-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64
89 // CHECK-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
90 // CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
91 // CHECK-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP8]], <16 x i8> [[TMP7]])
92 // CHECK-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64
93 // CHECK-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
94 // CHECK-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
95 // CHECK-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
96 // CHECK-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64
97 // CHECK-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
98 // CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8
99 // CHECK-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32
100 // CHECK-NEXT: ret void
102 // CHECK-BE-LABEL: @testVPLocal(
103 // CHECK-BE-NEXT: entry:
104 // CHECK-BE-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
105 // CHECK-BE-NEXT: [[VC_ADDR:%.*]] = alloca <16 x i8>, align 16
106 // CHECK-BE-NEXT: [[VPP:%.*]] = alloca ptr, align 8
107 // CHECK-BE-NEXT: [[VP1:%.*]] = alloca <256 x i1>, align 32
108 // CHECK-BE-NEXT: [[VP2:%.*]] = alloca <256 x i1>, align 32
109 // CHECK-BE-NEXT: [[VP3:%.*]] = alloca <256 x i1>, align 32
110 // CHECK-BE-NEXT: [[VQ:%.*]] = alloca <512 x i1>, align 64
111 // CHECK-BE-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
112 // CHECK-BE-NEXT: store <16 x i8> [[VC:%.*]], ptr [[VC_ADDR]], align 16
113 // CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
114 // CHECK-BE-NEXT: store ptr [[TMP0]], ptr [[VPP]], align 8
115 // CHECK-BE-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VPP]], align 8
116 // CHECK-BE-NEXT: [[TMP3:%.*]] = load <256 x i1>, ptr [[TMP2]], align 32
117 // CHECK-BE-NEXT: store <256 x i1> [[TMP3]], ptr [[VP1]], align 32
118 // CHECK-BE-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
119 // CHECK-BE-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
120 // CHECK-BE-NEXT: [[TMP6:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]])
121 // CHECK-BE-NEXT: store <256 x i1> [[TMP6]], ptr [[VP2]], align 64
122 // CHECK-BE-NEXT: [[TMP7:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
123 // CHECK-BE-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
124 // CHECK-BE-NEXT: [[TMP9:%.*]] = call <256 x i1> @llvm.ppc.vsx.assemble.pair(<16 x i8> [[TMP7]], <16 x i8> [[TMP8]])
125 // CHECK-BE-NEXT: store <256 x i1> [[TMP9]], ptr [[VP2]], align 64
126 // CHECK-BE-NEXT: [[TMP10:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
127 // CHECK-BE-NEXT: [[TMP11:%.*]] = load <16 x i8>, ptr [[VC_ADDR]], align 16
128 // CHECK-BE-NEXT: [[TMP12:%.*]] = call <512 x i1> @llvm.ppc.mma.xvf64ger(<256 x i1> [[TMP10]], <16 x i8> [[TMP11]])
129 // CHECK-BE-NEXT: store <512 x i1> [[TMP12]], ptr [[VQ]], align 64
130 // CHECK-BE-NEXT: [[TMP13:%.*]] = load <256 x i1>, ptr [[VP3]], align 32
131 // CHECK-BE-NEXT: [[TMP14:%.*]] = load ptr, ptr [[VPP]], align 8
132 // CHECK-BE-NEXT: store <256 x i1> [[TMP13]], ptr [[TMP14]], align 32
133 // CHECK-BE-NEXT: ret void
// Codegen-only test for __vector_pair locals: copy-in through a pointer, the
// two pair-construction builtins, a pair used as an MMA operand, and copy-out.
// The RUN lines only emit LLVM IR; nothing here is executed.
135 void testVPLocal(int *ptr, vector unsigned char vc) {
// Reinterpret the incoming int pointer as a pointer to a vector pair.
136 __vector_pair *vpp = (__vector_pair *)ptr;
// Copy-initialise a local from memory; expected as a <256 x i1> load/store, align 32.
137 __vector_pair vp1 = *vpp;
138 __vector_pair vp2;
// Both builtins below are expected to lower to @llvm.ppc.vsx.assemble.pair and
// to store into vp2. The recorded stores use align 64 although vp2's alloca is
// align 32 -- that is what the expectations pin.
139 __builtin_vsx_assemble_pair(&vp2, vc, vc);
// For build_pair the little-endian expectations pass the two loaded operands
// in swapped order relative to the big-endian ones; this is the only place
// where the two expectation sets for this function differ.
140 __builtin_vsx_build_pair(&vp2, vc, vc);
// vp3 is deliberately left without an initialiser: the expectations only look
// for a load from its stack slot.
141 __vector_pair vp3;
142 __vector_quad vq;
// Pair operand is passed by value; expected IR loads vp3 and vc, calls
// @llvm.ppc.mma.xvf64ger and stores the <512 x i1> result into vq.
143 __builtin_mma_xvf64ger(&vq, vp3, vc);
// Write vp3 back through the pointer (load from vp3, store via vpp, align 32).
144 *vpp = vp3;
147 // CHECK-LABEL: @testRestrictQualifiedPointer2(
148 // CHECK-NEXT: entry:
149 // CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca ptr, align 8
150 // CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
151 // CHECK-NEXT: store ptr [[ACC:%.*]], ptr [[ACC_ADDR]], align 8
152 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[ARR]], i64 0, i64 0
153 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
154 // CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
155 // CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
156 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
157 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
158 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
159 // CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
160 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
161 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
162 // CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
163 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
164 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
165 // CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
166 // CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
167 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
168 // CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
169 // CHECK-NEXT: ret void
171 // CHECK-BE-LABEL: @testRestrictQualifiedPointer2(
172 // CHECK-BE-NEXT: entry:
173 // CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca ptr, align 8
174 // CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
175 // CHECK-BE-NEXT: store ptr [[ACC:%.*]], ptr [[ACC_ADDR]], align 8
176 // CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[ARR]], i64 0, i64 0
177 // CHECK-BE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
178 // CHECK-BE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ACC_ADDR]], align 8
179 // CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
180 // CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
181 // CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
182 // CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
183 // CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
184 // CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
185 // CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
186 // CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
187 // CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
188 // CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
189 // CHECK-BE-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
190 // CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
191 // CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
192 // CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
193 // CHECK-BE-NEXT: ret void
// Codegen-only test: __builtin_mma_disassemble_acc accepts an accumulator
// pointer that is __restrict-qualified. The expected body contains nothing
// restrict-specific: the parameter slot is accessed with plain loads/stores.
195 void testRestrictQualifiedPointer2(__vector_quad *__restrict acc) {
// Destination for the four 16-byte pieces of the accumulator.
196 vector float arr[4];
// Expected IR: load the <512 x i1> value through acc, call
// @llvm.ppc.mma.disassemble.acc (returns a struct of four <16 x i8>), then
// extract elements 0..3 and store each to consecutive slots of arr, align 16.
197 __builtin_mma_disassemble_acc(arr, acc);
200 // CHECK-LABEL: @testVolatileQualifiedPointer2(
201 // CHECK-NEXT: entry:
202 // CHECK-NEXT: [[ACC_ADDR:%.*]] = alloca ptr, align 8
203 // CHECK-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
204 // CHECK-NEXT: store volatile ptr [[ACC:%.*]], ptr [[ACC_ADDR]], align 8
205 // CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[ARR]], i64 0, i64 0
206 // CHECK-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
207 // CHECK-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
208 // CHECK-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
209 // CHECK-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
210 // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
211 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
212 // CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
213 // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
214 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
215 // CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
216 // CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
217 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
218 // CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
219 // CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
220 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
221 // CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
222 // CHECK-NEXT: ret void
224 // CHECK-BE-LABEL: @testVolatileQualifiedPointer2(
225 // CHECK-BE-NEXT: entry:
226 // CHECK-BE-NEXT: [[ACC_ADDR:%.*]] = alloca ptr, align 8
227 // CHECK-BE-NEXT: [[ARR:%.*]] = alloca [4 x <4 x float>], align 16
228 // CHECK-BE-NEXT: store volatile ptr [[ACC:%.*]], ptr [[ACC_ADDR]], align 8
229 // CHECK-BE-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[ARR]], i64 0, i64 0
230 // CHECK-BE-NEXT: [[TMP0:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
231 // CHECK-BE-NEXT: [[TMP1:%.*]] = load volatile ptr, ptr [[ACC_ADDR]], align 8
232 // CHECK-BE-NEXT: [[TMP2:%.*]] = load <512 x i1>, ptr [[TMP1]], align 64
233 // CHECK-BE-NEXT: [[TMP3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> [[TMP2]])
234 // CHECK-BE-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 0
235 // CHECK-BE-NEXT: [[TMP6:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 0
236 // CHECK-BE-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP6]], align 16
237 // CHECK-BE-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 1
238 // CHECK-BE-NEXT: [[TMP8:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 1
239 // CHECK-BE-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 16
240 // CHECK-BE-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 2
241 // CHECK-BE-NEXT: [[TMP10:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 2
242 // CHECK-BE-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP10]], align 16
243 // CHECK-BE-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP3]], 3
244 // CHECK-BE-NEXT: [[TMP12:%.*]] = getelementptr inbounds <16 x i8>, ptr [[ARRAYDECAY]], i32 3
245 // CHECK-BE-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP12]], align 16
246 // CHECK-BE-NEXT: ret void
// Codegen-only test: __builtin_mma_disassemble_acc accepts an accumulator
// pointer that is itself __volatile-qualified (the qualifier follows the `*`).
// The expectations therefore show volatile stores/loads of the parameter slot,
// while the <512 x i1> load through the pointer is not volatile.
248 void testVolatileQualifiedPointer2(__vector_quad *__volatile acc) {
// Destination for the four 16-byte pieces of the accumulator.
249 vector float arr[4];
// Expected IR: load the <512 x i1> value through acc, call
// @llvm.ppc.mma.disassemble.acc (returns a struct of four <16 x i8>), then
// extract elements 0..3 and store each to consecutive slots of arr, align 16.
250 __builtin_mma_disassemble_acc(arr, acc);