1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
4 ; ModuleID = 'skip-vpt-debug.ll'
5 source_filename = "skip-vpt-debug.c"
6 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
7 target triple = "thumbv8.1m.main-arm-none-eabihf"
9 ; Function Attrs: nofree norecurse nounwind optsize
; IR side of the test: a vectorized running-maximum reduction. It reads floats
; through %pSrc, keeps a per-lane maximum in a <4 x float> accumulator, reduces
; the accumulator with llvm.vector.reduce.fmax, and stores the scalar result
; through %pResult. When %blockSize is 0 the loop is bypassed and the initial
; constant 0x3810000000000000 is stored instead (see the phi in while.end).
10 define hidden void @arm_max_no_idx_f32(float* nocapture readonly %pSrc, i32 %blockSize, float* nocapture %pResult) local_unnamed_addr #0 !dbg !13 {
12 call void @llvm.dbg.value(metadata float* %pSrc, metadata !24, metadata !DIExpression()), !dbg !29
13 call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29
14 call void @llvm.dbg.value(metadata float* %pResult, metadata !26, metadata !DIExpression()), !dbg !29
15 call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29
; Zero-length guard: go straight to while.end when there is nothing to read.
16 %cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30
17 br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31
; Preheader: iteration count = ((((%blockSize + 3) & -4) - 4) >> 2) + 1, handed
; to llvm.start.loop.iterations.
19 vector.ph: ; preds = %entry
20 %n.rnd.up = add i32 %blockSize, 3, !dbg !31
21 %n.vec = and i32 %n.rnd.up, -4, !dbg !31
22 %0 = add i32 %n.vec, -4, !dbg !31
23 %1 = lshr i32 %0, 2, !dbg !31
24 %2 = add nuw nsw i32 %1, 1, !dbg !31
25 %3 = call i32 @llvm.start.loop.iterations.i32(i32 %2), !dbg !31
26 br label %vector.body, !dbg !31
; Loop body: %4 is the loop counter, %5 is the value passed to vctp32 to form
; the lane predicate %6, %vec.phi is the running per-lane maximum.
28 vector.body: ; preds = %vector.body, %vector.ph
29 %lsr.iv1 = phi float* [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ]
30 %vec.phi = phi <4 x float> [ <float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000>, %vector.ph ], [ %10, %vector.body ]
31 %4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ]
32 %5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ]
33 %lsr.iv12 = bitcast float* %lsr.iv1 to <4 x float>*
34 %6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5)
36 %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34
; A lane takes the loaded value only when it is active in %6 AND the loaded
; value is greater than the current maximum; otherwise it keeps %vec.phi.
37 %8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38
38 %9 = and <4 x i1> %6, %8, !dbg !40
39 %10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40
; Advance the source pointer by 4 floats and decrement the loop counter by 1.
40 %scevgep = getelementptr float, float* %lsr.iv1, i32 4
41 %11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1)
42 %12 = icmp ne i32 %11, 0
43 br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41
; Horizontal reduction of the per-lane maxima to a single float.
45 middle.block: ; preds = %vector.body
46 %13 = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> %10), !dbg !31
47 br label %while.end, !dbg !45
; Join point: initial constant on the zero-length path, reduced value otherwise.
49 while.end: ; preds = %middle.block, %entry
50 %maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29
51 store float %maxValue.0.lcssa, float* %pResult, align 4, !dbg !45, !tbaa !34
55 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
56 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
58 ; Function Attrs: nofree nosync nounwind readnone willreturn
59 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2
61 ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
62 declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #3
64 ; Function Attrs: nofree nosync nounwind readnone willreturn
65 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2
67 ; Function Attrs: noduplicate nofree nosync nounwind willreturn
68 declare i32 @llvm.start.loop.iterations.i32(i32) #4
70 ; Function Attrs: noduplicate nofree nosync nounwind willreturn
71 declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
73 ; Function Attrs: nounwind readnone
74 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #5
76 attributes #0 = { nofree norecurse nounwind optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-fp16fml,-hwdiv-arm,-i8mm,-sb,-sha2" }
77 attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
78 attributes #2 = { nofree nosync nounwind readnone willreturn }
79 attributes #3 = { argmemonly nofree nosync nounwind readonly willreturn }
80 attributes #4 = { noduplicate nofree nosync nounwind willreturn }
81 attributes #5 = { nounwind readnone }
84 !llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11}
87 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
88 !1 = !DIFile(filename: "skip-vpt-debug.c", directory: "/home/vicspe01")
90 !3 = !{i32 7, !"Dwarf Version", i32 4}
91 !4 = !{i32 2, !"Debug Info Version", i32 3}
92 !5 = !{i32 1, !"wchar_size", i32 4}
93 !6 = !{i32 1, !"static_rwdata", i32 1}
94 !7 = !{i32 1, !"enumsize_buildattr", i32 2}
95 !8 = !{i32 1, !"armlib_unavailable", i32 0}
96 !9 = !{i32 1, !"branch-target-enforcement", i32 0}
97 !10 = !{i32 1, !"sign-return-address", i32 0}
98 !11 = !{i32 1, !"sign-return-address-all", i32 0}
99 !12 = !{!"Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]"}
100 !13 = distinct !DISubprogram(name: "arm_max_no_idx_f32", scope: !1, file: !1, line: 5, type: !14, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23)
101 !14 = !DISubroutineType(types: !15)
102 !15 = !{null, !16, !20, !22}
103 !16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 32)
104 !17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18)
105 !18 = !DIDerivedType(tag: DW_TAG_typedef, name: "float32_t", file: !1, line: 1, baseType: !19)
106 !19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
107 !20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 2, baseType: !21)
108 !21 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
109 !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 32)
110 !23 = !{!24, !25, !26, !27, !28}
111 !24 = !DILocalVariable(name: "pSrc", arg: 1, scope: !13, file: !1, line: 5, type: !16)
112 !25 = !DILocalVariable(name: "blockSize", arg: 2, scope: !13, file: !1, line: 5, type: !20)
113 !26 = !DILocalVariable(name: "pResult", arg: 3, scope: !13, file: !1, line: 6, type: !22)
114 !27 = !DILocalVariable(name: "maxValue", scope: !13, file: !1, line: 7, type: !18)
115 !28 = !DILocalVariable(name: "newVal", scope: !13, file: !1, line: 8, type: !18)
116 !29 = !DILocation(line: 0, scope: !13)
117 !30 = !DILocation(line: 10, column: 20, scope: !13)
118 !31 = !DILocation(line: 10, column: 3, scope: !13)
119 !32 = !DILocation(line: 11, column: 14, scope: !33)
120 !33 = distinct !DILexicalBlock(scope: !13, file: !1, line: 10, column: 26)
121 !34 = !{!35, !35, i64 0}
122 !35 = !{!"float", !36, i64 0}
123 !36 = !{!"omnipotent char", !37, i64 0}
124 !37 = !{!"Simple C/C++ TBAA"}
125 !38 = !DILocation(line: 12, column: 18, scope: !39)
126 !39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 12, column: 9)
127 !40 = !DILocation(line: 12, column: 9, scope: !33)
128 !41 = distinct !{!41, !31, !42, !43, !44}
129 !42 = !DILocation(line: 15, column: 3, scope: !13)
130 !43 = !{!"llvm.loop.mustprogress"}
131 !44 = !{!"llvm.loop.isvectorized", i32 1}
132 !45 = !DILocation(line: 16, column: 12, scope: !13)
133 !46 = !DILocation(line: 17, column: 1, scope: !13)
137 name: arm_max_no_idx_f32
139 exposesReturnsTwice: false
141 regBankSelected: false
144 tracksRegLiveness: true
148 - { reg: '$r0', virtual-reg: '' }
149 - { reg: '$r1', virtual-reg: '' }
150 - { reg: '$r2', virtual-reg: '' }
152 isFrameAddressTaken: false
153 isReturnAddressTaken: false
163 cvBytesOfCalleeSavedRegisters: 0
164 hasOpaqueSPAdjustment: false
166 hasMustTailInVarArgFunc: false
172 - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
173 stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
174 debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
175 - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
176 stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
177 debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
179 debugValueSubstitutions: []
182 value: float 0x3810000000000000
184 isTargetSpecific: false
185 machineFunctionInfo: {}
187 ; CHECK-LABEL: name: arm_max_no_idx_f32
189 ; CHECK: successors: %bb.4(0x30000000), %bb.1(0x50000000)
190 ; CHECK: liveins: $lr, $r0, $r1, $r2, $r7
191 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
192 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
193 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
194 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
195 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
196 ; CHECK: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
197 ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8
198 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4
199 ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8
200 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
201 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
202 ; CHECK: tCBZ renamable $r1, %bb.4, debug-location !31
203 ; CHECK: bb.1.vector.ph:
204 ; CHECK: successors: %bb.2(0x80000000)
205 ; CHECK: liveins: $r0, $r1, $r2
206 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
207 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
208 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
209 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
210 ; CHECK: renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0
211 ; CHECK: $lr = MVE_DLSTP_32 killed renamable $r1, debug-location !31
212 ; CHECK: bb.2.vector.body (align 4):
213 ; CHECK: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
214 ; CHECK: liveins: $lr, $q0, $r0, $r2
215 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
216 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
217 ; CHECK: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
218 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
219 ; CHECK: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40
220 ; CHECK: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40
221 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
222 ; CHECK: $lr = MVE_LETP killed renamable $lr, %bb.2
223 ; CHECK: bb.3.middle.block:
224 ; CHECK: successors: %bb.5(0x80000000)
225 ; CHECK: liveins: $q0, $r2
226 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
227 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
228 ; CHECK: renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
229 ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit killed $q0, debug-location !31
230 ; CHECK: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
231 ; CHECK: tB %bb.5, 14 /* CC::al */, $noreg
233 ; CHECK: successors: %bb.5(0x80000000)
234 ; CHECK: liveins: $r2
235 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
236 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
237 ; CHECK: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
238 ; CHECK: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
239 ; CHECK: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
240 ; CHECK: bb.5.while.end:
241 ; CHECK: liveins: $r2, $s0
242 ; CHECK: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
243 ; CHECK: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
244 ; CHECK: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
245 ; CHECK: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
246 ; CHECK: bb.6 (align 4):
247 ; CHECK: CONSTPOOL_ENTRY 0, %const.0, 4
; Prologue and zero-count guard. DBG_VALUEs bind r0/r1/r2 to variables !24/!25/!26
; (pSrc/blockSize/pResult), r7 and lr are pushed, and tCBZ branches to %bb.4
; when r1 is zero. The duplicated DBG_VALUE lines are part of the test input and
; are expected to be preserved unchanged in the pass output.
249 successors: %bb.4(0x30000000), %bb.1(0x50000000)
250 liveins: $r0, $r1, $r2, $r7, $lr
252 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
253 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
254 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
255 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
256 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
257 frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
258 frame-setup CFI_INSTRUCTION def_cfa_offset 8
259 frame-setup CFI_INSTRUCTION offset $lr, -4
260 frame-setup CFI_INSTRUCTION offset $r7, -8
261 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
262 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
263 tCBZ renamable $r1, %bb.4, debug-location !31
; Loop preheader in its pre-pass form. r3 is built from r1 by add 3, clear the
; low two bits, subtract 4, then a shifted add with the constant 1 (presumably
; the same iteration-count formula as the IR preheader; shift operand encoded
; as 19). q0 receives the initial accumulator, and t2DoLoopStartTP takes both r3
; and r1. The autogenerated assertions expect the whole r3 computation to
; disappear and the loop start to become MVE_DLSTP_32 on r1.
266 successors: %bb.2(0x80000000)
267 liveins: $r0, $r1, $r2
269 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
270 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
271 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
272 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
273 renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg, debug-location !31
274 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg, debug-location !31
275 renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg, debug-location !31
276 renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
277 renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg, debug-location !31
278 renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, undef renamable $q0
279 renamable $lr = t2DoLoopStartTP killed renamable $r3, renamable $r1, debug-location !31
; Loop body in its pre-pass form: an explicit MVE_VCTP32 on r1 defines vpr, a
; MVE_VPST opens the predicated group, and the load, compare and MVE_VORR all
; use vpr. r1 is then decremented by 4 and t2LoopEndDec branches back to %bb.2.
; A DBG_VALUE sits between the predicated load and the compare. The
; autogenerated assertions expect the pass to look past it: the VCTP, the VPST
; and the r1 decrement are dropped, the compare becomes MVE_VPTv4f32, the loop
; end becomes MVE_LETP, and both DBG_VALUEs stay in place.
281 bb.2.vector.body (align 4):
282 successors: %bb.2(0x7c000000), %bb.3(0x04000000)
283 liveins: $lr, $q0, $r0, $r1, $r2
285 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
286 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
287 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg
288 MVE_VPST 2, implicit $vpr, debug-location !32
289 renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
290 DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
291 renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, debug-location !40
292 renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, killed renamable $q0, debug-location !40
293 renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
294 DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
295 renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr
296 tB %bb.3, 14 /* CC::al */, $noreg
; Post-loop reduction: three VFP_VMAXNMS instructions combine s2/s3 into s4,
; s0/s1 into s0 (with an implicit use of q0), then s0/s4 into s0, before
; branching to %bb.5. The pass leaves these instructions in place; the
; autogenerated assertions only add a kill flag on the implicit q0 use.
299 successors: %bb.5(0x80000000)
302 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
303 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
304 renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
305 renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit $q0, debug-location !31
306 renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
307 tB %bb.5, 14 /* CC::al */, $noreg
; Zero-count path (the tCBZ target, %bb.4): s0 is loaded from constant-pool
; entry %const.0 so the exit block stores the initial value. This block is
; outside the loop and is expected to be unchanged by the pass.
310 successors: %bb.5(0x80000000)
313 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
314 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
315 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
316 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
317 renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
; Exit block: store s0 through r2 (the memory operand names %ir.pResult), then
; return with tPOP_RET, which defines r7 and pc.
322 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
323 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
324 VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
325 frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
; Constant-pool entry for %const.0, the value loaded by the VLDRS on the
; zero-count path (the trailing 4 is presumably the entry size in bytes).
328 CONSTPOOL_ENTRY 0, %const.0, 4