1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - --verify-machineinstrs | FileCheck %s
4 source_filename = "skip-vpt-debug.c"
5 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
6 target triple = "thumbv8.1m.main-arm-none-eabihf"
; IR input for the test: a maximum search over a float buffer, already
; vectorised (see the llvm.loop.isvectorized loop metadata) into a loop whose
; lanes are predicated by @llvm.arm.mve.vctp32.
;   %pSrc      - source float buffer, only read
;   %blockSize - element count; zero branches straight to while.end
;   %pResult   - location that receives the result
; The running maximum starts at 0x3810000000000000 (2^-126 in the
; double-encoded float constant syntax), which is also the value stored when
; %blockSize is zero.
; NOTE(review): the entry label, the definition of %7 and the end of the
; function are not visible in this excerpt.
8 define hidden void @arm_max_no_idx_f32(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocapture %pResult) local_unnamed_addr #0 !dbg !13 {
10 call void @llvm.dbg.value(metadata ptr %pSrc, metadata !24, metadata !DIExpression()), !dbg !29
11 call void @llvm.dbg.value(metadata i32 %blockSize, metadata !25, metadata !DIExpression()), !dbg !29
12 call void @llvm.dbg.value(metadata ptr %pResult, metadata !26, metadata !DIExpression()), !dbg !29
13 call void @llvm.dbg.value(metadata float 0x3810000000000000, metadata !27, metadata !DIExpression()), !dbg !29
; Skip the loop entirely for an empty input.
14 %cmp.not7 = icmp eq i32 %blockSize, 0, !dbg !30
15 br i1 %cmp.not7, label %while.end, label %vector.ph, !dbg !31
; Preheader: iteration count = (((blockSize + 3) & -4) - 4) / 4 + 1, i.e. one
; iteration per group of up to four elements, handed to the loop-start intrinsic.
17 vector.ph: ; preds = %entry
18 %n.rnd.up = add i32 %blockSize, 3, !dbg !31
19 %n.vec = and i32 %n.rnd.up, -4, !dbg !31
20 %0 = add i32 %n.vec, -4, !dbg !31
21 %1 = lshr i32 %0, 2, !dbg !31
22 %2 = add nuw nsw i32 %1, 1, !dbg !31
23 %3 = call i32 @llvm.start.loop.iterations.i32(i32 %2), !dbg !31
24 br label %vector.body, !dbg !31
; Loop body. %4 is the iteration counter; %5 is the element count given to
; vctp32 to form the lane mask (its update %7 is defined on a line not visible
; here).
26 vector.body: ; preds = %vector.body, %vector.ph
27 %lsr.iv1 = phi ptr [ %scevgep, %vector.body ], [ %pSrc, %vector.ph ]
28 %vec.phi = phi <4 x float> [ <float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000, float 0x3810000000000000>, %vector.ph ], [ %10, %vector.body ]
29 %4 = phi i32 [ %3, %vector.ph ], [ %11, %vector.body ]
30 %5 = phi i32 [ %blockSize, %vector.ph ], [ %7, %vector.body ]
31 %lsr.iv12 = bitcast ptr %lsr.iv1 to ptr
32 %6 = call <4 x i1> @llvm.arm.mve.vctp32(i32 %5)
34 %wide.masked.load = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %lsr.iv12, i32 4, <4 x i1> %6, <4 x float> poison), !dbg !32, !tbaa !34
; Take the loaded lane only where the lane is active AND the running maximum
; is less than the loaded value; otherwise keep the running maximum.
35 %8 = fcmp nnan ninf nsz olt <4 x float> %vec.phi, %wide.masked.load, !dbg !38
36 %9 = and <4 x i1> %6, %8, !dbg !40
37 %10 = select <4 x i1> %9, <4 x float> %wide.masked.load, <4 x float> %vec.phi, !dbg !40
; Advance the source pointer by four floats and decrement the iteration
; counter by one; loop while it is non-zero.
38 %scevgep = getelementptr float, ptr %lsr.iv1, i32 4
39 %11 = call i32 @llvm.loop.decrement.reg.i32(i32 %4, i32 1)
40 %12 = icmp ne i32 %11, 0
41 br i1 %12, label %vector.body, label %middle.block, !llvm.loop !41
; Reduce the four accumulator lanes to a single scalar maximum.
43 middle.block: ; preds = %vector.body
44 %13 = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> %10), !dbg !31
45 br label %while.end, !dbg !45
; Store the reduced maximum, or the initial constant when arriving from entry.
47 while.end: ; preds = %middle.block, %entry
48 %maxValue.0.lcssa = phi float [ 0x3810000000000000, %entry ], [ %13, %middle.block ], !dbg !29
49 store float %maxValue.0.lcssa, ptr %pResult, align 4, !dbg !45, !tbaa !34
; Intrinsic declarations for the module. Each one carries an attribute-group
; reference (#1 - #5).
; NOTE(review): @llvm.get.active.lane.mask.v4i1.i32 has no call site in the
; visible function body, which builds its lane mask with @llvm.arm.mve.vctp32.
53 declare void @llvm.dbg.value(metadata, metadata, metadata) #1
55 declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32) #2
57 declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #3
59 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #2
61 declare i32 @llvm.start.loop.iterations.i32(i32) #4
63 declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
65 declare <4 x i1> @llvm.arm.mve.vctp32(i32) #5
67 attributes #0 = { nofree norecurse nounwind optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main,+dsp,+fp-armv8d16,+fp-armv8d16sp,+fp16,+fp64,+fullfp16,+hwdiv,+lob,+mve,+mve.fp,+ras,+thumb-mode,+vfp2,+vfp2sp,+vfp3d16,+vfp3d16sp,+vfp4d16,+vfp4d16sp,-aes,-bf16,-cdecp0,-cdecp1,-cdecp2,-cdecp3,-cdecp4,-cdecp5,-cdecp6,-cdecp7,-crc,-crypto,-dotprod,-fp16fml,-hwdiv-arm,-i8mm,-sb,-sha2" }
68 attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
69 attributes #2 = { nofree nosync nounwind readnone willreturn }
70 attributes #3 = { argmemonly nofree nosync nounwind readonly willreturn }
71 attributes #4 = { noduplicate nofree nosync nounwind willreturn }
72 attributes #5 = { nounwind readnone }
75 !llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11}
78 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
79 !1 = !DIFile(filename: "skip-vpt-debug.c", directory: "/home/vicspe01")
81 !3 = !{i32 7, !"Dwarf Version", i32 4}
82 !4 = !{i32 2, !"Debug Info Version", i32 3}
83 !5 = !{i32 1, !"wchar_size", i32 4}
84 !6 = !{i32 1, !"static_rwdata", i32 1}
85 !7 = !{i32 1, !"enumsize_buildattr", i32 2}
86 !8 = !{i32 1, !"armlib_unavailable", i32 0}
87 !9 = !{i32 8, !"branch-target-enforcement", i32 0}
88 !10 = !{i32 8, !"sign-return-address", i32 0}
89 !11 = !{i32 8, !"sign-return-address-all", i32 0}
90 !12 = !{!"Component: ARM Compiler 6.17.0.0 (permissive) Tool: armclang [00000000]"}
91 !13 = distinct !DISubprogram(name: "arm_max_no_idx_f32", scope: !1, file: !1, line: 5, type: !14, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !23)
92 !14 = !DISubroutineType(types: !15)
93 !15 = !{null, !16, !20, !22}
94 !16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 32)
95 !17 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !18)
96 !18 = !DIDerivedType(tag: DW_TAG_typedef, name: "float32_t", file: !1, line: 1, baseType: !19)
97 !19 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
98 !20 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 2, baseType: !21)
99 !21 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
100 !22 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 32)
101 !23 = !{!24, !25, !26, !27, !28}
102 !24 = !DILocalVariable(name: "pSrc", arg: 1, scope: !13, file: !1, line: 5, type: !16)
103 !25 = !DILocalVariable(name: "blockSize", arg: 2, scope: !13, file: !1, line: 5, type: !20)
104 !26 = !DILocalVariable(name: "pResult", arg: 3, scope: !13, file: !1, line: 6, type: !22)
105 !27 = !DILocalVariable(name: "maxValue", scope: !13, file: !1, line: 7, type: !18)
106 !28 = !DILocalVariable(name: "newVal", scope: !13, file: !1, line: 8, type: !18)
107 !29 = !DILocation(line: 0, scope: !13)
108 !30 = !DILocation(line: 10, column: 20, scope: !13)
109 !31 = !DILocation(line: 10, column: 3, scope: !13)
110 !32 = !DILocation(line: 11, column: 14, scope: !33)
111 !33 = distinct !DILexicalBlock(scope: !13, file: !1, line: 10, column: 26)
112 !34 = !{!35, !35, i64 0}
113 !35 = !{!"float", !36, i64 0}
114 !36 = !{!"omnipotent char", !37, i64 0}
115 !37 = !{!"Simple C/C++ TBAA"}
116 !38 = !DILocation(line: 12, column: 18, scope: !39)
117 !39 = distinct !DILexicalBlock(scope: !33, file: !1, line: 12, column: 9)
118 !40 = !DILocation(line: 12, column: 9, scope: !33)
119 !41 = distinct !{!41, !31, !42, !43, !44}
120 !42 = !DILocation(line: 15, column: 3, scope: !13)
121 !43 = !{!"llvm.loop.mustprogress"}
122 !44 = !{!"llvm.loop.isvectorized", i32 1}
123 !45 = !DILocation(line: 16, column: 12, scope: !13)
124 !46 = !DILocation(line: 17, column: 1, scope: !13)
128 name: arm_max_no_idx_f32
130 exposesReturnsTwice: false
132 regBankSelected: false
135 tracksRegLiveness: true
139 - { reg: '$r0', virtual-reg: '' }
140 - { reg: '$r1', virtual-reg: '' }
141 - { reg: '$r2', virtual-reg: '' }
143 isFrameAddressTaken: false
144 isReturnAddressTaken: false
154 cvBytesOfCalleeSavedRegisters: 0
155 hasOpaqueSPAdjustment: false
157 hasMustTailInVarArgFunc: false
163 - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
164 stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
165 debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
166 - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
167 stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
168 debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
170 debugValueSubstitutions: []
173 value: float 0x3810000000000000
175 isTargetSpecific: false
176 machineFunctionInfo: {}
178 ; CHECK-LABEL: name: arm_max_no_idx_f32
180 ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.1(0x50000000)
181 ; CHECK-NEXT: liveins: $lr, $r0, $r1, $r2, $r7
183 ; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
184 ; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
185 ; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
186 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
187 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
188 ; CHECK-NEXT: frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
189 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
190 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
191 ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8
192 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
193 ; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
194 ; CHECK-NEXT: tCBZ renamable $r1, %bb.4, debug-location !31
196 ; CHECK-NEXT: bb.1.vector.ph:
197 ; CHECK-NEXT: successors: %bb.2(0x80000000)
198 ; CHECK-NEXT: liveins: $r0, $r1, $r2
200 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
201 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
202 ; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
203 ; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
204 ; CHECK-NEXT: renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, $noreg, undef renamable $q0
205 ; CHECK-NEXT: $lr = MVE_DLSTP_32 killed renamable $r1, debug-location !31
207 ; CHECK-NEXT: bb.2.vector.body (align 4):
208 ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
209 ; CHECK-NEXT: liveins: $lr, $q0, $r0, $r2
211 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
212 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
213 ; CHECK-NEXT: renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
214 ; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
215 ; CHECK-NEXT: MVE_VPTv4f32 8, renamable $q1, renamable $q0, 12, implicit-def $vpr, debug-location !40
216 ; CHECK-NEXT: renamable $q0 = MVE_VORR killed renamable $q1, killed renamable $q1, 1, killed renamable $vpr, $noreg, killed renamable $q0, debug-location !40
217 ; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
218 ; CHECK-NEXT: $lr = MVE_LETP killed renamable $lr, %bb.2
220 ; CHECK-NEXT: bb.3.middle.block:
221 ; CHECK-NEXT: successors: %bb.5(0x80000000)
222 ; CHECK-NEXT: liveins: $q0, $r2
224 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
225 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
226 ; CHECK-NEXT: renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
227 ; CHECK-NEXT: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit killed $q0, debug-location !31
228 ; CHECK-NEXT: renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
229 ; CHECK-NEXT: tB %bb.5, 14 /* CC::al */, $noreg
232 ; CHECK-NEXT: successors: %bb.5(0x80000000)
233 ; CHECK-NEXT: liveins: $r2
235 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
236 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
237 ; CHECK-NEXT: DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
238 ; CHECK-NEXT: DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
239 ; CHECK-NEXT: renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
241 ; CHECK-NEXT: bb.5.while.end:
242 ; CHECK-NEXT: liveins: $r2, $s0
244 ; CHECK-NEXT: DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
245 ; CHECK-NEXT: DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
246 ; CHECK-NEXT: VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
247 ; CHECK-NEXT: frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
249 ; CHECK-NEXT: bb.6 (align 4):
250 ; CHECK-NEXT: CONSTPOOL_ENTRY 0, %const.0, 4
; Machine code handed to the pass, before transformation. Several block label
; lines are not visible in this excerpt, so the notes below follow the
; successors lists. Entry part: debug values for the three arguments, frame
; setup, then a branch to %bb.4 when $r1 is zero.
252 successors: %bb.4(0x30000000), %bb.1(0x50000000)
253 liveins: $r0, $r1, $r2, $r7, $lr
255 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
256 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
257 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
258 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
259 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
260 frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
261 frame-setup CFI_INSTRUCTION def_cfa_offset 8
262 frame-setup CFI_INSTRUCTION offset $lr, -4
263 frame-setup CFI_INSTRUCTION offset $r7, -8
264 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
265 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
266 tCBZ renamable $r1, %bb.4, debug-location !31
; Preheader: iteration-count arithmetic in $r3/$r12 feeding the
; t2DoLoopStartTP pseudo. In the expected output above, this arithmetic and
; the pseudo are replaced by a single MVE_DLSTP_32 taking $r1.
269 successors: %bb.2(0x80000000)
270 liveins: $r0, $r1, $r2
272 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
273 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
274 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
275 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
276 renamable $r3, dead $cpsr = tADDi3 renamable $r1, 3, 14 /* CC::al */, $noreg, debug-location !31
277 renamable $r3 = t2BICri killed renamable $r3, 3, 14 /* CC::al */, $noreg, $noreg, debug-location !31
278 renamable $r12 = t2SUBri killed renamable $r3, 4, 14 /* CC::al */, $noreg, $noreg, debug-location !31
279 renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
280 renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14 /* CC::al */, $noreg, $noreg, debug-location !31
281 renamable $q0 = MVE_VMOVimmi32 1152, 0, $noreg, $noreg, undef renamable $q0
282 renamable $lr = t2DoLoopStartTP killed renamable $r3, renamable $r1, debug-location !31
; Loop body with explicit predication: MVE_VCTP32 derives $vpr from $r1, the
; load, compare and VORR each take $vpr as their predicate operand, and
; tSUBi8 steps $r1 down by 4. DBG_VALUE instructions sit between the
; predicated instructions. The expected output above has no VCTP32, VPST or
; tSUBi8, uses MVE_VPTv4f32 in place of the predicated VCMP, and closes the
; loop with MVE_LETP.
284 bb.2.vector.body (align 4):
285 successors: %bb.2(0x7c000000), %bb.3(0x04000000)
286 liveins: $lr, $q0, $r0, $r1, $r2
288 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
289 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
290 renamable $vpr = MVE_VCTP32 renamable $r1, 0, $noreg, $noreg
291 MVE_VPST 2, implicit $vpr, debug-location !32
292 renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg, debug-location !32 :: (load (s128) from %ir.lsr.iv12, align 4, !tbaa !34)
293 DBG_VALUE $r0, $noreg, !24, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
294 renamable $vpr = MVE_VCMPf32 renamable $q1, renamable $q0, 12, 1, killed renamable $vpr, $noreg, debug-location !40
295 renamable $q0 = MVE_VORR killed renamable $q1, renamable $q1, 1, killed renamable $vpr, $noreg, killed renamable $q0, debug-location !40
296 renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 4, 14 /* CC::al */, $noreg
297 DBG_VALUE $r1, $noreg, !25, !DIExpression(DW_OP_LLVM_entry_value, 1), debug-location !29
298 renamable $lr = t2LoopEndDec killed renamable $lr, %bb.2, implicit-def dead $cpsr
299 tB %bb.3, 14 /* CC::al */, $noreg
; After the loop: three scalar VFP_VMAXNMS over $s0-$s3 leave the result in
; $s0, then branch to the store block.
302 successors: %bb.5(0x80000000)
305 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
306 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
307 renamable $s4 = nnan ninf nsz VFP_VMAXNMS renamable $s2, renamable $s3, debug-location !31
308 renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s1, implicit $q0, debug-location !31
309 renamable $s0 = nnan ninf nsz VFP_VMAXNMS killed renamable $s0, killed renamable $s4, debug-location !31
310 tB %bb.5, 14 /* CC::al */, $noreg
; Presumably the %bb.4 target of the tCBZ above (label not visible): loads the
; constant-pool entry %const.0 into $s0.
313 successors: %bb.5(0x80000000)
316 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
317 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
318 DBG_VALUE $r1, $noreg, !25, !DIExpression(), debug-location !29
319 DBG_VALUE $r0, $noreg, !24, !DIExpression(), debug-location !29
320 renamable $s0 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
; Common exit: store $s0 through $r2 (%ir.pResult) and return via tPOP_RET.
325 DBG_VALUE float 0x3810000000000000, $noreg, !27, !DIExpression(), debug-location !29
326 DBG_VALUE $r2, $noreg, !26, !DIExpression(), debug-location !29
327 VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg, debug-location !45 :: (store (s32) into %ir.pResult, !tbaa !34)
328 frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc, debug-location !46
; Constant-pool entry for %const.0 (4 bytes).
331 CONSTPOOL_ENTRY 0, %const.0, 4