1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix LOBUDGET %s
3 ; RUN: opt < %s -passes=slp-vectorizer -S -slp-schedule-budget=32 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck --check-prefix HIBUDGET %s
5 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6 target triple = "x86_64-apple-macosx10.9.0"
8 ; Test that the budget for the scheduling region size works.
9 ; We test with a reduced budget of 16, which should prevent vectorizing the loads from %a and the stores to %b.
11 declare void @unknown()
13 define void @test(ptr %a, ptr %b, ptr %c, ptr %d) {
14 ; LOBUDGET-LABEL: @test(
15 ; LOBUDGET-NEXT: entry:
16 ; LOBUDGET-NEXT: [[L0:%.*]] = load float, ptr [[A:%.*]], align 4
17 ; LOBUDGET-NEXT: [[A1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
18 ; LOBUDGET-NEXT: [[L1:%.*]] = load float, ptr [[A1]], align 4
19 ; LOBUDGET-NEXT: [[A2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2
20 ; LOBUDGET-NEXT: [[L2:%.*]] = load float, ptr [[A2]], align 4
21 ; LOBUDGET-NEXT: [[A3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 3
22 ; LOBUDGET-NEXT: [[L3:%.*]] = load float, ptr [[A3]], align 4
23 ; LOBUDGET-NEXT: [[L00:%.*]] = fadd float [[L0]], [[L0]]
24 ; LOBUDGET-NEXT: [[L10:%.*]] = fadd float [[L1]], [[L1]]
25 ; LOBUDGET-NEXT: [[L20:%.*]] = fadd float [[L2]], [[L2]]
26 ; LOBUDGET-NEXT: [[L30:%.*]] = fadd float [[L3]], [[L3]]
27 ; LOBUDGET-NEXT: call void @unknown()
28 ; LOBUDGET-NEXT: call void @unknown()
29 ; LOBUDGET-NEXT: call void @unknown()
30 ; LOBUDGET-NEXT: call void @unknown()
31 ; LOBUDGET-NEXT: call void @unknown()
32 ; LOBUDGET-NEXT: call void @unknown()
33 ; LOBUDGET-NEXT: call void @unknown()
34 ; LOBUDGET-NEXT: call void @unknown()
35 ; LOBUDGET-NEXT: call void @unknown()
36 ; LOBUDGET-NEXT: call void @unknown()
37 ; LOBUDGET-NEXT: call void @unknown()
38 ; LOBUDGET-NEXT: call void @unknown()
39 ; LOBUDGET-NEXT: call void @unknown()
40 ; LOBUDGET-NEXT: call void @unknown()
41 ; LOBUDGET-NEXT: call void @unknown()
42 ; LOBUDGET-NEXT: call void @unknown()
43 ; LOBUDGET-NEXT: call void @unknown()
44 ; LOBUDGET-NEXT: call void @unknown()
45 ; LOBUDGET-NEXT: call void @unknown()
46 ; LOBUDGET-NEXT: call void @unknown()
47 ; LOBUDGET-NEXT: call void @unknown()
48 ; LOBUDGET-NEXT: call void @unknown()
49 ; LOBUDGET-NEXT: call void @unknown()
50 ; LOBUDGET-NEXT: call void @unknown()
51 ; LOBUDGET-NEXT: call void @unknown()
52 ; LOBUDGET-NEXT: call void @unknown()
53 ; LOBUDGET-NEXT: call void @unknown()
54 ; LOBUDGET-NEXT: call void @unknown()
55 ; LOBUDGET-NEXT: store float [[L00]], ptr [[B:%.*]], align 4
56 ; LOBUDGET-NEXT: [[B1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 1
57 ; LOBUDGET-NEXT: store float [[L10]], ptr [[B1]], align 4
58 ; LOBUDGET-NEXT: [[B2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 2
59 ; LOBUDGET-NEXT: store float [[L20]], ptr [[B2]], align 4
60 ; LOBUDGET-NEXT: [[B3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 3
61 ; LOBUDGET-NEXT: store float [[L30]], ptr [[B3]], align 4
62 ; LOBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
63 ; LOBUDGET-NEXT: store <4 x float> [[TMP0]], ptr [[D:%.*]], align 4
64 ; LOBUDGET-NEXT: ret void
66 ; HIBUDGET-LABEL: @test(
67 ; HIBUDGET-NEXT: entry:
68 ; HIBUDGET-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
69 ; HIBUDGET-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[TMP0]], [[TMP0]]
70 ; HIBUDGET-NEXT: call void @unknown()
71 ; HIBUDGET-NEXT: call void @unknown()
72 ; HIBUDGET-NEXT: call void @unknown()
73 ; HIBUDGET-NEXT: call void @unknown()
74 ; HIBUDGET-NEXT: call void @unknown()
75 ; HIBUDGET-NEXT: call void @unknown()
76 ; HIBUDGET-NEXT: call void @unknown()
77 ; HIBUDGET-NEXT: call void @unknown()
78 ; HIBUDGET-NEXT: call void @unknown()
79 ; HIBUDGET-NEXT: call void @unknown()
80 ; HIBUDGET-NEXT: call void @unknown()
81 ; HIBUDGET-NEXT: call void @unknown()
82 ; HIBUDGET-NEXT: call void @unknown()
83 ; HIBUDGET-NEXT: call void @unknown()
84 ; HIBUDGET-NEXT: call void @unknown()
85 ; HIBUDGET-NEXT: call void @unknown()
86 ; HIBUDGET-NEXT: call void @unknown()
87 ; HIBUDGET-NEXT: call void @unknown()
88 ; HIBUDGET-NEXT: call void @unknown()
89 ; HIBUDGET-NEXT: call void @unknown()
90 ; HIBUDGET-NEXT: call void @unknown()
91 ; HIBUDGET-NEXT: call void @unknown()
92 ; HIBUDGET-NEXT: call void @unknown()
93 ; HIBUDGET-NEXT: call void @unknown()
94 ; HIBUDGET-NEXT: call void @unknown()
95 ; HIBUDGET-NEXT: call void @unknown()
96 ; HIBUDGET-NEXT: call void @unknown()
97 ; HIBUDGET-NEXT: call void @unknown()
98 ; HIBUDGET-NEXT: store <4 x float> [[TMP1]], ptr [[B:%.*]], align 4
99 ; HIBUDGET-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
100 ; HIBUDGET-NEXT: store <4 x float> [[TMP2]], ptr [[D:%.*]], align 4
101 ; HIBUDGET-NEXT: ret void
104 ; Don't vectorize these loads (with the reduced budget).
105 %l0 = load float, ptr %a
106 %a1 = getelementptr inbounds float, ptr %a, i64 1
107 %l1 = load float, ptr %a1
108 %a2 = getelementptr inbounds float, ptr %a, i64 2
109 %l2 = load float, ptr %a2
110 %a3 = getelementptr inbounds float, ptr %a, i64 3
111 %l3 = load float, ptr %a3
112 %l00 = fadd float %l0, %l0
113 %l10 = fadd float %l1, %l1
114 %l20 = fadd float %l2, %l2
115 %l30 = fadd float %l3, %l3
117 ; some unrelated instructions in between to enlarge the scheduling region
147 ; Don't vectorize these stores because their operands are too far away (with
148 ; the reduced budget).
149 store float %l00, ptr %b
150 %b1 = getelementptr inbounds float, ptr %b, i64 1
151 store float %l10, ptr %b1
152 %b2 = getelementptr inbounds float, ptr %b, i64 2
153 store float %l20, ptr %b2
154 %b3 = getelementptr inbounds float, ptr %b, i64 3
155 store float %l30, ptr %b3
157 ; But still vectorize the following instructions, because even if the budget
158 ; is exceeded there is a minimum region size.
159 %l4 = load float, ptr %c
160 %c1 = getelementptr inbounds float, ptr %c, i64 1
161 %l5 = load float, ptr %c1
162 %c2 = getelementptr inbounds float, ptr %c, i64 2
163 %l6 = load float, ptr %c2
164 %c3 = getelementptr inbounds float, ptr %c, i64 3
165 %l7 = load float, ptr %c3
167 store float %l4, ptr %d
168 %d1 = getelementptr inbounds float, ptr %d, i64 1
169 store float %l5, ptr %d1
170 %d2 = getelementptr inbounds float, ptr %d, i64 2
171 store float %l6, ptr %d2
172 %d3 = getelementptr inbounds float, ptr %d, i64 3
173 store float %l7, ptr %d3