1 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
2 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
3 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
4 // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
5 // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
6 // RUN: %clang_cc1 -fopenmp -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
7 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
9 // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
10 // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
11 // RUN: %clang_cc1 -fopenmp-simd -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
12 // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86
13 // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX
14 // RUN: %clang_cc1 -fopenmp-simd -triple i386-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512
15 // RUN: %clang_cc1 -fopenmp-simd -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC
// h1: three `#pragma omp simd` loops with the same body that differ only in
// their safelen/simdlen clauses; each one also carries linear(t),
// aligned(c:32) and aligned(a,b).
// For every loop the FileCheck directives expect three llvm.assume "align"
// operand bundles: one with the explicit value 32, followed by two whose
// expected value depends on the active check prefix (16 for the plain x86 and
// PowerPC invocations, 32 with +avx, 64 with +avx512f). Presumably the first
// bundle belongs to `c` and the other two to `a` and `b` -- confirm against
// the generated IR.
// NOTE(review): `t` is used in linear(t) and b[t], but neither its declaration
// nor the function's braces are visible in this view.
17 void h1(float *c
, float *a
, double b
[], int size
)
19 // CHECK-LABEL: define{{.*}} void @h1
// First loop: safelen(16) only.
21 #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b)
22 // CHECK: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR4:%.*]], {{i64|i32}} 32) ]
25 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
26 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 32) ]
27 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 64) ]
28 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
31 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
32 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 32) ]
33 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 64) ]
34 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
35 for (int i
= 0; i
< size
; ++i
) {
36 c
[i
] = a
[i
] * a
[i
] + b
[i
] * b
[t
];
39 // do not emit llvm.access.group metadata due to usage of safelen clause.
40 // CHECK-NOT: store float {{.+}}, ptr {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}}
// Second loop: safelen(16) combined with simdlen(8); the alignment
// expectations are the same as for the first loop.
41 #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8)
42 // CHECK: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR4:%.*]], {{i64|i32}} 32) ]
45 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
46 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 32) ]
47 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 64) ]
48 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
51 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
52 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 32) ]
53 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 64) ]
54 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
55 for (int i
= 0; i
< size
; ++i
) {
56 c
[i
] = a
[i
] * a
[i
] + b
[i
] * b
[t
];
59 // do not emit llvm.access.group metadata due to usage of safelen clause.
60 // CHECK-NOT: store float {{.+}}, ptr {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}}
// Third loop: simdlen(8) without safelen. Unlike the two loops above, the
// float store is expected to carry !llvm.access.group metadata here.
61 #pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8)
62 // CHECK: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR4:%.*]], {{i64|i32}} 32) ]
65 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
66 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 32) ]
67 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 64) ]
68 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR5:%.*]], {{i64|i32}} 16) ]
71 // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
72 // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 32) ]
73 // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 64) ]
74 // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR6:%.*]], {{i64|i32}} 16) ]
75 for (int i
= 0; i
< size
; ++i
) {
76 c
[i
] = a
[i
] * a
[i
] + b
[i
] * b
[t
];
78 // CHECK: store float {{.+}}, ptr {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_7:[0-9]+]]
// h2: a single `#pragma omp simd linear(t)` loop with no safelen, simdlen or
// aligned clause. The directives expect the float store to carry
// !llvm.access.group metadata and a later `br label` to carry !llvm.loop
// metadata; both ids are captured (ACCESS_GROUP_10, LOOP_H2_HEADER) and
// matched again by the metadata directives at the end of the file.
// NOTE(review): `t` is used in linear(t) and b[t], but neither its declaration
// nor the function's braces are visible in this view.
82 void h2(float *c
, float *a
, float *b
, int size
)
84 // CHECK-LABEL: define{{.*}} void @h2
86 #pragma omp simd linear(t)
87 for (int i
= 0; i
< size
; ++i
) {
88 c
[i
] = a
[i
] * a
[i
] + b
[i
] * b
[t
];
90 // CHECK: store float {{.+}}, ptr {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_10:[0-9]+]]
92 // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H2_HEADER:![0-9]+]]
// h3: a doubly nested loop over i and j that stores a[i] * b[j] into c[j*i].
// The directives expect the float store to carry !llvm.access.group metadata,
// followed by two `br label` instructions with !llvm.loop metadata, captured
// as LOOP_H3_HEADER_INNER and then LOOP_H3_HEADER. Of the two, only
// LOOP_H3_HEADER is matched again by the metadata directives at the end of
// the file.
// NOTE(review): no `#pragma omp simd` is visible ahead of the outer loop in
// this view, although the expected metadata suggests one applies -- confirm
// against the complete file. The function's braces are not visible either.
95 void h3(float *c
, float *a
, float *b
, int size
)
97 // CHECK-LABEL: define{{.*}} void @h3
99 for (int i
= 0; i
< size
; ++i
) {
100 for (int j
= 0; j
< size
; ++j
) {
101 c
[j
*i
] = a
[i
] * b
[j
];
103 // CHECK: store float {{.+}}, ptr {{.+}}, align {{.+}}, !llvm.access.group ![[ACCESS_GROUP_13:[0-9]+]]
105 // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H3_HEADER_INNER:![0-9]+]]
106 // CHECK: br label %{{.+}}, !llvm.loop [[LOOP_H3_HEADER:![0-9]+]]
110 // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_16:![0-9]+]], [[LOOP_VEC_ENABLE:![0-9]+]]}
111 // CHECK: [[LOOP_WIDTH_16]] = !{!"llvm.loop.vectorize.width", i32 16}
112 // CHECK: [[LOOP_VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
113 // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], [[LOOP_WIDTH_8:![0-9]+]], [[LOOP_VEC_ENABLE]]}
114 // CHECK: [[LOOP_WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
115 // CHECK: ![[ACCESS_GROUP_7]] = distinct !{}
116 // CHECK: [[LOOP_H1_HEADER:![0-9]+]] = distinct !{[[LOOP_H1_HEADER]], ![[PARALLEL_ACCESSES_9:[0-9]+]], [[LOOP_WIDTH_8]], [[LOOP_VEC_ENABLE]]}
117 // CHECK: ![[PARALLEL_ACCESSES_9]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_7]]}
120 // CHECK: ![[ACCESS_GROUP_10]] = distinct !{}
121 // CHECK: [[LOOP_H2_HEADER]] = distinct !{[[LOOP_H2_HEADER]], ![[PARALLEL_ACCESSES_12:[0-9]+]], [[LOOP_VEC_ENABLE]]}
122 // CHECK: ![[PARALLEL_ACCESSES_12]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_10]]}
125 // CHECK: ![[ACCESS_GROUP_13]] = distinct !{}
126 // CHECK: [[LOOP_H3_HEADER]] = distinct !{[[LOOP_H3_HEADER]], ![[PARALLEL_ACCESSES_15:[0-9]+]], [[LOOP_VEC_ENABLE]]}
127 // CHECK: ![[PARALLEL_ACCESSES_15]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_13]]}