1 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm \
2 // RUN: -target-cpu pwr9 -target-feature +float128 -mabi=ieeelongdouble \
3 // RUN: -o - %s | FileCheck %s -check-prefix=IEEE
4 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm \
5 // RUN: -target-cpu pwr9 -target-feature +float128 \
6 // RUN: -o - %s | FileCheck %s -check-prefix=IBM
8 // RUN: %clang_cc1 -triple ppc64le -emit-llvm-bc %s -target-cpu pwr9 \
9 // RUN: -target-feature +float128 -mabi=ieeelongdouble -fopenmp \
10 // RUN: -fopenmp-targets=ppc64le -o %t-ppc-host.bc
11 // RUN: %clang_cc1 -triple ppc64le -aux-triple ppc64le %s -target-cpu pwr9 \
12 // RUN: -target-feature +float128 -fopenmp -fopenmp-is-target-device -emit-llvm \
13 // RUN: -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s \
14 // RUN: -check-prefix=OMP-TARGET
15 // RUN: %clang_cc1 -triple ppc64le %t-ppc-host.bc -emit-llvm -o - | FileCheck %s \
16 // RUN: -check-prefix=OMP-HOST
// Single-member aggregate around a long double. long_double_struct() reads
// one of these with va_arg and passes it by value to foo_ls(), so the test
// also covers a long double nested inside a struct.
20 typedef struct { long double x
; } ldbl128_s
;
// Receives the long double obtained from va_arg. No definition is visible in
// this section; the expected-output lines match the call to @foo_ld and the
// IR type of its argument (fp128 or ppc_fp128, depending on the run).
22 void foo_ld(long double);
// Receives the __float128 obtained from va_arg in f128(). The expected output
// matches a call to @foo_fq with an fp128 argument.
23 void foo_fq(__float128
);
// Receives an ldbl128_s by value from long_double_struct(). The expected
// output matches a call to @foo_ls taking the member as `fp128 inreg`.
24 void foo_ls(ldbl128_s
);
26 // Verify va_arg when the OpenMP host and target disagree on long double:
// the host run passes -mabi=ieeelongdouble (fp128), the target run does not (ppc_fp128).
28 // OMP-TARGET-LABEL: define internal void @__omp_offloading_{{.+}}_omp_{{.+}}.omp_outlined(
29 // OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr
30 // OMP-TARGET: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
31 // OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]])
33 // OMP-HOST-LABEL: define{{.*}} void @omp(
34 // OMP-HOST: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
35 // OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]], align 8
36 // OMP-HOST: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
37 // OMP-HOST: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
38 // OMP-HOST: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
39 // OMP-HOST: call void @foo_ld(fp128 noundef %[[V4]])
40 void omp(int n
, ...) {
43 foo_ld(va_arg(ap
, long double));
44 #pragma omp target parallel
45 for (int i
= 1; i
< n
; ++i
) {
46 foo_ld(va_arg(ap
, long double));
51 // IEEE-LABEL: define{{.*}} void @f128
52 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
53 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
54 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
55 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
56 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
57 // IEEE: call void @foo_fq(fp128 noundef %[[V4]])
58 // IEEE: call void @llvm.va_end(ptr %[[AP]])
59 void f128(int n
, ...) {
62 foo_fq(va_arg(ap
, __float128
));
66 // IEEE-LABEL: define{{.*}} void @long_double
67 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
68 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
69 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
70 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
71 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
72 // IEEE: call void @foo_ld(fp128 noundef %[[V4]])
73 // IEEE: call void @llvm.va_end(ptr %[[AP]])
75 // IBM-LABEL: define{{.*}} void @long_double
76 // IBM: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
77 // IBM: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
78 // IBM: %[[V4:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
79 // IBM: call void @foo_ld(ppc_fp128 noundef %[[V4]])
80 // IBM: call void @llvm.va_end(ptr %[[AP]])
81 void long_double(int n
, ...) {
84 foo_ld(va_arg(ap
, long double));
88 // IEEE-LABEL: define{{.*}} void @long_double_struct
89 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
90 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
91 // IEEE: %[[TMP0:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
92 // IEEE: %[[ALIGN:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP0]], i64 -16)
93 // IEEE: %[[V0:[0-9a-zA-Z_.]+]] = getelementptr inbounds i8, ptr %[[ALIGN]], i64 16
94 // IEEE: store ptr %[[V0]], ptr %[[AP]], align 8
95 // IEEE: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %[[TMP:[0-9a-zA-Z_.]+]], ptr align 16 %[[ALIGN]], i64 16, i1 false)
96 // IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, ptr %[[TMP]], i32 0, i32 0
97 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[COERCE]], align 16
98 // IEEE: call void @foo_ls(fp128 inreg %[[V4]])
99 // IEEE: call void @llvm.va_end(ptr %[[AP]])
100 void long_double_struct(int n
, ...) {
103 foo_ls(va_arg(ap
, ldbl128_s
));