1 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm \
2 // RUN: -target-cpu pwr9 -target-feature +float128 -mabi=ieeelongdouble \
3 // RUN: -o - %s | FileCheck %s -check-prefix=IEEE
4 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm \
5 // RUN: -target-cpu pwr9 -target-feature +float128 \
6 // RUN: -o - %s | FileCheck %s -check-prefix=IBM
8 // RUN: %clang_cc1 -triple ppc64le -emit-llvm-bc %s -target-cpu pwr9 \
9 // RUN: -target-feature +float128 -mabi=ieeelongdouble -fopenmp \
10 // RUN: -fopenmp-targets=ppc64le -o %t-ppc-host.bc
11 // RUN: %clang_cc1 -triple ppc64le -aux-triple ppc64le %s -target-cpu pwr9 \
12 // RUN: -target-feature +float128 -fopenmp -fopenmp-is-device -emit-llvm \
13 // RUN: -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s \
14 // RUN: -check-prefix=OMP-TARGET
15 // RUN: %clang_cc1 -triple ppc64le %t-ppc-host.bc -emit-llvm -o - | FileCheck %s \
16 // RUN: -check-prefix=OMP-HOST
// Aggregate with a single `long double` member. It is the parameter type of
// foo_ls and the type read with va_arg in long_double_struct, so the test
// covers passing a long double wrapped in a struct as well as a bare one.
20 typedef struct { long double x
; } ldbl128_s
;
// Callees used as sinks for the values read with va_arg. Only declarations
// appear here; the check lines match the `call` instructions that target them.
//   foo_ld - takes a `long double` by value.
//   foo_fq - takes a `__float128` by value.
//   foo_ls - takes an `ldbl128_s` by value.
22 void foo_ld(long double);
23 void foo_fq(__float128
);
24 void foo_ls(ldbl128_s
);
26 // Verify cases when OpenMP target's and host's long-double semantics differ.
28 // OMP-TARGET-LABEL: define internal void @.omp_outlined.(
29 // OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr
30 // OMP-TARGET: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
31 // OMP-TARGET: call void @foo_ld(ppc_fp128 noundef %[[V3]])
33 // OMP-HOST-LABEL: define{{.*}} void @omp(
34 // OMP-HOST: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
35 // OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]], align 8
36 // OMP-HOST: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint ptr %[[CUR]] to i64
37 // OMP-HOST: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15
38 // OMP-HOST: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16
39 // OMP-HOST: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to ptr
40 // OMP-HOST: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
41 // OMP-HOST: call void @foo_ld(fp128 noundef %[[V4]])
// Variadic test function for the OpenMP host/target check lines above.
// Reads one `long double` variadic argument and passes it to foo_ld outside
// the OpenMP region, then reads one more per loop iteration (i = 1 .. n-1)
// under `#pragma omp target parallel`.
// NOTE(review): the declaration and initialisation of `ap`, and the closing
// braces, are not visible in this excerpt - confirm against the full file.
42 void omp(int n
, ...) {
// Consumed before the target region is entered.
45 foo_ld(va_arg(ap
, long double));
46 #pragma omp target parallel
47 for (int i
= 1; i
< n
; ++i
) {
// Consumed inside the region, once per iteration.
48 foo_ld(va_arg(ap
, long double));
53 // IEEE-LABEL: define{{.*}} void @f128
54 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
55 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
56 // IEEE: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint ptr %[[CUR]] to i64
57 // IEEE: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15
58 // IEEE: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16
59 // IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to ptr
60 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
61 // IEEE: call void @foo_fq(fp128 noundef %[[V4]])
62 // IEEE: call void @llvm.va_end(ptr %[[AP]])
// Variadic test function: reads one `__float128` variadic argument with
// va_arg and passes it to foo_fq. The check lines above expect the argument
// pointer to be rounded up to a 16-byte boundary before an fp128 load.
// NOTE(review): the declaration and initialisation of `ap`, and the closing
// brace, are not visible in this excerpt - confirm against the full file.
63 void f128(int n
, ...) {
66 foo_fq(va_arg(ap
, __float128
));
70 // IEEE-LABEL: define{{.*}} void @long_double
71 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
72 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
73 // IEEE: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint ptr %[[CUR]] to i64
74 // IEEE: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15
75 // IEEE: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16
76 // IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to ptr
77 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[ALIGN]], align 16
78 // IEEE: call void @foo_ld(fp128 noundef %[[V4]])
79 // IEEE: call void @llvm.va_end(ptr %[[AP]])
81 // IBM-LABEL: define{{.*}} void @long_double
82 // IBM: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
83 // IBM: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
84 // IBM: %[[V4:[0-9a-zA-Z_.]+]] = load ppc_fp128, ptr %[[CUR]], align 8
85 // IBM: call void @foo_ld(ppc_fp128 noundef %[[V4]])
86 // IBM: call void @llvm.va_end(ptr %[[AP]])
// Variadic test function: reads one `long double` variadic argument with
// va_arg and passes it to foo_ld. Two sets of check lines above cover it:
// one expects a 16-byte-aligned fp128 load, the other an 8-byte-aligned
// ppc_fp128 load taken directly from the current argument pointer.
// NOTE(review): the declaration and initialisation of `ap`, and the closing
// brace, are not visible in this excerpt - confirm against the full file.
87 void long_double(int n
, ...) {
90 foo_ld(va_arg(ap
, long double));
94 // IEEE-LABEL: define{{.*}} void @long_double_struct
95 // IEEE: call void @llvm.va_start(ptr %[[AP:[0-9a-zA-Z_.]+]])
96 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load ptr, ptr %[[AP]]
97 // IEEE: %[[P0:[0-9a-zA-Z_.]+]] = ptrtoint ptr %[[CUR]] to i64
98 // IEEE: %[[P1:[0-9a-zA-Z_.]+]] = add i64 %[[P0]], 15
99 // IEEE: %[[P2:[0-9a-zA-Z_.]+]] = and i64 %[[P1]], -16
100 // IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[P2]] to ptr
101 // IEEE: %[[V0:[0-9a-zA-Z_.]+]] = getelementptr inbounds i8, ptr %[[ALIGN]], i64 16
102 // IEEE: store ptr %[[V0]], ptr %[[AP]], align 8
103 // IEEE: call void @llvm.memcpy.p0.p0.i64(ptr align 16 %[[TMP:[0-9a-zA-Z_.]+]], ptr align 16 %[[ALIGN]], i64 16, i1 false)
104 // IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, ptr %[[TMP]], i32 0, i32 0
105 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, ptr %[[COERCE]], align 16
106 // IEEE: call void @foo_ls(fp128 inreg %[[V4]])
107 // IEEE: call void @llvm.va_end(ptr %[[AP]])
// Variadic test function: reads one `ldbl128_s` variadic argument with va_arg
// and passes it by value to foo_ls. The check lines above expect the argument
// pointer to be aligned to 16 bytes and advanced by 16, the struct to be
// copied to a temporary with memcpy, and its member loaded as fp128 for the call.
// NOTE(review): the declaration and initialisation of `ap`, and the closing
// brace, are not visible in this excerpt - confirm against the full file.
108 void long_double_struct(int n
, ...) {
111 foo_ls(va_arg(ap
, ldbl128_s
));