// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=UNCONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -ffp-exception-behavior=strict \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=COMMONIR --check-prefix=CONSTRAINED %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
// RUN: -ffp-exception-behavior=strict \
// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | llc -o=- - \
// RUN: | FileCheck --check-prefix=COMMON --check-prefix=CHECK-ASM %s
// REQUIRES: aarch64-registered-target

// Test new aarch64 intrinsics and types but constrained

#include <arm_neon.h>
22 // COMMON-LABEL: test_vfmas_lane_f32
23 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
24 // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a)
25 // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float %b, float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
26 // CHECK-ASM: fmla s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
27 // COMMONIR: ret float [[TMP2]]
28 float32_t
test_vfmas_lane_f32(float32_t a
, float32_t b
, float32x2_t c
) {
29 return vfmas_lane_f32(a
, b
, c
, 1);
32 // COMMON-LABEL: test_vfmad_lane_f64
33 // COMMONIR: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0
34 // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
35 // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
36 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
37 // COMMONIR: ret double [[TMP2]]
38 float64_t
test_vfmad_lane_f64(float64_t a
, float64_t b
, float64x1_t c
) {
39 return vfmad_lane_f64(a
, b
, c
, 0);
42 // COMMON-LABEL: test_vfmad_laneq_f64
43 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1
44 // UNCONSTRAINED: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a)
45 // CONSTRAINED: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fma.f64(double %b, double [[EXTRACT]], double %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
46 // CHECK-ASM: fmla d{{[0-9]+}}, d{{[0-9]+}}, v{{[0-9]+}}.d[{{[0-9]+}}]
47 // COMMONIR: ret double [[TMP2]]
48 float64_t
test_vfmad_laneq_f64(float64_t a
, float64_t b
, float64x2_t c
) {
49 return vfmad_laneq_f64(a
, b
, c
, 1);
52 // COMMON-LABEL: test_vfmss_lane_f32
53 // COMMONIR: [[SUB:%.*]] = fneg float %b
54 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1
55 // UNCONSTRAINED: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a)
56 // CONSTRAINED: [[TMP2:%.*]] = call float @llvm.experimental.constrained.fma.f32(float [[SUB]], float [[EXTRACT]], float %a, metadata !"round.tonearest", metadata !"fpexcept.strict")
57 // CHECK-ASM: fmls s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}.s[{{[0-9]+}}]
58 // COMMONIR: ret float [[TMP2]]
59 float32_t
test_vfmss_lane_f32(float32_t a
, float32_t b
, float32x2_t c
) {
60 return vfmss_lane_f32(a
, b
, c
, 1);
63 // COMMON-LABEL: test_vfma_lane_f64
64 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
65 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
66 // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
67 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
68 // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
69 // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
70 // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
71 // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
72 // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
73 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
74 // COMMONIR: ret <1 x double> [[FMLA2]]
75 float64x1_t
test_vfma_lane_f64(float64x1_t a
, float64x1_t b
, float64x1_t v
) {
76 return vfma_lane_f64(a
, b
, v
, 0);
79 // COMMON-LABEL: test_vfms_lane_f64
80 // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
81 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
82 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
83 // COMMONIR: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8>
84 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double>
85 // COMMONIR: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer
86 // COMMONIR: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double>
87 // COMMONIR: [[FMLA1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
88 // UNCONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]])
89 // CONSTRAINED: [[FMLA2:%.*]] = call <1 x double> @llvm.experimental.constrained.fma.v1f64(<1 x double> [[FMLA]], <1 x double> [[LANE]], <1 x double> [[FMLA1]], metadata !"round.tonearest", metadata !"fpexcept.strict")
90 // CHECK-ASM: fmsub d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
91 // COMMONIR: ret <1 x double> [[FMLA2]]
92 float64x1_t
test_vfms_lane_f64(float64x1_t a
, float64x1_t b
, float64x1_t v
) {
93 return vfms_lane_f64(a
, b
, v
, 0);
96 // COMMON-LABEL: test_vfma_laneq_f64
97 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
98 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
99 // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
100 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
101 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
102 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
103 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
104 // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
105 // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
106 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
107 // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
108 // COMMONIR: ret <1 x double> [[TMP7]]
109 float64x1_t
test_vfma_laneq_f64(float64x1_t a
, float64x1_t b
, float64x2_t v
) {
110 return vfma_laneq_f64(a
, b
, v
, 0);
113 // COMMON-LABEL: test_vfms_laneq_f64
114 // COMMONIR: [[SUB:%.*]] = fneg <1 x double> %b
115 // CHECK-ASM: fneg d{{[0-9]+}}, d{{[0-9]+}}
116 // COMMONIR: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
117 // COMMONIR: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
118 // COMMONIR: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
119 // COMMONIR: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double
120 // COMMONIR: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double
121 // COMMONIR: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double>
122 // COMMONIR: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
123 // UNCONSTRAINED: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]])
124 // CONSTRAINED: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fma.f64(double [[TMP4]], double [[EXTRACT]], double [[TMP3]], metadata !"round.tonearest", metadata !"fpexcept.strict")
125 // CHECK-ASM: fmadd d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
126 // COMMONIR: [[TMP7:%.*]] = bitcast double [[TMP6]] to <1 x double>
127 // COMMONIR: ret <1 x double> [[TMP7]]
128 float64x1_t
test_vfms_laneq_f64(float64x1_t a
, float64x1_t b
, float64x2_t v
) {
129 return vfms_laneq_f64(a
, b
, v
, 0);