; RUN: opt -S -instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
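
; ptrue takes an immediate predicate pattern; pattern 31 (SV_ALL) produces an all-active predicate.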
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

; SVE intrinsics fmul, fadd and fsub should be replaced with regular fmul, fadd and fsub instructions.
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_half
; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_float
; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_double
; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_half
; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_float
; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_double
; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_half
; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_float
; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_double
; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}
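
; No replacement when the governing predicate is not an all-active ptrue: pattern 5 is not
; SV_ALL (31), so the intrinsic call must be preserved as-is.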
define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @no_replace_on_non_ptrue_all
; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
; CHECK-NEXT: ret <vscale x 2 x double> %2
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}
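
; The fast flag on the intrinsic call is not required for the replacement; without it the
; resulting fsub carries no fast-math flags.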
define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

attributes #0 = { "target-features"="+sve" }