1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
3 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-v1 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-2
4 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-1
5 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mcpu=cortex-a510 < %s | FileCheck %s --check-prefix=CHECK-VSCALE-1
7 target triple="aarch64--linux-gnu"
8 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
10 define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #0 {
11 ; CHECK-LABEL: 'masked_scatters'
12 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
13 ; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
14 ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
15 ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i1.nxv4p0(<vscale x 4 x i1> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
16 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
18 ; CHECK-VSCALE-2-LABEL: 'masked_scatters'
19 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
20 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
21 ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
22 ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i1.nxv4p0(<vscale x 4 x i1> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
23 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
25 ; CHECK-VSCALE-1-LABEL: 'masked_scatters'
26 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
27 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
28 ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
29 ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv4i1.nxv4p0(<vscale x 4 x i1> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
30 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
32 call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
33 call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
34 call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
35 call void @llvm.masked.scatter.nxv4i1(<vscale x 4 x i1> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
39 define void @masked_scatters_tune_generic(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) #1 {
40 ; CHECK-LABEL: 'masked_scatters_tune_generic'
41 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
42 ; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
43 ; CHECK-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
44 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
46 ; CHECK-VSCALE-2-LABEL: 'masked_scatters_tune_generic'
47 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
48 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
49 ; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
50 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
52 ; CHECK-VSCALE-1-LABEL: 'masked_scatters_tune_generic'
53 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
54 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
55 ; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
56 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
58 call void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x ptr> undef, i32 0, <vscale x 4 x i1> %nxv4i1mask)
59 call void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x ptr> undef, i32 0, <vscale x 8 x i1> %nxv8i1mask)
60 call void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x ptr> undef, i32 0, <vscale x 1 x i1> %nxv1i1mask)
64 define void @masked_scatters_no_vscale_range() #2 {
65 ; CHECK-LABEL: 'masked_scatters_no_vscale_range'
66 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
67 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
68 ; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
69 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
70 ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
71 ; CHECK-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
72 ; CHECK-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
73 ; CHECK-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
74 ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
76 ; CHECK-VSCALE-2-LABEL: 'masked_scatters_no_vscale_range'
77 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
78 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
79 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
80 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
81 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
82 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
83 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
84 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
85 ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
87 ; CHECK-VSCALE-1-LABEL: 'masked_scatters_no_vscale_range'
88 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
89 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
90 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
91 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
92 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
93 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.scatter.nxv16i16.nxv16p0(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
94 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
95 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
96 ; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
98 call void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
99 call void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
101 call void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
102 call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
103 call void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x ptr> undef, i32 1, <vscale x 2 x i1> undef)
105 call void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x ptr> undef, i32 1, <vscale x 16 x i1> undef)
106 call void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x ptr> undef, i32 1, <vscale x 8 x i1> undef)
107 call void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x ptr> undef, i32 1, <vscale x 4 x i1> undef)
112 attributes #0 = { "target-features"="+sve" vscale_range(1, 8) }
113 attributes #1 = { "target-features"="+sve" vscale_range(1, 16) "tune-cpu"="generic" }
114 attributes #2 = { "target-features"="+sve" }
116 declare void @llvm.masked.scatter.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
117 declare void @llvm.masked.scatter.nxv8i32(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
118 declare void @llvm.masked.scatter.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
119 declare void @llvm.masked.scatter.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
120 declare void @llvm.masked.scatter.nxv4f64(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
121 declare void @llvm.masked.scatter.nxv2f64(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
122 declare void @llvm.masked.scatter.nxv8f32(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
123 declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
124 declare void @llvm.masked.scatter.nxv2f32(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
125 declare void @llvm.masked.scatter.nxv16i16(<vscale x 16 x i16>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)
126 declare void @llvm.masked.scatter.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)
127 declare void @llvm.masked.scatter.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)