1 ; Check the memory-operation cost model for fixed-length vectors with SVE and Neon.
2 ; When the minimum SVE vector size is below 256 bits, the Neon cost model is assumed,
3 ; so CHECK-NEON and CHECK-SVE-128 expect the same costs.
; Each run enables the shared CHECK prefix alongside its configuration-specific
; prefix. Without it, the per-function header directives in this file are never
; evaluated and the cost directives are not anchored to the intended function.
5 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
6 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-128
7 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-256
8 ; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE-512
; Load of a 128-bit fixed vector (<16 x i8>).
; All four configurations expect a cost of 1.
10 define <16 x i8> @load16(<16 x i8>* %ptr) {
11 ; CHECK: 'Cost Model Analysis' for function 'load16':
12 ; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
13 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
14 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
15 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
16 %out = load <16 x i8>, <16 x i8>* %ptr
; Store of a 128-bit fixed vector (<16 x i8>).
; All four configurations expect a cost of 1.
20 define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
21 ; CHECK: 'Cost Model Analysis' for function 'store16':
22 ; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
23 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
24 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
25 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
26 store <16 x i8> %val, <16 x i8>* %ptr
; Load of a 64-bit fixed vector (<8 x i8>).
; All four configurations expect a cost of 1.
30 define <8 x i8> @load8(<8 x i8>* %ptr) {
31 ; CHECK: 'Cost Model Analysis' for function 'load8':
32 ; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
33 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
34 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
35 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
36 %out = load <8 x i8>, <8 x i8>* %ptr
; Store of a 64-bit fixed vector (<8 x i8>).
; All four configurations expect a cost of 1.
40 define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
41 ; CHECK: 'Cost Model Analysis' for function 'store8':
42 ; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
43 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
44 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
45 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
46 store <8 x i8> %val, <8 x i8>* %ptr
; Load of a 32-bit fixed vector (<4 x i8>).
; Expected cost is 2 for the Neon and 128-bit SVE runs, and 1 for the 256-bit
; and 512-bit SVE runs.
50 define <4 x i8> @load4(<4 x i8>* %ptr) {
51 ; CHECK: 'Cost Model Analysis' for function 'load4':
52 ; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
53 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
54 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
55 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
56 %out = load <4 x i8>, <4 x i8>* %ptr
; Store of a 32-bit fixed vector (<4 x i8>).
; Expected cost is 2 for the Neon and 128-bit SVE runs, and 1 for the 256-bit
; and 512-bit SVE runs.
60 define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
61 ; CHECK: 'Cost Model Analysis' for function 'store4':
62 ; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
63 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
64 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
65 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
66 store <4 x i8> %val, <4 x i8>* %ptr
; Load of a 256-bit fixed vector (<16 x i16>).
; Expected cost is 2 for the Neon and 128-bit SVE runs, and 1 for the 256-bit
; and 512-bit SVE runs.
70 define <16 x i16> @load_256(<16 x i16>* %ptr) {
71 ; CHECK: 'Cost Model Analysis' for function 'load_256':
72 ; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
73 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
74 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
75 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
76 %out = load <16 x i16>, <16 x i16>* %ptr
; Load of a 512-bit fixed vector (<8 x i64>).
; Expected cost is 4 for the Neon and 128-bit SVE runs, 2 for the 256-bit SVE
; run, and 1 for the 512-bit SVE run.
80 define <8 x i64> @load_512(<8 x i64>* %ptr) {
81 ; CHECK: 'Cost Model Analysis' for function 'load_512':
82 ; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
83 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
84 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
85 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
86 %out = load <8 x i64>, <8 x i64>* %ptr
; Masked gather intrinsic returning <4 x i8>. Operands as used below: vector of
; i8 pointers, i32 immediate (the alignment, per the LangRef), <4 x i1> mask,
; and a <4 x i8> pass-through value.
90 declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>)
; Masked gather of <4 x i8> with a constant all-true mask and an undef
; pass-through. All four configurations expect a cost of 17 for the call.
91 define <4 x i8> @gather_load_4xi8_constant_mask(<4 x i8*> %ptrs) {
92 ; CHECK: gather_load_4xi8_constant_mask
93 ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
94 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
95 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
96 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
98 %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
; Masked gather of <4 x i8> whose mask is the runtime argument %cond.
; All four configurations expect a cost of 29 for the call, versus 17 for the
; constant all-true mask variant.
102 define <4 x i8> @gather_load_4xi8_variable_mask(<4 x i8*> %ptrs, <4 x i1> %cond) {
103 ; CHECK: gather_load_4xi8_variable_mask
104 ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
105 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
106 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
107 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8
109 %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %ptrs, i32 1, <4 x i1> %cond, <4 x i8> undef)
; Masked scatter intrinsic for <4 x i8> values. Operands as used below: value
; vector, vector of i8 pointers, i32 immediate (the alignment, per the
; LangRef), and <4 x i1> mask.
113 declare void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8>, <4 x i8*>, i32 immarg, <4 x i1>)
; Masked scatter of <4 x i8> with a constant all-true mask.
; All four configurations expect a cost of 17 for the call.
114 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x i8*> %ptrs) {
115 ; CHECK: scatter_store_4xi8_constant_mask
116 ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
117 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
118 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
119 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
121 call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; Masked scatter of <4 x i8> whose mask is the runtime argument %cond.
; All four configurations expect a cost of 29 for the call, versus 17 for the
; constant all-true mask variant.
125 define void @scatter_store_4xi8_variable_mask(<4 x i8> %val, <4 x i8*> %ptrs, <4 x i1> %cond) {
126 ; CHECK: scatter_store_4xi8_variable_mask
127 ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
128 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
129 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
130 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i8.v4p0i8(
132 call void @llvm.masked.scatter.v4i8.v4p0i8(<4 x i8> %val, <4 x i8*> %ptrs, i32 1, <4 x i1> %cond)
; Masked gather intrinsic returning <4 x i32>. Operands as used below: vector
; of i32 pointers, i32 immediate (the alignment, per the LangRef), <4 x i1>
; mask, and a <4 x i32> pass-through value.
136 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32 immarg, <4 x i1>, <4 x i32>)
; Masked gather of <4 x i32> with a constant all-true mask and an undef
; pass-through. All four configurations expect a cost of 17 for the call, the
; same value as the <4 x i8> variant.
137 define <4 x i32> @gather_load_4xi32_constant_mask(<4 x i32*> %ptrs) {
138 ; CHECK: gather_load_4xi32_constant_mask
139 ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
140 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
141 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
142 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
144 %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; Masked gather of <4 x i32> whose mask is the runtime argument %cond.
; All four configurations expect a cost of 29 for the call, versus 17 for the
; constant all-true mask variant.
148 define <4 x i32> @gather_load_4xi32_variable_mask(<4 x i32*> %ptrs, <4 x i1> %cond) {
149 ; CHECK: gather_load_4xi32_variable_mask
150 ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
151 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
152 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
153 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32
155 %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 1, <4 x i1> %cond, <4 x i32> undef)
; Masked scatter intrinsic for <4 x i32> values. Operands as used below: value
; vector, vector of i32 pointers, i32 immediate (the alignment, per the
; LangRef), and <4 x i1> mask.
159 declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32 immarg, <4 x i1>)
; Masked scatter of <4 x i32> with a constant all-true mask.
; All four configurations expect a cost of 17 for the call.
160 define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x i32*> %ptrs) {
161 ; CHECK: scatter_store_4xi32_constant_mask
162 ; CHECK-NEON: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
163 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
164 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
165 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
167 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; Masked scatter of <4 x i32> whose mask is the runtime argument %cond.
; All four configurations expect a cost of 29 for the call, versus 17 for the
; constant all-true mask variant.
171 define void @scatter_store_4xi32_variable_mask(<4 x i32> %val, <4 x i32*> %ptrs, <4 x i1> %cond) {
172 ; CHECK: scatter_store_4xi32_variable_mask
173 ; CHECK-NEON: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
174 ; CHECK-SVE-128: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
175 ; CHECK-SVE-256: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
176 ; CHECK-SVE-512: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(
178 call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %ptrs, i32 1, <4 x i1> %cond)