; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-unknown | FileCheck %s
; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-unknown -mattr=slow-misaligned-128store | FileCheck %s --check-prefix=SLOW_MISALIGNED_128_STORE

target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"

; NOTE(review): the datalayout above is a legacy 32-bit layout despite the
; aarch64 triple — presumably inherited from an older ARM copy of this test;
; confirm it is intentional before changing it.
; CHECK-LABEL: getMemoryOpCost
; SLOW_MISALIGNED_128_STORE-LABEL: getMemoryOpCost
7 define void @getMemoryOpCost() {
; If FeatureSlowMisaligned128Store is set, we penalize 128-bit stores.
; The unlegalized 256-bit stores are further penalized when legalized down
; to 128-bit stores.
12 ; CHECK: cost of 2 for {{.*}} store <4 x i64>
13 ; SLOW_MISALIGNED_128_STORE: cost of 24 for {{.*}} store <4 x i64>
14 store <4 x i64> undef, <4 x i64> * undef
15 ; CHECK-NEXT: cost of 2 for {{.*}} store <8 x i32>
16 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <8 x i32>
17 store <8 x i32> undef, <8 x i32> * undef
18 ; CHECK-NEXT: cost of 2 for {{.*}} store <16 x i16>
19 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <16 x i16>
20 store <16 x i16> undef, <16 x i16> * undef
21 ; CHECK-NEXT: cost of 2 for {{.*}} store <32 x i8>
22 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <32 x i8>
23 store <32 x i8> undef, <32 x i8> * undef
25 ; CHECK-NEXT: cost of 2 for {{.*}} store <4 x double>
26 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <4 x double>
27 store <4 x double> undef, <4 x double> * undef
28 ; CHECK-NEXT: cost of 2 for {{.*}} store <8 x float>
29 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <8 x float>
30 store <8 x float> undef, <8 x float> * undef
31 ; CHECK-NEXT: cost of 2 for {{.*}} store <16 x half>
32 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 24 for {{.*}} store <16 x half>
33 store <16 x half> undef, <16 x half> * undef
35 ; CHECK-NEXT: cost of 1 for {{.*}} store <2 x i64>
36 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <2 x i64>
37 store <2 x i64> undef, <2 x i64> * undef
38 ; CHECK-NEXT: cost of 1 for {{.*}} store <4 x i32>
39 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <4 x i32>
40 store <4 x i32> undef, <4 x i32> * undef
41 ; CHECK-NEXT: cost of 1 for {{.*}} store <8 x i16>
42 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <8 x i16>
43 store <8 x i16> undef, <8 x i16> * undef
44 ; CHECK-NEXT: cost of 1 for {{.*}} store <16 x i8>
45 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <16 x i8>
46 store <16 x i8> undef, <16 x i8> * undef
48 ; CHECK-NEXT: cost of 1 for {{.*}} store <2 x double>
49 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <2 x double>
50 store <2 x double> undef, <2 x double> * undef
51 ; CHECK-NEXT: cost of 1 for {{.*}} store <4 x float>
52 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <4 x float>
53 store <4 x float> undef, <4 x float> * undef
54 ; CHECK-NEXT: cost of 1 for {{.*}} store <8 x half>
55 ; SLOW_MISALIGNED_128_STORE-NEXT: cost of 12 for {{.*}} store <8 x half>
56 store <8 x half> undef, <8 x half> * undef
; We scalarize the loads/stores because there is no vector register name for
; these types (they get extended to v.4h/v.2s).
60 ; CHECK: cost of 16 {{.*}} store
61 store <2 x i8> undef, <2 x i8> * undef
62 ; CHECK: cost of 1 {{.*}} store
63 store <4 x i8> undef, <4 x i8> * undef
64 ; CHECK: cost of 16 {{.*}} load
65 load <2 x i8> , <2 x i8> * undef
66 ; CHECK: cost of 64 {{.*}} load
67 load <4 x i8> , <4 x i8> * undef