; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | not grep ptrue
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE for 64-bit vectors.
define <2 x float> @load_v2f32(ptr %a) #0 {
; CHECK-LABEL: load_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %load = load <2 x float>, ptr %a
  ret <2 x float> %load
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @load_v4f32(ptr %a) #0 {
; CHECK-LABEL: load_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
  %load = load <4 x float>, ptr %a
  ret <4 x float> %load
}
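
; Use SVE for 256-bit vectors and wider; the result is returned indirectly via x8.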
define <8 x float> @load_v8f32(ptr %a) #0 {
; CHECK-LABEL: load_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
; CHECK-NEXT:    ret
  %load = load <8 x float>, ptr %a
  ret <8 x float> %load
}
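
; A 512-bit load is split into two 256-bit accesses when only a 256-bit
; vector length is guaranteed, and uses a single vl16 load/store otherwise.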
define <16 x float> @load_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x9, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x8]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v16f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl16
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v16f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl16
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT:    ret
  %load = load <16 x float>, ptr %a
  ret <16 x float> %load
}
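
; A 1024-bit load needs four accesses at VL256, two at VL512, and one from
; VL1024 upwards.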
define <32 x float> @load_v32f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v32f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x9, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x11, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x8, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x8, x11, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x8]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v32f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x9, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x8]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v32f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v32f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT:    ret
  %load = load <32 x float>, ptr %a
  ret <32 x float> %load
}
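
; A 2048-bit load needs eight accesses at VL256, four at VL512, two at
; VL1024, and one at VL2048.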
define <64 x float> @load_v64f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: load_v64f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x9, #8 // =0x8
; VBITS_GE_256-NEXT:    mov x10, #16 // =0x10
; VBITS_GE_256-NEXT:    mov x11, #24 // =0x18
; VBITS_GE_256-NEXT:    mov x12, #56 // =0x38
; VBITS_GE_256-NEXT:    mov x13, #32 // =0x20
; VBITS_GE_256-NEXT:    mov x14, #48 // =0x30
; VBITS_GE_256-NEXT:    mov x15, #40 // =0x28
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x12, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0, x14, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z4.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z5.s }, p0/z, [x0, x15, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z6.s }, p0/z, [x0, x13, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z7.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x8, x12, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z2.s }, p0, [x8, x14, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z5.s }, p0, [x8, x15, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z6.s }, p0, [x8, x13, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z4.s }, p0, [x8, x11, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x8, x10, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z7.s }, p0, [x8]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: load_v64f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov x9, #48 // =0x30
; VBITS_GE_512-NEXT:    mov x10, #32 // =0x20
; VBITS_GE_512-NEXT:    mov x11, #16 // =0x10
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0, x10, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x0, x11, lsl #2]
; VBITS_GE_512-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x8, x10, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z2.s }, p0, [x8, x11, lsl #2]
; VBITS_GE_512-NEXT:    st1w { z3.s }, p0, [x8]
; VBITS_GE_512-NEXT:    ret
;
; VBITS_GE_1024-LABEL: load_v64f32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x9, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x8, x9, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x8]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_v64f32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x8]
; VBITS_GE_2048-NEXT:    ret
  %load = load <64 x float>, ptr %a
  ret <64 x float> %load
}

attributes #0 = { "target-features"="+sve" }