; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Sink the GEP to make use of scalar+vector addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink sext to make use of scalar+sxtw(vector) addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_sext(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; As above but ensure both the GEP and sext are sunk.
define <vscale x 4 x float> @gather_offsets_sink_sext_get(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext_get(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink GEPs that cannot benefit from SVE's scalar+vector addressing modes.
define <vscale x 4 x float> @gather_no_scalar_base(<vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_no_scalar_base(
; CHECK-SAME: <vscale x 4 x ptr> [[BASES:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, <vscale x 4 x ptr> [[BASES]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, <vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends whose result type is already favourable for SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_small(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_small(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i32>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends that cannot benefit from SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_big(ptr %base, <vscale x 4 x i48> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_big(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i48> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i48> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i48> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink zext to make use of scalar+uxtw(vector) addressing modes.
; TODO: There's an argument here to split the extend into i8->i32 and i32->i64,
; which would be especially useful if the i8s are the result of a load because
; it would maintain the use of sign-extending loads.
define <vscale x 4 x float> @gather_offset_sink_zext(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_sink_zext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = zext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.zext = zext <vscale x 4 x i8> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.zext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Ensure we support scatters as well as gathers.
define void @scatter_offsets_sink_sext_get(<vscale x 4 x float> %data, ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define void @scatter_offsets_sink_sext_get(
; CHECK-SAME: <vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       [[COND_BLOCK]]:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[DATA]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    ret void
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  tail call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask)
  br label %exit

exit:
  ret void
}

declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)