1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK
3 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="e" | FileCheck %s --check-prefixes=CHECK
4 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=sse2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK
5 ; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=avx2 --data-layout="E" | FileCheck %s --check-prefixes=CHECK
7 ;-------------------------------------------------------------------------------
8 ; Here we know we can load 128 bits as per dereferenceability and alignment.
10 ; We don't widen scalar loads per se.
; NOTE(review): despite its name, this function loads <1 x float> (a one-element
; vector, not a scalar float) and is otherwise identical to @vec_with_1elt --
; confirm whether a true scalar load was intended here.
; The autogenerated assertions expect the load to be left unchanged.
11 define <1 x float> @scalar(<1 x float>* align 16 dereferenceable(16) %p) {
12 ; CHECK-LABEL: @scalar(
13 ; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16
14 ; CHECK-NEXT: ret <1 x float> [[R]]
16 %r = load <1 x float>, <1 x float>* %p, align 16
20 ; We don't widen single-element loads; these get scalarized.
; The autogenerated assertions expect the <1 x float> load to be left unchanged.
21 define <1 x float> @vec_with_1elt(<1 x float>* align 16 dereferenceable(16) %p) {
22 ; CHECK-LABEL: @vec_with_1elt(
23 ; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 16
24 ; CHECK-NEXT: ret <1 x float> [[R]]
26 %r = load <1 x float>, <1 x float>* %p, align 16
; Two-element float vector load (2 x 32 bits) through a pointer that is
; 16-byte aligned and dereferenceable(16).
; The autogenerated assertions expect the load to be left unchanged.
30 define <2 x float> @vec_with_2elts(<2 x float>* align 16 dereferenceable(16) %p) {
31 ; CHECK-LABEL: @vec_with_2elts(
32 ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 16
33 ; CHECK-NEXT: ret <2 x float> [[R]]
35 %r = load <2 x float>, <2 x float>* %p, align 16
; Three-element float vector load (3 x 32 bits) through a pointer that is
; 16-byte aligned and dereferenceable(16).
; The autogenerated assertions expect the load to be left unchanged.
39 define <3 x float> @vec_with_3elts(<3 x float>* align 16 dereferenceable(16) %p) {
40 ; CHECK-LABEL: @vec_with_3elts(
41 ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16
42 ; CHECK-NEXT: ret <3 x float> [[R]]
44 %r = load <3 x float>, <3 x float>* %p, align 16
48 ; Full-vector load. All good already.
; <4 x float> is 4 x 32 = 128 bits, matching the 16 bytes marked
; dereferenceable on %p; the autogenerated assertions expect no change.
49 define <4 x float> @vec_with_4elts(<4 x float>* align 16 dereferenceable(16) %p) {
50 ; CHECK-LABEL: @vec_with_4elts(
51 ; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 16
52 ; CHECK-NEXT: ret <4 x float> [[R]]
54 %r = load <4 x float>, <4 x float>* %p, align 16
58 ; We don't know we can load 256 bits though.
; <5 x float> is 5 x 32 = 160 bits, which is more than the 16 bytes marked
; dereferenceable on %p; the autogenerated assertions expect no change.
59 define <5 x float> @vec_with_5elts(<5 x float>* align 16 dereferenceable(16) %p) {
60 ; CHECK-LABEL: @vec_with_5elts(
61 ; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 16
62 ; CHECK-NEXT: ret <5 x float> [[R]]
64 %r = load <5 x float>, <5 x float>* %p, align 16
68 ;-------------------------------------------------------------------------------
70 ; We can load 128 bits, and the fact that it's underaligned isn't relevant.
; Here %p is only 8-byte aligned but is still dereferenceable(16).
; The autogenerated assertions expect the load to be left unchanged.
71 define <3 x float> @vec_with_3elts_underaligned(<3 x float>* align 8 dereferenceable(16) %p) {
72 ; CHECK-LABEL: @vec_with_3elts_underaligned(
73 ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8
74 ; CHECK-NEXT: ret <3 x float> [[R]]
76 %r = load <3 x float>, <3 x float>* %p, align 8
80 ; We don't know we can load 128 bits, but since it's aligned, we can still do a wide load.
81 ; FIXME: this should still get widened.
; Here %p is 16-byte aligned but only dereferenceable(12).
; The autogenerated assertions currently expect the load to be left unchanged.
82 define <3 x float> @vec_with_3elts_underdereferenceable(<3 x float>* align 16 dereferenceable(12) %p) {
83 ; CHECK-LABEL: @vec_with_3elts_underdereferenceable(
84 ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 16
85 ; CHECK-NEXT: ret <3 x float> [[R]]
87 %r = load <3 x float>, <3 x float>* %p, align 16
91 ; We can't tell if we can load 128 bits.
; Here %p is only 8-byte aligned and only dereferenceable(12).
; The autogenerated assertions expect the load to be left unchanged.
92 define <3 x float> @vec_with_3elts_underaligned_underdereferenceable(<3 x float>* align 8 dereferenceable(12) %p) {
93 ; CHECK-LABEL: @vec_with_3elts_underaligned_underdereferenceable(
94 ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 8
95 ; CHECK-NEXT: ret <3 x float> [[R]]
97 %r = load <3 x float>, <3 x float>* %p, align 8
101 ;-------------------------------------------------------------------------------
102 ; Here we know we can load 256 bits as per dereferenceability and alignment.
; One-element float vector load through a pointer that is 32-byte aligned and
; dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
104 define <1 x float> @vec_with_1elt_256bits(<1 x float>* align 32 dereferenceable(32) %p) {
105 ; CHECK-LABEL: @vec_with_1elt_256bits(
106 ; CHECK-NEXT: [[R:%.*]] = load <1 x float>, <1 x float>* [[P:%.*]], align 32
107 ; CHECK-NEXT: ret <1 x float> [[R]]
109 %r = load <1 x float>, <1 x float>* %p, align 32
; Two-element float vector load (2 x 32 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
113 define <2 x float> @vec_with_2elts_256bits(<2 x float>* align 32 dereferenceable(32) %p) {
114 ; CHECK-LABEL: @vec_with_2elts_256bits(
115 ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 32
116 ; CHECK-NEXT: ret <2 x float> [[R]]
118 %r = load <2 x float>, <2 x float>* %p, align 32
; Three-element float vector load (3 x 32 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
122 define <3 x float> @vec_with_3elts_256bits(<3 x float>* align 32 dereferenceable(32) %p) {
123 ; CHECK-LABEL: @vec_with_3elts_256bits(
124 ; CHECK-NEXT: [[R:%.*]] = load <3 x float>, <3 x float>* [[P:%.*]], align 32
125 ; CHECK-NEXT: ret <3 x float> [[R]]
127 %r = load <3 x float>, <3 x float>* %p, align 32
; Four-element float vector load (4 x 32 = 128 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
131 define <4 x float> @vec_with_4elts_256bits(<4 x float>* align 32 dereferenceable(32) %p) {
132 ; CHECK-LABEL: @vec_with_4elts_256bits(
133 ; CHECK-NEXT: [[R:%.*]] = load <4 x float>, <4 x float>* [[P:%.*]], align 32
134 ; CHECK-NEXT: ret <4 x float> [[R]]
136 %r = load <4 x float>, <4 x float>* %p, align 32
; Five-element float vector load (5 x 32 = 160 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
140 define <5 x float> @vec_with_5elts_256bits(<5 x float>* align 32 dereferenceable(32) %p) {
141 ; CHECK-LABEL: @vec_with_5elts_256bits(
142 ; CHECK-NEXT: [[R:%.*]] = load <5 x float>, <5 x float>* [[P:%.*]], align 32
143 ; CHECK-NEXT: ret <5 x float> [[R]]
145 %r = load <5 x float>, <5 x float>* %p, align 32
; Six-element float vector load (6 x 32 = 192 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
149 define <6 x float> @vec_with_6elts_256bits(<6 x float>* align 32 dereferenceable(32) %p) {
150 ; CHECK-LABEL: @vec_with_6elts_256bits(
151 ; CHECK-NEXT: [[R:%.*]] = load <6 x float>, <6 x float>* [[P:%.*]], align 32
152 ; CHECK-NEXT: ret <6 x float> [[R]]
154 %r = load <6 x float>, <6 x float>* %p, align 32
; Seven-element float vector load (7 x 32 = 224 bits) through a pointer that is
; 32-byte aligned and dereferenceable(32).
; The autogenerated assertions expect the load to be left unchanged.
158 define <7 x float> @vec_with_7elts_256bits(<7 x float>* align 32 dereferenceable(32) %p) {
159 ; CHECK-LABEL: @vec_with_7elts_256bits(
160 ; CHECK-NEXT: [[R:%.*]] = load <7 x float>, <7 x float>* [[P:%.*]], align 32
161 ; CHECK-NEXT: ret <7 x float> [[R]]
163 %r = load <7 x float>, <7 x float>* %p, align 32
167 ; Full-vector load. All good already.
; <8 x float> is 8 x 32 = 256 bits, matching the 32 bytes marked
; dereferenceable on %p; the autogenerated assertions expect no change.
168 define <8 x float> @vec_with_8elts_256bits(<8 x float>* align 32 dereferenceable(32) %p) {
169 ; CHECK-LABEL: @vec_with_8elts_256bits(
170 ; CHECK-NEXT: [[R:%.*]] = load <8 x float>, <8 x float>* [[P:%.*]], align 32
171 ; CHECK-NEXT: ret <8 x float> [[R]]
173 %r = load <8 x float>, <8 x float>* %p, align 32
177 ; We can't tell if we can load more than 256 bits.
; <9 x float> is 9 x 32 = 288 bits, which is more than the 32 bytes marked
; dereferenceable on %p; the autogenerated assertions expect no change.
178 define <9 x float> @vec_with_9elts_256bits(<9 x float>* align 32 dereferenceable(32) %p) {
179 ; CHECK-LABEL: @vec_with_9elts_256bits(
180 ; CHECK-NEXT: [[R:%.*]] = load <9 x float>, <9 x float>* [[P:%.*]], align 32
181 ; CHECK-NEXT: ret <9 x float> [[R]]
183 %r = load <9 x float>, <9 x float>* %p, align 32
187 ;-------------------------------------------------------------------------------
189 ; Weird types we don't deal with.
; Two elements of type i7, i.e. each element is narrower than one byte.
; The autogenerated assertions expect the load to be left unchanged.
190 define <2 x i7> @vec_with_two_subbyte_elts(<2 x i7>* align 16 dereferenceable(16) %p) {
191 ; CHECK-LABEL: @vec_with_two_subbyte_elts(
192 ; CHECK-NEXT: [[R:%.*]] = load <2 x i7>, <2 x i7>* [[P:%.*]], align 16
193 ; CHECK-NEXT: ret <2 x i7> [[R]]
195 %r = load <2 x i7>, <2 x i7>* %p, align 16
; Two elements of type i9, i.e. the element width is not a multiple of 8 bits.
; The autogenerated assertions expect the load to be left unchanged.
199 define <2 x i9> @vec_with_two_nonbyte_sized_elts(<2 x i9>* align 16 dereferenceable(16) %p) {
200 ; CHECK-LABEL: @vec_with_two_nonbyte_sized_elts(
201 ; CHECK-NEXT: [[R:%.*]] = load <2 x i9>, <2 x i9>* [[P:%.*]], align 16
202 ; CHECK-NEXT: ret <2 x i9> [[R]]
204 %r = load <2 x i9>, <2 x i9>* %p, align 16
; Two elements of type i24, i.e. the element width is a whole number of bytes
; but not a power of two.
; The autogenerated assertions expect the load to be left unchanged.
208 define <2 x i24> @vec_with_two_nonpoweroftwo_sized_elts(<2 x i24>* align 16 dereferenceable(16) %p) {
209 ; CHECK-LABEL: @vec_with_two_nonpoweroftwo_sized_elts(
210 ; CHECK-NEXT: [[R:%.*]] = load <2 x i24>, <2 x i24>* [[P:%.*]], align 16
211 ; CHECK-NEXT: ret <2 x i24> [[R]]
213 %r = load <2 x i24>, <2 x i24>* %p, align 16
; Two-element float vector load through a pointer in addrspace(2) rather than
; the default address space.
; The autogenerated assertions expect the load to be left unchanged.
217 define <2 x float> @vec_with_2elts_addressspace(<2 x float> addrspace(2)* align 16 dereferenceable(16) %p) {
218 ; CHECK-LABEL: @vec_with_2elts_addressspace(
219 ; CHECK-NEXT: [[R:%.*]] = load <2 x float>, <2 x float> addrspace(2)* [[P:%.*]], align 16
220 ; CHECK-NEXT: ret <2 x float> [[R]]
222 %r = load <2 x float>, <2 x float> addrspace(2)* %p, align 16
226 ;-------------------------------------------------------------------------------
228 ; Widening these would change the legalized type, so leave them alone.
; Two i1 elements loaded through a 16-byte aligned, dereferenceable(16) pointer.
; The autogenerated assertions expect the load to be left unchanged.
230 define <2 x i1> @vec_with_2elts_128bits_i1(<2 x i1>* align 16 dereferenceable(16) %p) {
231 ; CHECK-LABEL: @vec_with_2elts_128bits_i1(
232 ; CHECK-NEXT: [[R:%.*]] = load <2 x i1>, <2 x i1>* [[P:%.*]], align 16
233 ; CHECK-NEXT: ret <2 x i1> [[R]]
235 %r = load <2 x i1>, <2 x i1>* %p, align 16
; Two i2 elements loaded through a 16-byte aligned, dereferenceable(16) pointer.
; The autogenerated assertions expect the load to be left unchanged.
238 define <2 x i2> @vec_with_2elts_128bits_i2(<2 x i2>* align 16 dereferenceable(16) %p) {
239 ; CHECK-LABEL: @vec_with_2elts_128bits_i2(
240 ; CHECK-NEXT: [[R:%.*]] = load <2 x i2>, <2 x i2>* [[P:%.*]], align 16
241 ; CHECK-NEXT: ret <2 x i2> [[R]]
243 %r = load <2 x i2>, <2 x i2>* %p, align 16
; Two i4 elements loaded through a 16-byte aligned, dereferenceable(16) pointer.
; The autogenerated assertions expect the load to be left unchanged.
246 define <2 x i4> @vec_with_2elts_128bits_i4(<2 x i4>* align 16 dereferenceable(16) %p) {
247 ; CHECK-LABEL: @vec_with_2elts_128bits_i4(
248 ; CHECK-NEXT: [[R:%.*]] = load <2 x i4>, <2 x i4>* [[P:%.*]], align 16
249 ; CHECK-NEXT: ret <2 x i4> [[R]]
251 %r = load <2 x i4>, <2 x i4>* %p, align 16