1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=SSE1
; Single-input shuffle: every mask index is in 0-3, so only lanes of %a are
; selected and %b is never read. Result lanes are %a[0], %a[0], %a[0], %a[1].
; The SSE1 assertions expect this to appear as one shufps of xmm0 with itself.
4 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
5 ; SSE1-LABEL: shuffle_v4f32_0001:
7 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
9 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
10 ret <4 x float> %shuffle
; Single-input shuffle of %a (mask indices stay below 4, so %b is unused).
; Result lanes are %a[0], %a[0], %a[2], %a[0]; the expected output is one
; shufps of xmm0 with the same lane order.
13 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
14 ; SSE1-LABEL: shuffle_v4f32_0020:
16 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
18 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
19 ret <4 x float> %shuffle
; Single-input shuffle of %a (mask indices stay below 4, so %b is unused).
; Result lanes are %a[0], %a[3], %a[0], %a[0]; the expected output is one
; shufps of xmm0 with the same lane order.
22 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
23 ; SSE1-LABEL: shuffle_v4f32_0300:
25 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
27 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
28 ret <4 x float> %shuffle
; Single-input shuffle of %a (mask indices stay below 4, so %b is unused).
; Result lanes are %a[1], %a[0], %a[0], %a[0]; the expected output is one
; shufps of xmm0 with the same lane order.
31 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
32 ; SSE1-LABEL: shuffle_v4f32_1000:
34 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
36 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
37 ret <4 x float> %shuffle
; Single-input shuffle of %a (mask indices stay below 4, so %b is unused).
; Result lanes are %a[2], %a[2], %a[0], %a[0]; the expected output is one
; shufps of xmm0 with the same lane order.
40 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
41 ; SSE1-LABEL: shuffle_v4f32_2200:
43 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
45 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
46 ret <4 x float> %shuffle
; Single-input shuffle of %a (mask indices stay below 4, so %b is unused).
; Result lanes are %a[3], %a[3], %a[3], %a[0]; the expected output is one
; shufps of xmm0 with the same lane order.
49 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
50 ; SSE1-LABEL: shuffle_v4f32_3330:
52 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
54 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
55 ret <4 x float> %shuffle
; Full lane reversal of %a (mask <3,2,1,0>; %b is unused because no index
; reaches 4). The expected output is one shufps of xmm0 with lanes [3,2,1,0].
58 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
59 ; SSE1-LABEL: shuffle_v4f32_3210:
61 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
63 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
64 ret <4 x float> %shuffle
; Duplicates each of the two low lanes of %a: result is %a[0], %a[0], %a[1],
; %a[1] (%b is unused). Unlike the generic single-input cases, the assertions
; here expect unpcklps of xmm0 with itself rather than shufps.
67 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
68 ; SSE1-LABEL: shuffle_v4f32_0011:
70 ; SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
72 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
73 ret <4 x float> %shuffle
; Duplicates each of the two high lanes of %a: result is %a[2], %a[2], %a[3],
; %a[3] (%b is unused). The assertions expect unpckhps of xmm0 with itself.
76 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
77 ; SSE1-LABEL: shuffle_v4f32_2233:
79 ; SSE1-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
81 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
82 ret <4 x float> %shuffle
; Duplicates the even lanes of %a: result is %a[0], %a[0], %a[2], %a[2]
; (%b is unused). With sse2 disabled by the RUN line, the assertions expect a
; plain shufps of xmm0 with lanes [0,0,2,2].
85 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
86 ; SSE1-LABEL: shuffle_v4f32_0022:
88 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
90 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
91 ret <4 x float> %shuffle
; Duplicates the odd lanes of %a: result is %a[1], %a[1], %a[3], %a[3]
; (%b is unused). The assertions expect a plain shufps of xmm0 with lanes
; [1,1,3,3].
94 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
95 ; SSE1-LABEL: shuffle_v4f32_1133:
97 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
99 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
100 ret <4 x float> %shuffle
; Two-input shuffle: mask indices 4 and 5 select lanes 0 and 1 of %b, so the
; result is the low half of %a followed by the low half of %b. The assertions
; expect a single movlhps combining xmm0 and xmm1.
103 define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) {
104 ; SSE1-LABEL: shuffle_v4f32_0145:
106 ; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
108 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
109 ret <4 x float> %shuffle
; Repeats the low half of %a in both halves of the result: lanes are %a[0],
; %a[1], %a[0], %a[1] (%b is unused). The assertions expect movlhps of xmm0
; with itself.
112 define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
113 ; SSE1-LABEL: shuffle_v4f32_0101:
115 ; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
117 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
118 ret <4 x float> %shuffle
; Repeats the high half of %a in both halves of the result: lanes are %a[2],
; %a[3], %a[2], %a[3] (%b is unused). The assertions expect movhlps of xmm0
; with itself.
121 define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) {
122 ; SSE1-LABEL: shuffle_v4f32_2323:
124 ; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
126 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
127 ret <4 x float> %shuffle
; Two-input shuffle: mask indices 6 and 7 select lanes 2 and 3 of %b, so the
; result is the high half of %b followed by the high half of %a. The
; assertions expect a single movhlps taking xmm1 then xmm0.
130 define <4 x float> @shuffle_v4f32_6723(<4 x float> %a, <4 x float> %b) {
131 ; SSE1-LABEL: shuffle_v4f32_6723:
133 ; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
135 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
136 ret <4 x float> %shuffle
; Shuffle with a zero vector as the FIRST operand and %a as the second.
; Mask index 4 selects %a[0] for lane 0; indices 1-3 select zero lanes, so the
; result is %a[0], 0, 0, 0.
; The assertions expect xmm1 to be zeroed with xorps, a movss merging xmm0[0]
; into it, and a movaps copying the result back to xmm0.
139 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
140 ; SSE1-LABEL: shuffle_v4f32_4zzz:
142 ; SSE1-NEXT: xorps %xmm1, %xmm1
143 ; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
144 ; SSE1-NEXT: movaps %xmm1, %xmm0
146 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
147 ret <4 x float> %shuffle
; Zero vector is the first operand, %a the second. Only mask index 4 reaches
; into %a, placing %a[0] in lane 1; indices 2, 3 and 0 all pick lanes of the
; zero vector, so the result is 0, %a[0], 0, 0.
; The assertions expect xorps to zero xmm1 followed by two shufps that blend
; xmm0 with the zeroed register.
150 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
151 ; SSE1-LABEL: shuffle_v4f32_z4zz:
153 ; SSE1-NEXT: xorps %xmm1, %xmm1
154 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
155 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
157 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
158 ret <4 x float> %shuffle
; Zero vector is the first operand, %a the second. Mask index 4 places %a[0]
; in lane 2; every other lane reads lane 0 of the zero vector, so the result
; is 0, 0, %a[0], 0.
; The assertions expect xorps to zero xmm1, two shufps (the second writing
; xmm1), and a movaps copying xmm1 back to xmm0.
161 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
162 ; SSE1-LABEL: shuffle_v4f32_zz4z:
164 ; SSE1-NEXT: xorps %xmm1, %xmm1
165 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
166 ; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
167 ; SSE1-NEXT: movaps %xmm1, %xmm0
169 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
170 ret <4 x float> %shuffle
; Zero vector is the first operand, %a the second. Lane 0 is zero, lanes 1 and
; 2 are undef mask elements (any value is acceptable), and lane 3 is %a[0]
; (mask index 4).
; Because two lanes are unconstrained, the assertions expect only one shufps
; (after the xorps that zeroes xmm1) plus the movaps back to xmm0.
173 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
174 ; SSE1-LABEL: shuffle_v4f32_zuu4:
176 ; SSE1-NEXT: xorps %xmm1, %xmm1
177 ; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
178 ; SSE1-NEXT: movaps %xmm1, %xmm0
180 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
181 ret <4 x float> %shuffle
; Zero vector is the first operand, %a the second. Lanes 0-2 come from the
; zero vector and mask index 7 places %a[3] in lane 3, so the result is
; 0, 0, 0, %a[3].
; The assertions expect xorps to zero xmm1, two shufps (the second writing
; xmm1), and a movaps copying xmm1 back to xmm0.
184 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
185 ; SSE1-LABEL: shuffle_v4f32_zzz7:
187 ; SSE1-NEXT: xorps %xmm1, %xmm1
188 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
189 ; SSE1-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
190 ; SSE1-NEXT: movaps %xmm1, %xmm0
192 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
193 ret <4 x float> %shuffle
; Zero vector is the first operand, %a the second. Mask index 6 places %a[2]
; in lane 1; lanes 0, 2 and 3 come from the zero vector, so the result is
; 0, %a[2], 0, 0.
; The assertions expect xorps to zero xmm1 followed by two shufps that both
; write xmm0, so no trailing register copy is listed.
196 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
197 ; SSE1-LABEL: shuffle_v4f32_z6zz:
199 ; SSE1-NEXT: xorps %xmm1, %xmm1
200 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
201 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
203 %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
204 ret <4 x float> %shuffle
; Builds the vector <%a, 0, 0, 0> from a scalar float argument: the scalar is
; inserted into lane 0 of an undef vector, then the shuffle keeps that lane
; (index 0) and takes lanes 1-3 from the zero vector (indices 5, 6, 7).
; The assertions expect xorps to zero xmm1, a movss merging xmm0[0] into it,
; and a movaps copying the result back to xmm0.
207 define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
208 ; SSE1-LABEL: insert_reg_and_zero_v4f32:
210 ; SSE1-NEXT: xorps %xmm1, %xmm1
211 ; SSE1-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
212 ; SSE1-NEXT: movaps %xmm1, %xmm0
214 %v = insertelement <4 x float> undef, float %a, i32 0
215 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
216 ret <4 x float> %shuffle
; Memory variant of the scalar-plus-zeros pattern: a float is loaded from
; %ptr, inserted into lane 0 of an undef vector, and lanes 1-3 are then taken
; from the zero vector (mask indices 5, 6, 7).
; The assertions expect the whole sequence to fold into one movss load whose
; annotation shows the upper three lanes as zero.
219 define <4 x float> @insert_mem_and_zero_v4f32(ptr %ptr) {
220 ; SSE1-LABEL: insert_mem_and_zero_v4f32:
222 ; SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
224 %a = load float, ptr %ptr
225 %v = insertelement <4 x float> undef, float %a, i32 0
226 %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
227 ret <4 x float> %shuffle
; Replaces the low half of %b with a <2 x float> loaded from %ptr.
; The first shuffle widens the loaded pair to four lanes (upper two undef);
; the second keeps its lanes 0-1 and takes lanes 2-3 of %b (indices 6, 7).
; The assertions expect a single movlps from memory into xmm0.
230 define <4 x float> @insert_mem_lo_v4f32(ptr %ptr, <4 x float> %b) {
231 ; SSE1-LABEL: insert_mem_lo_v4f32:
233 ; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
235 %a = load <2 x float>, ptr %ptr
236 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
237 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
238 ret <4 x float> %shuffle
; Replaces the high half of %b with a <2 x float> loaded from %ptr.
; The first shuffle widens the loaded pair to four lanes (upper two undef);
; the second takes lanes 0-1 of %b (indices 4, 5) for the low half and the
; loaded pair (indices 0, 1) for the high half.
; The assertions expect a single movhps from memory into xmm0.
241 define <4 x float> @insert_mem_hi_v4f32(ptr %ptr, <4 x float> %b) {
242 ; SSE1-LABEL: insert_mem_hi_v4f32:
244 ; SSE1-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
246 %a = load <2 x float>, ptr %ptr
247 %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
248 %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
249 ret <4 x float> %shuffle
; Loads a full <4 x float> from %ptr (no explicit alignment on the load) and
; reverses its lanes with mask <3,2,1,0>.
; The assertions expect the load to stay a separate movaps from (%rdi),
; followed by an in-register shufps.
252 define <4 x float> @shuffle_mem_v4f32_3210(ptr %ptr) {
253 ; SSE1-LABEL: shuffle_mem_v4f32_3210:
255 ; SSE1-NEXT: movaps (%rdi), %xmm0
256 ; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
258 %a = load <4 x float>, ptr %ptr
259 %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
260 ret <4 x float> %shuffle
; Combines the low half of %a with the low half of a vector loaded from %pb
; (mask indices 4, 5 select lanes 0, 1 of %b). The load is declared with
; align 1, i.e. only byte alignment is guaranteed.
; The assertions expect the load to fold into a single movhps memory operand.
263 define <4 x float> @shuffle_mem_v4f32_0145(<4 x float> %a, ptr %pb) {
264 ; SSE1-LABEL: shuffle_mem_v4f32_0145:
266 ; SSE1-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
268 %b = load <4 x float>, ptr %pb, align 1
269 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
270 ret <4 x float> %shuffle
; Takes the high half of a vector loaded from %pb (mask indices 6, 7 select
; lanes 2, 3 of %b) as the low half of the result, and keeps the high half of
; %a. The load is declared with align 16.
; The assertions expect a single movlps with a memory operand. The {{.*#+}}
; pattern skips the instruction's operand text, so the address used is not
; pinned here; "mem[0,1]" is relative to whatever memory operand is emitted.
273 define <4 x float> @shuffle_mem_v4f32_6723(<4 x float> %a, ptr %pb) {
274 ; SSE1-LABEL: shuffle_mem_v4f32_6723:
276 ; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
278 %b = load <4 x float>, ptr %pb, align 16
279 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
280 ret <4 x float> %shuffle
; Takes the low half of a vector loaded from %pb (mask indices 4, 5 select
; lanes 0, 1 of %b) as the low half of the result, and keeps the high half of
; %a. The load is declared with align 1, i.e. only byte alignment is
; guaranteed.
; The assertions expect the load to fold into a single movlps memory operand.
283 define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, ptr %pb) {
284 ; SSE1-LABEL: shuffle_mem_v4f32_4523:
286 ; SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
288 %b = load <4 x float>, ptr %pb, align 1
289 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
290 ret <4 x float> %shuffle