1 ; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s
3 ; Test that the logic for choosing between v128.const vector
4 ; initialization and splat vector initialization, and for optimizing the
5 ; choice of splat value, works correctly.
7 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
8 target triple = "wasm32-unknown-unknown"
; An i16x8 vector whose constant operand is 42 in all eight lanes. The expected
; output is a v128.const holding 42 in every lane except lane 5, which is
; emitted as 0, followed by one i16x8.replace_lane that writes the argument
; ($0) into lane 5.
10 ; CHECK-LABEL: same_const_one_replaced_i16x8:
11 ; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
12 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
13 ; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
14 ; CHECK-NEXT: return $pop[[L1]]
15 define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
17 <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
; An i16x8 vector whose constant operand has a distinct value in every lane
; (1, -2, 3, -4, 5, -6, 7, -8). The expected output is a v128.const that keeps
; each constant except lane 5, which is emitted as 0 instead of -6, followed
; by one i16x8.replace_lane that writes the argument ($0) into lane 5.
23 ; CHECK-LABEL: different_const_one_replaced_i16x8:
24 ; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
25 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
26 ; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
27 ; CHECK-NEXT: return $pop[[L1]]
28 define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
30 <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
; An f32x4 vector whose constant operand is 42.0 (printed as 0x1.5p5) in all
; four lanes. The expected output is a v128.const holding 42.0 in every lane
; except lane 2, which is emitted as 0x0p0, followed by one f32x4.replace_lane
; that writes the argument ($0) into lane 2.
36 ; CHECK-LABEL: same_const_one_replaced_f32x4:
37 ; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
38 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
39 ; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
40 ; CHECK-NEXT: return $pop[[L1]]
41 define <4 x float> @same_const_one_replaced_f32x4(float %x) {
43 <4 x float> <float 42., float 42., float 42., float 42.>,
; An f32x4 vector whose constant operand is <1.0, 2.0, 3.0, 4.0>. The expected
; output is a v128.const that keeps lanes 0, 1 and 3 (0x1p0, 0x1p1, 0x1p2) and
; emits lane 2 as 0x0p0 instead of 3.0, followed by one f32x4.replace_lane that
; writes the argument ($0) into lane 2.
49 ; CHECK-LABEL: different_const_one_replaced_f32x4:
50 ; CHECK-NEXT: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
51 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
52 ; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
53 ; CHECK-NEXT: return $pop[[L1]]
54 define <4 x float> @different_const_one_replaced_f32x4(float %x) {
56 <4 x float> <float 1., float 2., float 3., float 4.>,
; Returns the constant <undef, 3, 3, 1>. Although 3 is the most common lane
; value, the expected output is a single v128.const (with the undef lane
; emitted as 0) that is returned directly, with no splat or replace_lane.
62 ; CHECK-LABEL: splat_common_const_i32x4:
63 ; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
64 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
65 ; CHECK-NEXT: return $pop[[L0]]
66 define <4 x i32> @splat_common_const_i32x4() {
67 ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
; Builds an i16x8 vector lane by lane from three arguments: %c fills lanes
; 1, 3, 5 and 6, %b fills lanes 0, 4 and 7, and %a fills lane 2. The expected
; output splats the most frequent value, %c ($2), and then uses
; i16x8.replace_lane only for the four remaining lanes (0, 2, 4 and 7).
70 ; CHECK-LABEL: splat_common_arg_i16x8:
71 ; CHECK-NEXT: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
72 ; CHECK-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $2
73 ; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
74 ; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
75 ; CHECK-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
76 ; CHECK-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
77 ; CHECK-NEXT: return $pop[[L4]]
78 define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
79 %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
80 %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
81 %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
82 %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
83 %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
84 %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
85 %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
86 %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
; Lane 0 of the vector is the element of %src selected dynamically by lane 0
; of %mask, inserted into an otherwise undef vector. The expected output is a
; single v8x16.swizzle of the two arguments, returned directly.
90 ; CHECK-LABEL: swizzle_one_i8x16:
91 ; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
92 ; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
93 ; CHECK-NEXT: return $pop[[L0]]
94 define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
95 %m0 = extractelement <16 x i8> %mask, i32 0
96 %s0 = extractelement <16 x i8> %src, i8 %m0
97 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
; Every lane i (0 through 15) of the vector is the element of %src selected
; dynamically by lane i of %mask. The sixteen extract/extract/insert triples
; are expected to collapse into a single v8x16.swizzle of the two arguments,
; returned directly.
101 ; CHECK-LABEL: swizzle_all_i8x16:
102 ; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
103 ; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
104 ; CHECK-NEXT: return $pop[[L0]]
105 define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
106 %m0 = extractelement <16 x i8> %mask, i32 0
107 %s0 = extractelement <16 x i8> %src, i8 %m0
108 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
109 %m1 = extractelement <16 x i8> %mask, i32 1
110 %s1 = extractelement <16 x i8> %src, i8 %m1
111 %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
112 %m2 = extractelement <16 x i8> %mask, i32 2
113 %s2 = extractelement <16 x i8> %src, i8 %m2
114 %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
115 %m3 = extractelement <16 x i8> %mask, i32 3
116 %s3 = extractelement <16 x i8> %src, i8 %m3
117 %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
118 %m4 = extractelement <16 x i8> %mask, i32 4
119 %s4 = extractelement <16 x i8> %src, i8 %m4
120 %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
121 %m5 = extractelement <16 x i8> %mask, i32 5
122 %s5 = extractelement <16 x i8> %src, i8 %m5
123 %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
124 %m6 = extractelement <16 x i8> %mask, i32 6
125 %s6 = extractelement <16 x i8> %src, i8 %m6
126 %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
127 %m7 = extractelement <16 x i8> %mask, i32 7
128 %s7 = extractelement <16 x i8> %src, i8 %m7
129 %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
130 %m8 = extractelement <16 x i8> %mask, i32 8
131 %s8 = extractelement <16 x i8> %src, i8 %m8
132 %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
133 %m9 = extractelement <16 x i8> %mask, i32 9
134 %s9 = extractelement <16 x i8> %src, i8 %m9
135 %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
136 %m10 = extractelement <16 x i8> %mask, i32 10
137 %s10 = extractelement <16 x i8> %src, i8 %m10
138 %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
139 %m11 = extractelement <16 x i8> %mask, i32 11
140 %s11 = extractelement <16 x i8> %src, i8 %m11
141 %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
142 %m12 = extractelement <16 x i8> %mask, i32 12
143 %s12 = extractelement <16 x i8> %src, i8 %m12
144 %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
145 %m13 = extractelement <16 x i8> %mask, i32 13
146 %s13 = extractelement <16 x i8> %src, i8 %m13
147 %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
148 %m14 = extractelement <16 x i8> %mask, i32 14
149 %s14 = extractelement <16 x i8> %src, i8 %m14
150 %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
151 %m15 = extractelement <16 x i8> %mask, i32 15
152 %s15 = extractelement <16 x i8> %src, i8 %m15
153 %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
; Dynamic single-lane selection with 16-bit lanes: lane 0 of the vector is the
; element of %src selected by lane 0 of %mask, inserted into an otherwise
; undef <8 x i16> vector.
157 ; CHECK-LABEL: swizzle_one_i16x8:
158 ; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
161 define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
162 %m0 = extractelement <8 x i16> %mask, i32 0
163 %s0 = extractelement <8 x i16> %src, i16 %m0
164 %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
; Mixes three kinds of lane source, two lanes each: lanes 0 and 7 are elements
; of %src selected by the same-numbered lane of %mask, lanes 3 and 12 are the
; %splatted argument, and lanes 4 and 14 are the constant 42. The expected
; output starts from a v8x16.swizzle of %src and %mask and then contains four
; i8x16.replace_lane instructions.
168 ; CHECK-LABEL: mashup_swizzle_i8x16:
169 ; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
170 ; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
171 ; CHECK: i8x16.replace_lane
172 ; CHECK: i8x16.replace_lane
173 ; CHECK: i8x16.replace_lane
174 ; CHECK: i8x16.replace_lane
176 define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
178 %m0 = extractelement <16 x i8> %mask, i32 0
179 %s0 = extractelement <16 x i8> %src, i8 %m0
180 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
182 %m1 = extractelement <16 x i8> %mask, i32 7
183 %s1 = extractelement <16 x i8> %src, i8 %m1
184 %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
186 %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
188 %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
190 %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
192 %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
; Mixes one dynamically selected lane (lane 0: %src indexed by lane 0 of
; %mask), two lanes of the %splatted argument (lanes 3 and 12), and two lanes
; of the constant 42 (lanes 4 and 14). The expected output contains a
; v128.const with 42 in lanes 4 and 14 and 0 in every other lane, followed by
; three i8x16.replace_lane instructions.
196 ; CHECK-LABEL: mashup_const_i8x16:
197 ; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
198 ; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
199 ; CHECK: i8x16.replace_lane
200 ; CHECK: i8x16.replace_lane
201 ; CHECK: i8x16.replace_lane
203 define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
205 %m0 = extractelement <16 x i8> %mask, i32 0
206 %s0 = extractelement <16 x i8> %src, i8 %m0
207 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
209 %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
211 %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
213 %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
215 %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
; Mixes one dynamically selected lane (lane 0: %src indexed by lane 0 of
; %mask), two lanes of the %splatted argument (lanes 3 and 12), and one lane
; of the constant 42 (lane 4). The expected output contains an i8x16.splat of
; %splatted ($2) followed by two i8x16.replace_lane instructions.
219 ; CHECK-LABEL: mashup_splat_i8x16:
220 ; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
221 ; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
222 ; CHECK: i8x16.replace_lane
223 ; CHECK: i8x16.replace_lane
225 define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
227 %m0 = extractelement <16 x i8> %mask, i32 0
228 %s0 = extractelement <16 x i8> %src, i8 %m0
229 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
231 %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
233 %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
235 %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
; Inserts the constant 42.0 into lane 1 of an undef f32x4 vector. The expected
; output is a single v128.const with 42.0 (0x1.5p5) in lane 1 and 0x0p0 in the
; undef lanes, returned directly.
239 ; CHECK-LABEL: undef_const_insert_f32x4:
240 ; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
241 ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
242 ; CHECK-NEXT: return $pop[[L0]]
243 define <4 x float> @undef_const_insert_f32x4() {
244 %v = insertelement <4 x float> undef, float 42., i32 1
; Inserts the argument %x into lane 3 of an undef i32x4 vector. The expected
; output is a single i32x4.splat of the argument ($0), returned directly, with
; no replace_lane.
248 ; CHECK-LABEL: undef_arg_insert_i32x4:
249 ; CHECK-NEXT: .functype undef_arg_insert_i32x4 (i32) -> (v128)
250 ; CHECK-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0
251 ; CHECK-NEXT: return $pop[[L0]]
252 define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
253 %v = insertelement <4 x i32> undef, i32 %x, i32 3
; Inserts an undef element into lane 4 of an undef i8x16 vector, so every lane
; is undef. The expected output has no vector-building instruction at all: the
; functype line is followed immediately by a return of $0.
257 ; CHECK-LABEL: all_undef_i8x16:
258 ; CHECK-NEXT: .functype all_undef_i8x16 () -> (v128)
259 ; CHECK-NEXT: return $0
260 define <16 x i8> @all_undef_i8x16() {
261 %v = insertelement <16 x i8> undef, i8 undef, i32 4
; Returns an undef f64x2 vector directly. The expected output has no
; vector-building instruction: the functype line is followed immediately by a
; return of $0.
265 ; CHECK-LABEL: all_undef_f64x2:
266 ; CHECK-NEXT: .functype all_undef_f64x2 () -> (v128)
267 ; CHECK-NEXT: return $0
268 define <2 x double> @all_undef_f64x2() {
269 ret <2 x double> undef