1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -S | FileCheck %s
; Baseline: extrq with two non-constant operands. The expected output still
; contains the extrq call on the original arguments.
8 define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
9 ; CHECK-LABEL: @test_extrq_call(
10 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
11 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
13 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
; extrq whose first operand is zeroinitializer: the expected output folds to
; <i64 0, i64 undef> even though the control operand %y is not constant.
17 define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
18 ; CHECK-LABEL: @test_extrq_zero_arg0(
19 ; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
21 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
; extrq whose second (control) operand is zeroinitializer: the expected output
; returns the first operand %x directly.
25 define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
26 ; CHECK-LABEL: @test_extrq_zero_arg1(
27 ; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
29 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
; extrq with a constant control vector whose first two bytes are 8 and 15:
; the expected output is the immediate form, extrqi with i8 8 and i8 15.
33 define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
34 ; CHECK-LABEL: @test_extrq_to_extqi(
35 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 15)
36 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
38 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; extrq with both operands constant (data <i64 -1, i64 55>, control bytes 8
; and 15): the expected output folds to <i64 255, i64 undef>.
42 define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
43 ; CHECK-LABEL: @test_extrq_constant(
44 ; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
46 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; Constant extrq where the upper element of the data operand is undef (data
; <i64 -1, i64 undef>, control bytes 16 and 15): the expected output folds to
; <i64 65535, i64 undef>.
50 define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
51 ; CHECK-LABEL: @test_extrq_constant_undef(
52 ; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
54 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
; extrq whose control operand is a constant expression (a bitcast of
; <i64 0, i64 undef> to <16 x i8>): the expected output returns %x directly.
58 define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
59 ; CHECK-LABEL: @test_extrq_call_constexpr(
60 ; CHECK-NEXT: ret <2 x i64> [[X:%.*]]
62 %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
; Baseline: extrqi with immediates 8 and 23 on a non-constant operand. The
; expected output keeps the call as written.
; NOTE(review): presumably left alone because 23 is not a multiple of 8, so no
; byte shuffle can express it - confirm against the transform.
70 define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
71 ; CHECK-LABEL: @test_extrqi_call(
72 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 23)
73 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
75 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
; extrqi with immediates 32 and 32: the expected output is a <16 x i8> shuffle
; (wrapped in bitcasts) that places source bytes 4-7 in result bytes 0-3, takes
; zeros from the constant operand for bytes 4-7, and leaves bytes 8-15 undef.
79 define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
80 ; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
81 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
82 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
83 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
84 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
86 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
; extrqi with immediates 8 and 16: the expected output is a <16 x i8> shuffle
; (wrapped in bitcasts) that places source byte 2 in result byte 0, takes zeros
; from the constant operand for bytes 1-7, and leaves bytes 8-15 undef.
90 define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
91 ; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
92 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
93 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
94 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
95 ; CHECK-NEXT: ret <2 x i64> [[TMP3]]
97 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
; extrqi with immediates 32 and 33 (their sum exceeds 64): the expected output
; folds to undef, even though the data operand is zeroinitializer.
101 define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
102 ; CHECK-LABEL: @test_extrqi_undef(
103 ; CHECK-NEXT: ret <2 x i64> undef
105 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
; extrqi of zeroinitializer with immediates 3 and 18: the expected output folds
; to <i64 0, i64 undef>.
109 define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
110 ; CHECK-LABEL: @test_extrqi_zero(
111 ; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
113 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
; extrqi of the constant <i64 -1, i64 55> with immediates 3 and 18: the
; expected output folds to <i64 7, i64 undef>.
117 define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
118 ; CHECK-LABEL: @test_extrqi_constant(
119 ; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
121 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
; extrqi of the constant <i64 -1, i64 undef> (upper element undef) with
; immediates 4 and 18: the expected output folds to <i64 15, i64 undef>.
125 define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
126 ; CHECK-LABEL: @test_extrqi_constant_undef(
127 ; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
129 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
; extrqi whose data operand is a nested bitcast/trunc constant expression
; rather than a plain constant vector, with immediates 8 and 16: the expected
; output folds to zeroinitializer.
133 define <2 x i64> @test_extrqi_call_constexpr() {
134 ; CHECK-LABEL: @test_extrqi_call_constexpr(
135 ; CHECK-NEXT: ret <2 x i64> zeroinitializer
137 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
; Baseline: insertq with two non-constant operands. The expected output still
; contains the insertq call on the original arguments.
145 define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
146 ; CHECK-LABEL: @test_insertq_call(
147 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
148 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
150 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
; insertq with a constant second operand <i64 8, i64 658>: the expected output
; is the immediate form, insertqi with <i64 8, i64 undef>, i8 18, i8 2.
; 658 is 0x292, i.e. 18 in bits 5:0 and 2 in bits 13:8.
154 define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
155 ; CHECK-LABEL: @test_insertq_to_insertqi(
156 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
157 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
159 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
; insertq with both operands constant (<i64 0, i64 0> and <i64 8, i64 658>):
; the expected output folds to <i64 32, i64 undef>.
163 define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
164 ; CHECK-LABEL: @test_insertq_constant(
165 ; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
167 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
; Constant insertq where the upper element of the first operand is undef
; (<i64 1, i64 undef> and <i64 8, i64 658>): the expected output folds to
; <i64 33, i64 undef>.
171 define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
172 ; CHECK-LABEL: @test_insertq_constant_undef(
173 ; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
175 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
; insertq whose second operand is a nested bitcast/trunc constant expression:
; the expected output is insertqi with <i64 0, i64 undef>, i8 2, i8 0.
179 define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
180 ; CHECK-LABEL: @test_insertq_call_constexpr(
181 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
182 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
184 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
; insertqi with immediates 32 and 32 applied to bitcasts of two <16 x i8>
; values: the expected output is a single shufflevector that takes bytes 0-3
; of %v, then bytes 0-3 of %i, and leaves bytes 8-15 undef, with no bitcasts.
192 define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
193 ; CHECK-LABEL: @test_insertqi_shuffle_04uu(
194 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
195 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
197 %1 = bitcast <16 x i8> %v to <2 x i64>
198 %2 = bitcast <16 x i8> %i to <2 x i64>
199 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
200 %4 = bitcast <2 x i64> %3 to <16 x i8>
; insertqi with immediates 16 and 0 applied to bitcasts of two <16 x i8>
; values: the expected output is a single shufflevector with %i as its first
; operand, taking bytes 0-1 of %i, then bytes 2-7 of %v, with bytes 8-15 undef.
204 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
205 ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
206 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
207 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
209 %1 = bitcast <16 x i8> %v to <2 x i64>
210 %2 = bitcast <16 x i8> %i to <2 x i64>
211 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
212 %4 = bitcast <2 x i64> %3 to <16 x i8>
; insertqi with both vector operands constant (<i64 -1, i64 -1> and
; <i64 8, i64 0>) and immediates 16 and 1: the expected output folds to
; <i64 -131055, i64 undef> (0xFFFFFFFFFFFE0011 in the low element).
216 define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
217 ; CHECK-LABEL: @test_insertqi_constant(
218 ; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
220 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
; insertqi whose second operand is a nested bitcast/trunc constant expression,
; with immediates 48 and 3: the expected output keeps the insertqi call but
; with the second operand simplified to <i64 0, i64 undef>.
224 define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
225 ; CHECK-LABEL: @test_insertqi_call_constexpr(
226 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
227 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
229 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
233 ; The result of this insert is the second arg, since the top 64 bits of
234 ; the result are undefined, and we copy the bottom 64 bits from the second arg.
; The call uses insertqi immediates 64 and 0; the expected output returns %i.
236 define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
237 ; CHECK-LABEL: @testInsert64Bits(
238 ; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
240 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
; insertqi with immediates 0 and 0: the expected output returns the second
; operand %i directly.
; NOTE(review): this relies on a length field of 0 being treated as 64 bits -
; confirm against the INSERTQ instruction reference.
244 define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
245 ; CHECK-LABEL: @testZeroLength(
246 ; CHECK-NEXT: ret <2 x i64> [[I:%.*]]
248 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
; insertqi with immediates 0 and 16: the expected output folds to undef.
; NOTE(review): presumably length 0 is treated as 64, so length + index
; exceeds 64 - confirm against the INSERTQ instruction reference.
252 define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
253 ; CHECK-LABEL: @testUndefinedInsertq_1(
254 ; CHECK-NEXT: ret <2 x i64> undef
256 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
; insertqi with immediates 48 and 32 (their sum exceeds 64): the expected
; output folds to undef.
260 define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
261 ; CHECK-LABEL: @testUndefinedInsertq_2(
262 ; CHECK-NEXT: ret <2 x i64> undef
264 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
; insertqi with immediates 64 and 16 (their sum exceeds 64): the expected
; output folds to undef.
268 define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
269 ; CHECK-LABEL: @testUndefinedInsertq_3(
270 ; CHECK-NEXT: ret <2 x i64> undef
272 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
277 ; Vector Demanded Bits
; The first extrq operand is a shuffle that copies element 0 of %x into both
; lanes. The expected output has no shufflevector: the call takes the function
; arguments directly.
280 define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
281 ; CHECK-LABEL: @test_extrq_arg0(
282 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
283 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
285 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
286 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
; The extrq control operand is a shuffle that keeps bytes 0-7 of %y and fills
; bytes 8-15 with copies of byte 0. The expected output has no shufflevector:
; the call takes the function arguments directly.
290 define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
291 ; CHECK-LABEL: @test_extrq_arg1(
292 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
293 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
295 %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
296 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
; Both extrq operands are shuffles that only alter the upper half of their
; source (element 1 of %x, bytes 8-15 of %y). The expected output has neither
; shufflevector: the call takes the function arguments directly.
300 define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
301 ; CHECK-LABEL: @test_extrq_args01(
302 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
303 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
305 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
306 %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
307 %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
; Only element 1 of the extrq result is read (the shuffle copies it into both
; lanes). The expected output folds the whole function body to undef.
311 define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
312 ; CHECK-LABEL: @test_extrq_ret(
313 ; CHECK-NEXT: ret <2 x i64> undef
315 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
316 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The extrqi data operand is a shuffle that copies element 0 of %x into both
; lanes. The expected output has no shufflevector: the call takes the function
; argument directly, with the immediates 3 and 2 unchanged.
320 define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
321 ; CHECK-LABEL: @test_extrqi_arg0(
322 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2)
323 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
325 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
326 %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
; Only element 1 of the extrqi result is read (the shuffle copies it into both
; lanes). The expected output folds the whole function body to undef.
330 define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
331 ; CHECK-LABEL: @test_extrqi_ret(
332 ; CHECK-NEXT: ret <2 x i64> undef
334 %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
335 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The first insertq operand is a shuffle that copies element 0 of %x into both
; lanes. The expected output has no shufflevector: the call takes the function
; arguments directly.
339 define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
340 ; CHECK-LABEL: @test_insertq_arg0(
341 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
342 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
344 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
345 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
; Only element 1 of the insertq result is read (the shuffle copies it into
; both lanes). The expected output folds the whole function body to undef.
349 define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
350 ; CHECK-LABEL: @test_insertq_ret(
351 ; CHECK-NEXT: ret <2 x i64> undef
353 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
354 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; The first insertqi operand is a shuffle that copies element 0 of %x into
; both lanes. The expected output has no shufflevector: the call takes the
; function arguments directly, with the immediates 3 and 2 unchanged.
358 define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
359 ; CHECK-LABEL: @test_insertqi_arg0(
360 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
361 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
363 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
364 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
; The second insertqi operand is a shuffle that copies element 0 of %y into
; both lanes. The expected output has no shufflevector: the call takes the
; function arguments directly, with the immediates 3 and 2 unchanged.
368 define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
369 ; CHECK-LABEL: @test_insertqi_arg1(
370 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
371 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
373 %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
374 %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
; Both insertqi vector operands are shuffles that copy element 0 of their
; source into both lanes. The expected output has neither shufflevector: the
; call takes the function arguments directly, with immediates 3 and 2 unchanged.
378 define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
379 ; CHECK-LABEL: @test_insertqi_args01(
380 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
381 ; CHECK-NEXT: ret <2 x i64> [[TMP1]]
383 %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
384 %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
385 %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
; Only element 1 of the insertqi result is read (the shuffle copies it into
; both lanes). The expected output folds the whole function body to undef.
389 define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
390 ; CHECK-LABEL: @test_insertqi_ret(
391 ; CHECK-NEXT: ret <2 x i64> undef
393 %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
394 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
; Declarations of the four SSE4A intrinsics exercised by the tests in this
; file (extrq, extrqi, insertq, insertqi). Each declaration is paired with a
; directive expecting a declaration of the same intrinsic in the output.
398 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
399 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
401 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
402 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
404 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
405 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
407 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
408 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind