1 ;; GCC machine description for SSE instructions
3 ;; Free Software Foundation, Inc.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 2, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING. If not, write to
19 ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 ;; Boston, MA 02110-1301, USA.
23 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
24 ;; special-cased for TARGET_64BIT.
25 (define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
27 ;; All 16-byte vector modes handled by SSE
28 (define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Subsets of the integer vector modes.  The digits in each macro name
;; appear to be the element widths in bytes (1 = QI, 2 = HI, 4 = SI,
;; 8 = DI) of the modes that the macro lists.
31 (define_mode_macro SSEMODE12 [V16QI V8HI])
32 (define_mode_macro SSEMODE24 [V8HI V4SI])
33 (define_mode_macro SSEMODE14 [V16QI V4SI])
34 (define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35 (define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
37 ;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
38 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
40 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
42 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
46 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
48 ;; All of these patterns are enabled for SSE1 as well as SSE2.
49 ;; This is essential for maintaining stable calling conventions.
;; Move expander, instantiated once per SSEMODEI integer vector mode.
;; All of the work is handed to ix86_expand_vector_move.
51 (define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
56 ix86_expand_vector_move (<MODE>mode, operands);
;; Move insn for the SSEMODEI modes.  Three alternatives: constant "C"
;; into a register, register/memory into a register, register into
;; memory.  Memory-to-memory moves are rejected by the insn condition.
;; Depending on which_alternative the output code emits either a
;; self-xor of the destination (xorps/pxor) or an aligned move
;; (movaps/movdqa); the "ps" spelling is used whenever the insn's
;; "mode" attribute evaluates to V4SF.
60 (define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
65 switch (which_alternative)
68 if (get_attr_mode (insn) == MODE_V4SF)
69 return "xorps\t%0, %0";
71 return "pxor\t%0, %0";
74 if (get_attr_mode (insn) == MODE_V4SF)
75 return "movaps\t{%1, %0|%0, %1}";
77 return "movdqa\t{%1, %0|%0, %1}";
82 [(set_attr "type" "sselog1,ssemov,ssemov")
;; The "mode" attribute is computed: it tests TARGET_SSE2, the
;; alternative number, optimize_size and TARGET_SSE_TYPELESS_STORES,
;; and otherwise falls back to "TI".
84 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
87 (eq_attr "alternative" "0,1")
89 (ne (symbol_ref "optimize_size")
93 (eq_attr "alternative" "2")
95 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
97 (ne (symbol_ref "optimize_size")
100 (const_string "TI"))]
101 (const_string "TI")))])
;; V4SF move expander; delegates to ix86_expand_vector_move.
103 (define_expand "movv4sf"
104 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
105 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
108 ix86_expand_vector_move (V4SFmode, operands);
;; V4SF move insn with the same three alternatives as the integer
;; version (constant "C" -> reg, reg/mem -> reg, reg -> mem).  The two
;; visible output templates are both movaps; the mode is always V4SF.
112 (define_insn "*movv4sf_internal"
113 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
114 (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
118 movaps\t{%1, %0|%0, %1}
119 movaps\t{%1, %0|%0, %1}"
120 [(set_attr "type" "sselog1,ssemov,ssemov")
121 (set_attr "mode" "V4SF")])
;; Pattern active only after reload (condition requires
;; reload_completed) for a V4SF register set from a
;; zero_extended_scalar_load_operand.  The preparation code narrows
;; operand 1 to SFmode with simplify_gen_subreg and makes operand 2 the
;; V4SF zero constant; the replacement uses a vec_duplicate of the
;; narrowed operand.
;; NOTE(review): the opening line of this definition is not visible
;; here; presumably a define_split -- confirm.
124 [(set (match_operand:V4SF 0 "register_operand" "")
125 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
126 "TARGET_SSE && reload_completed"
129 (vec_duplicate:V4SF (match_dup 1))
133 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
134 operands[2] = CONST0_RTX (V4SFmode);
;; V2DF move expander; delegates to ix86_expand_vector_move.
137 (define_expand "movv2df"
138 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
139 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
142 ix86_expand_vector_move (V2DFmode, operands);
;; V2DF move insn (constant "C" -> reg, reg/mem -> reg, reg -> mem;
;; memory-to-memory is rejected by the condition).  The output code
;; emits a self-xor (xorps/xorpd) or an aligned move (movaps/movapd),
;; using the "ps" spelling when the "mode" attribute is V4SF.
146 (define_insn "*movv2df_internal"
147 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
148 (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
149 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
151 switch (which_alternative)
154 if (get_attr_mode (insn) == MODE_V4SF)
155 return "xorps\t%0, %0";
157 return "xorpd\t%0, %0";
160 if (get_attr_mode (insn) == MODE_V4SF)
161 return "movaps\t{%1, %0|%0, %1}";
163 return "movapd\t{%1, %0|%0, %1}";
168 [(set_attr "type" "sselog1,ssemov,ssemov")
;; Computed "mode": V4SF when TARGET_SSE2 is zero; for the other cases
;; the choice between V4SF and V2DF involves the alternative number,
;; optimize_size and TARGET_SSE_TYPELESS_STORES; default is V2DF.
170 (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
171 (const_string "V4SF")
172 (eq_attr "alternative" "0,1")
174 (ne (symbol_ref "optimize_size")
176 (const_string "V4SF")
177 (const_string "V2DF"))
178 (eq_attr "alternative" "2")
180 (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
182 (ne (symbol_ref "optimize_size")
184 (const_string "V4SF")
185 (const_string "V2DF"))]
186 (const_string "V2DF")))])
;; Pattern active only after reload (TARGET_SSE2 && reload_completed)
;; for a V2DF register set from a zero_extended_scalar_load_operand.
;; It is rewritten as a vec_concat of the DFmode view of operand 1
;; (via simplify_gen_subreg) with a DFmode zero in operand 2.
;; NOTE(review): the opening line of this definition is not visible
;; here; presumably a define_split -- confirm.
189 [(set (match_operand:V2DF 0 "register_operand" "")
190 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
191 "TARGET_SSE2 && reload_completed"
192 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
194 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
195 operands[2] = CONST0_RTX (DFmode);
;; Push expander for every SSEMODE vector mode; the register operand is
;; passed to ix86_expand_push.
198 (define_expand "push<mode>1"
199 [(match_operand:SSEMODE 0 "register_operand" "")]
202 ix86_expand_push (<MODE>mode, operands[0]);
;; Misaligned-move expander for every SSEMODE vector mode; delegates to
;; ix86_expand_vector_move_misalign.
206 (define_expand "movmisalign<mode>"
207 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
208 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
211 ix86_expand_vector_move_misalign (<MODE>mode, operands);
;; V4SF move emitted as movups, wrapped in an unspec.  Two
;; alternatives: reg/mem -> reg and reg -> mem; memory-to-memory is
;; rejected by the insn condition.
;; The "mode" attribute is V4SF: the operands are V4SF and movups is
;; the packed-single form.  It previously read "V2DF", which disagreed
;; with every other V4SF move pattern in this file.
215 (define_insn "sse_movups"
216 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
217 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
219 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
220 "movups\t{%1, %0|%0, %1}"
221 [(set_attr "type" "ssemov")
222 (set_attr "mode" "V4SF")])
;; V2DF move emitted as movupd, wrapped in an unspec.  Requires
;; TARGET_SSE2; reg/mem -> reg or reg -> mem, never memory-to-memory.
224 (define_insn "sse2_movupd"
225 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
226 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
228 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
229 "movupd\t{%1, %0|%0, %1}"
230 [(set_attr "type" "ssemov")
231 (set_attr "mode" "V2DF")])
;; V16QI move emitted as movdqu, wrapped in an unspec.  Requires
;; TARGET_SSE2; reg/mem -> reg or reg -> mem, never memory-to-memory.
233 (define_insn "sse2_movdqu"
234 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
235 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
237 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
238 "movdqu\t{%1, %0|%0, %1}"
239 [(set_attr "type" "ssemov")
240 (set_attr "mode" "TI")])
;; Store of a V4SF SSE register to memory emitted as movntps; the
;; source is wrapped in an unspec.
242 (define_insn "sse_movntv4sf"
243 [(set (match_operand:V4SF 0 "memory_operand" "=m")
244 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
247 "movntps\t{%1, %0|%0, %1}"
248 [(set_attr "type" "ssemov")
249 (set_attr "mode" "V4SF")])
;; Store of a V2DF SSE register to memory emitted as movntpd; the
;; source is wrapped in an unspec.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf;
;; confirm whether the difference is intentional.
251 (define_insn "sse2_movntv2df"
252 [(set (match_operand:V2DF 0 "memory_operand" "=m")
253 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
256 "movntpd\t{%1, %0|%0, %1}"
257 [(set_attr "type" "ssecvt")
258 (set_attr "mode" "V2DF")])
;; Store of a V2DI SSE register to memory emitted as movntdq; the
;; source is wrapped in an unspec.
260 (define_insn "sse2_movntv2di"
261 [(set (match_operand:V2DI 0 "memory_operand" "=m")
262 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
265 "movntdq\t{%1, %0|%0, %1}"
266 [(set_attr "type" "ssecvt")
267 (set_attr "mode" "TI")])
;; Store of an SImode general register (constraint "r") to memory
;; emitted as movnti; the source is wrapped in an unspec.
;; The "mode" attribute is SI to match the SImode operands.  It
;; previously read "V2DF", although no vector or floating-point data is
;; involved in this pattern.
269 (define_insn "sse2_movntsi"
270 [(set (match_operand:SI 0 "memory_operand" "=m")
271 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
274 "movnti\t{%1, %0|%0, %1}"
275 [(set_attr "type" "ssecvt")
276 (set_attr "mode" "SI")])
;; Load of a V16QI value from memory into an SSE register emitted as
;; lddqu; the source is wrapped in an unspec.
278 (define_insn "sse3_lddqu"
279 [(set (match_operand:V16QI 0 "register_operand" "=x")
280 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
283 "lddqu\t{%1, %0|%0, %1}"
284 [(set_attr "type" "ssecvt")
285 (set_attr "mode" "TI")])
287 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
289 ;; Parallel single-precision floating point arithmetic
291 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF negation expander; all code generation is done by
;; ix86_expand_fp_absneg_operator (NEG), after which DONE suppresses
;; emission of the template itself.
293 (define_expand "negv4sf2"
294 [(set (match_operand:V4SF 0 "register_operand" "")
295 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
297 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
;; V4SF absolute-value expander; all code generation is done by
;; ix86_expand_fp_absneg_operator (ABS), after which DONE suppresses
;; emission of the template itself.
299 (define_expand "absv4sf2"
300 [(set (match_operand:V4SF 0 "register_operand" "")
301 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
303 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
;; V4SF addition expander.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy; the plus template is then
;; emitted (there is no DONE).
305 (define_expand "addv4sf3"
306 [(set (match_operand:V4SF 0 "register_operand" "")
307 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
308 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
310 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
;; Packed V4SF add, emitted as addps.  Operand 1 is tied to the
;; destination ("%0": commutative with operand 2); operand 2 may be a
;; register or memory.
312 (define_insn "*addv4sf3"
313 [(set (match_operand:V4SF 0 "register_operand" "=x")
314 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
315 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
316 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
317 "addps\t{%2, %0|%0, %2}"
318 [(set_attr "type" "sseadd")
319 (set_attr "mode" "V4SF")])
;; Add on V4SF operands emitted as the scalar instruction addss; the
;; "mode" attribute is SF.  Operand 1 is tied to the destination.
321 (define_insn "sse_vmaddv4sf3"
322 [(set (match_operand:V4SF 0 "register_operand" "=x")
324 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
325 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
328 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
329 "addss\t{%2, %0|%0, %2}"
330 [(set_attr "type" "sseadd")
331 (set_attr "mode" "SF")])
;; V4SF subtraction expander.  Operand 1 must already be a register
;; (minus is not commutative); the operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
333 (define_expand "subv4sf3"
334 [(set (match_operand:V4SF 0 "register_operand" "")
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
336 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
338 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
;; Packed V4SF subtract, emitted as subps.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 may be register or memory.
340 (define_insn "*subv4sf3"
341 [(set (match_operand:V4SF 0 "register_operand" "=x")
342 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
343 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
345 "subps\t{%2, %0|%0, %2}"
346 [(set_attr "type" "sseadd")
347 (set_attr "mode" "V4SF")])
;; Subtract on V4SF operands emitted as the scalar instruction subss;
;; the "mode" attribute is SF.  Operand 1 is tied to the destination.
349 (define_insn "sse_vmsubv4sf3"
350 [(set (match_operand:V4SF 0 "register_operand" "=x")
352 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
353 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
357 "subss\t{%2, %0|%0, %2}"
358 [(set_attr "type" "sseadd")
359 (set_attr "mode" "SF")])
;; V4SF multiplication expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the mult template is
;; emitted.
361 (define_expand "mulv4sf3"
362 [(set (match_operand:V4SF 0 "register_operand" "")
363 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
364 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
366 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
;; Packed V4SF multiply, emitted as mulps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
368 (define_insn "*mulv4sf3"
369 [(set (match_operand:V4SF 0 "register_operand" "=x")
370 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
371 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
372 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
373 "mulps\t{%2, %0|%0, %2}"
374 [(set_attr "type" "ssemul")
375 (set_attr "mode" "V4SF")])
;; Multiply on V4SF operands emitted as the scalar instruction mulss;
;; the "mode" attribute is SF.  Operand 1 is tied to the destination.
377 (define_insn "sse_vmmulv4sf3"
378 [(set (match_operand:V4SF 0 "register_operand" "=x")
380 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
381 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
384 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
385 "mulss\t{%2, %0|%0, %2}"
386 [(set_attr "type" "ssemul")
387 (set_attr "mode" "SF")])
;; V4SF division expander.  Operand 1 must already be a register; the
;; operands are passed through ix86_fixup_binary_operands_no_copy
;; before the div template is emitted.
389 (define_expand "divv4sf3"
390 [(set (match_operand:V4SF 0 "register_operand" "")
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
392 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
394 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
;; Packed V4SF divide, emitted as divps.  Operand 1 is tied to the
;; destination; operand 2 may be register or memory.
396 (define_insn "*divv4sf3"
397 [(set (match_operand:V4SF 0 "register_operand" "=x")
398 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
399 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
401 "divps\t{%2, %0|%0, %2}"
402 [(set_attr "type" "ssediv")
403 (set_attr "mode" "V4SF")])
;; Divide on V4SF operands emitted as the scalar instruction divss;
;; the "mode" attribute is SF.  Operand 1 is tied to the destination.
405 (define_insn "sse_vmdivv4sf3"
406 [(set (match_operand:V4SF 0 "register_operand" "=x")
408 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
409 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
413 "divss\t{%1, %0|%0, %2}"
414 [(set_attr "type" "ssediv")
415 (set_attr "mode" "SF")])
;; Packed reciprocal, modelled as UNSPEC_RCP of operand 1 and emitted
;; as rcpps.  The source may be a register or memory.
417 (define_insn "sse_rcpv4sf2"
418 [(set (match_operand:V4SF 0 "register_operand" "=x")
420 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
422 "rcpps\t{%1, %0|%0, %1}"
423 [(set_attr "type" "sse")
424 (set_attr "mode" "V4SF")])
;; Scalar reciprocal emitted as rcpss ("mode" SF).  Operand 1 is the
;; source of the unspec; operand 2 is an extra V4SF register tied to
;; the destination (constraint "0").
426 (define_insn "sse_vmrcpv4sf2"
427 [(set (match_operand:V4SF 0 "register_operand" "=x")
429 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
431 (match_operand:V4SF 2 "register_operand" "0")
434 "rcpss\t{%1, %0|%0, %1}"
435 [(set_attr "type" "sse")
436 (set_attr "mode" "SF")])
;; Packed reciprocal square root, modelled as UNSPEC_RSQRT of operand 1
;; and emitted as rsqrtps.
438 (define_insn "sse_rsqrtv4sf2"
439 [(set (match_operand:V4SF 0 "register_operand" "=x")
441 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
443 "rsqrtps\t{%1, %0|%0, %1}"
444 [(set_attr "type" "sse")
445 (set_attr "mode" "V4SF")])
;; Scalar reciprocal square root emitted as rsqrtss ("mode" SF).
;; Operand 1 is the source of the unspec; operand 2 is an extra V4SF
;; register tied to the destination (constraint "0").
447 (define_insn "sse_vmrsqrtv4sf2"
448 [(set (match_operand:V4SF 0 "register_operand" "=x")
450 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
452 (match_operand:V4SF 2 "register_operand" "0")
455 "rsqrtss\t{%1, %0|%0, %1}"
456 [(set_attr "type" "sse")
457 (set_attr "mode" "SF")])
;; Packed V4SF square root (sqrt rtx), emitted as sqrtps.  The source
;; may be a register or memory.
459 (define_insn "sqrtv4sf2"
460 [(set (match_operand:V4SF 0 "register_operand" "=x")
461 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
463 "sqrtps\t{%1, %0|%0, %1}"
464 [(set_attr "type" "sse")
465 (set_attr "mode" "V4SF")])
;; Scalar square root emitted as sqrtss ("mode" SF).  Operand 1 is the
;; sqrt source; operand 2 is an extra V4SF register tied to the
;; destination (constraint "0").
467 (define_insn "sse_vmsqrtv4sf2"
468 [(set (match_operand:V4SF 0 "register_operand" "=x")
470 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
471 (match_operand:V4SF 2 "register_operand" "0")
474 "sqrtss\t{%1, %0|%0, %1}"
475 [(set_attr "type" "sse")
476 (set_attr "mode" "SF")])
478 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
479 ;; isn't really correct, as those rtl operators aren't defined when
480 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands then go
;; through ix86_fixup_binary_operands_no_copy.
482 (define_expand "smaxv4sf3"
483 [(set (match_operand:V4SF 0 "register_operand" "")
484 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
485 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
488 if (!flag_finite_math_only)
489 operands[1] = force_reg (V4SFmode, operands[1]);
490 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
;; Packed maximum (maxps) usable only with flag_finite_math_only; in
;; that case the operands are treated as commutative ("%0").
493 (define_insn "*smaxv4sf3_finite"
494 [(set (match_operand:V4SF 0 "register_operand" "=x")
495 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
496 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
497 "TARGET_SSE && flag_finite_math_only
498 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
499 "maxps\t{%2, %0|%0, %2}"
500 [(set_attr "type" "sse")
501 (set_attr "mode" "V4SF")])
;; Packed maximum (maxps) with a fixed operand order: operand 1 must be
;; a register tied to the destination ("0", no commutativity marker).
503 (define_insn "*smaxv4sf3"
504 [(set (match_operand:V4SF 0 "register_operand" "=x")
505 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
506 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
508 "maxps\t{%2, %0|%0, %2}"
509 [(set_attr "type" "sse")
510 (set_attr "mode" "V4SF")])
;; Scalar maximum (maxss, "mode" SF) usable only with
;; flag_finite_math_only; the operands are commutative ("%0").
512 (define_insn "*sse_vmsmaxv4sf3_finite"
513 [(set (match_operand:V4SF 0 "register_operand" "=x")
515 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
516 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
519 "TARGET_SSE && flag_finite_math_only
520 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
521 "maxss\t{%2, %0|%0, %2}"
522 [(set_attr "type" "sse")
523 (set_attr "mode" "SF")])
;; Scalar maximum (maxss, "mode" SF) with a fixed operand order:
;; operand 1 is a register tied to the destination.
525 (define_insn "sse_vmsmaxv4sf3"
526 [(set (match_operand:V4SF 0 "register_operand" "=x")
528 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
529 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
533 "maxss\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "SF")])
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is forced into a register first; the operands then go
;; through ix86_fixup_binary_operands_no_copy.
537 (define_expand "sminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "")
539 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
540 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
543 if (!flag_finite_math_only)
544 operands[1] = force_reg (V4SFmode, operands[1]);
545 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
;; Packed minimum (minps) usable only with flag_finite_math_only; in
;; that case the operands are treated as commutative ("%0").
548 (define_insn "*sminv4sf3_finite"
549 [(set (match_operand:V4SF 0 "register_operand" "=x")
550 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
551 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
552 "TARGET_SSE && flag_finite_math_only
553 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
554 "minps\t{%2, %0|%0, %2}"
555 [(set_attr "type" "sse")
556 (set_attr "mode" "V4SF")])
;; Packed minimum (minps) with a fixed operand order: operand 1 must be
;; a register tied to the destination ("0", no commutativity marker).
558 (define_insn "*sminv4sf3"
559 [(set (match_operand:V4SF 0 "register_operand" "=x")
560 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
561 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
563 "minps\t{%2, %0|%0, %2}"
564 [(set_attr "type" "sse")
565 (set_attr "mode" "V4SF")])
;; Scalar minimum (minss, "mode" SF) usable only with
;; flag_finite_math_only; the operands are commutative ("%0").
567 (define_insn "*sse_vmsminv4sf3_finite"
568 [(set (match_operand:V4SF 0 "register_operand" "=x")
570 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
574 "TARGET_SSE && flag_finite_math_only
575 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
576 "minss\t{%2, %0|%0, %2}"
577 [(set_attr "type" "sse")
578 (set_attr "mode" "SF")])
;; Scalar minimum (minss, "mode" SF) with a fixed operand order:
;; operand 1 is a register tied to the destination.
580 (define_insn "sse_vmsminv4sf3"
581 [(set (match_operand:V4SF 0 "register_operand" "=x")
583 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
584 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
588 "minss\t{%2, %0|%0, %2}"
589 [(set_attr "type" "sse")
590 (set_attr "mode" "SF")])
592 ;; These versions of the min/max patterns implement exactly the operations
593 ;; min = (op1 < op2 ? op1 : op2)
594 ;; max = (!(op1 < op2) ? op1 : op2)
595 ;; Their operands are not commutative, and thus they may be used in the
596 ;; presence of -0.0 and NaN.
;; Non-commutative V4SF minimum expressed as an unspec of operands 1
;; and 2 and emitted as minps.  Operand 1 is tied to the destination.
598 (define_insn "*ieee_sminv4sf3"
599 [(set (match_operand:V4SF 0 "register_operand" "=x")
600 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
601 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
604 "minps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
;; Non-commutative V4SF maximum expressed as an unspec of operands 1
;; and 2 and emitted as maxps.  Operand 1 is tied to the destination.
608 (define_insn "*ieee_smaxv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
611 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
614 "maxps\t{%2, %0|%0, %2}"
615 [(set_attr "type" "sseadd")
616 (set_attr "mode" "V4SF")])
;; Non-commutative V2DF minimum expressed as an unspec of operands 1
;; and 2 and emitted as minpd.  Operand 1 is tied to the destination.
618 (define_insn "*ieee_sminv2df3"
619 [(set (match_operand:V2DF 0 "register_operand" "=x")
620 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
621 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
624 "minpd\t{%2, %0|%0, %2}"
625 [(set_attr "type" "sseadd")
626 (set_attr "mode" "V2DF")])
;; Non-commutative V2DF maximum expressed as an unspec of operands 1
;; and 2 and emitted as maxpd.  Operand 1 is tied to the destination.
628 (define_insn "*ieee_smaxv2df3"
629 [(set (match_operand:V2DF 0 "register_operand" "=x")
630 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
631 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
634 "maxpd\t{%2, %0|%0, %2}"
635 [(set_attr "type" "sseadd")
636 (set_attr "mode" "V2DF")])
;; Emits addsubps.  The visible RTL combines an expression on operands
;; 1 and 2 with (minus op1 op2) over the same two operands; operand 1
;; is tied to the destination.
638 (define_insn "sse3_addsubv4sf3"
639 [(set (match_operand:V4SF 0 "register_operand" "=x")
642 (match_operand:V4SF 1 "register_operand" "0")
643 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
644 (minus:V4SF (match_dup 1) (match_dup 2))
647 "addsubps\t{%2, %0|%0, %2}"
648 [(set_attr "type" "sseadd")
649 (set_attr "mode" "V4SF")])
;; Emits haddps.  The RTL selects the individual SF elements 0..3 of
;; operand 1 and then elements 0..3 of operand 2 via vec_select;
;; operand 1 is tied to the destination.
651 (define_insn "sse3_haddv4sf3"
652 [(set (match_operand:V4SF 0 "register_operand" "=x")
657 (match_operand:V4SF 1 "register_operand" "0")
658 (parallel [(const_int 0)]))
659 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
661 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
662 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
666 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
667 (parallel [(const_int 0)]))
668 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
670 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
671 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
673 "haddps\t{%2, %0|%0, %2}"
674 [(set_attr "type" "sseadd")
675 (set_attr "mode" "V4SF")])
;; Emits hsubps.  Same element-selection layout as sse3_haddv4sf3:
;; SF elements 0..3 of operand 1 followed by elements 0..3 of
;; operand 2; operand 1 is tied to the destination.
677 (define_insn "sse3_hsubv4sf3"
678 [(set (match_operand:V4SF 0 "register_operand" "=x")
683 (match_operand:V4SF 1 "register_operand" "0")
684 (parallel [(const_int 0)]))
685 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
687 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
688 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
692 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
693 (parallel [(const_int 0)]))
694 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
696 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
697 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
699 "hsubps\t{%2, %0|%0, %2}"
700 [(set_attr "type" "sseadd")
701 (set_attr "mode" "V4SF")])
;; Sum reduction of a V4SF vector.  Two code paths are visible: one
;; applies sse3_haddv4sf3 twice through a fresh temporary register, the
;; other calls ix86_expand_reduc_v4sf with gen_addv4sf3.
;; NOTE(review): the test selecting between the two paths is not
;; visible here; presumably it checks for SSE3 -- confirm.
703 (define_expand "reduc_splus_v4sf"
704 [(match_operand:V4SF 0 "register_operand" "")
705 (match_operand:V4SF 1 "register_operand" "")]
710 rtx tmp = gen_reg_rtx (V4SFmode);
711 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
712 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
715 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
;; Maximum reduction of a V4SF vector; delegates to
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining operation.
719 (define_expand "reduc_smax_v4sf"
720 [(match_operand:V4SF 0 "register_operand" "")
721 (match_operand:V4SF 1 "register_operand" "")]
724 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
;; Minimum reduction of a V4SF vector; delegates to
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining operation.
728 (define_expand "reduc_smin_v4sf"
729 [(match_operand:V4SF 0 "register_operand" "")
730 (match_operand:V4SF 1 "register_operand" "")]
733 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
737 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
739 ;; Parallel single-precision floating point comparisons
741 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed V4SF comparison.  Operand 3 is the comparison operator
;; (predicate sse_comparison_operator); the template prints it with
;; the %D3 modifier inside the mnemonic "cmp...ps".  Operand 1 is tied
;; to the destination.
743 (define_insn "sse_maskcmpv4sf3"
744 [(set (match_operand:V4SF 0 "register_operand" "=x")
745 (match_operator:V4SF 3 "sse_comparison_operator"
746 [(match_operand:V4SF 1 "register_operand" "0")
747 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
749 "cmp%D3ps\t{%2, %0|%0, %2}"
750 [(set_attr "type" "ssecmp")
751 (set_attr "mode" "V4SF")])
;; Scalar form of the comparison above, emitted as "cmp...ss" with
;; "mode" SF.  Unlike the packed form, operand 2 must be a register
;; (predicate register_operand, constraint "x").
753 (define_insn "sse_vmmaskcmpv4sf3"
754 [(set (match_operand:V4SF 0 "register_operand" "=x")
756 (match_operator:V4SF 3 "sse_comparison_operator"
757 [(match_operand:V4SF 1 "register_operand" "0")
758 (match_operand:V4SF 2 "register_operand" "x")])
762 "cmp%D3ss\t{%2, %0|%0, %2}"
763 [(set_attr "type" "ssecmp")
764 (set_attr "mode" "SF")])
;; Sets FLAGS_REG (in CCFP mode) from element 0 of each of the two V4SF
;; operands; emitted as comiss.  Operand 1 may be register or memory.
766 (define_insn "sse_comi"
767 [(set (reg:CCFP FLAGS_REG)
770 (match_operand:V4SF 0 "register_operand" "x")
771 (parallel [(const_int 0)]))
773 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
774 (parallel [(const_int 0)]))))]
776 "comiss\t{%1, %0|%0, %1}"
777 [(set_attr "type" "ssecomi")
778 (set_attr "mode" "SF")])
;; Same shape as sse_comi, but the flags are set in CCFPU mode and the
;; instruction emitted is ucomiss.
780 (define_insn "sse_ucomi"
781 [(set (reg:CCFPU FLAGS_REG)
784 (match_operand:V4SF 0 "register_operand" "x")
785 (parallel [(const_int 0)]))
787 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
788 (parallel [(const_int 0)]))))]
790 "ucomiss\t{%1, %0|%0, %1}"
791 [(set_attr "type" "ssecomi")
792 (set_attr "mode" "SF")])
;; V4SF vector-conditional expander.  Operands 4 and 5 are the values
;; compared; operands 1 and 2 are the two general_operand values the
;; result is chosen from.  Expansion is attempted via
;; ix86_expand_fp_vcond, whose return value is tested.
794 (define_expand "vcondv4sf"
795 [(set (match_operand:V4SF 0 "register_operand" "")
798 [(match_operand:V4SF 4 "nonimmediate_operand" "")
799 (match_operand:V4SF 5 "nonimmediate_operand" "")])
800 (match_operand:V4SF 1 "general_operand" "")
801 (match_operand:V4SF 2 "general_operand" "")))]
804 if (ix86_expand_fp_vcond (operands))
810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
812 ;; Parallel single-precision floating point logical operations
814 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V4SF bitwise-AND expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
816 (define_expand "andv4sf3"
817 [(set (match_operand:V4SF 0 "register_operand" "")
818 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
819 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
821 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
;; Packed V4SF bitwise AND, emitted as andps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
823 (define_insn "*andv4sf3"
824 [(set (match_operand:V4SF 0 "register_operand" "=x")
825 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
826 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
827 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
828 "andps\t{%2, %0|%0, %2}"
829 [(set_attr "type" "sselog")
830 (set_attr "mode" "V4SF")])
;; Computes (~op1) & op2 on V4SF values, emitted as andnps.  The
;; complemented operand 1 is the one tied to the destination.
832 (define_insn "sse_nandv4sf3"
833 [(set (match_operand:V4SF 0 "register_operand" "=x")
834 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
835 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
837 "andnps\t{%2, %0|%0, %2}"
838 [(set_attr "type" "sselog")
839 (set_attr "mode" "V4SF")])
;; V4SF bitwise-OR expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
841 (define_expand "iorv4sf3"
842 [(set (match_operand:V4SF 0 "register_operand" "")
843 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
844 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
846 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
;; Packed V4SF bitwise OR, emitted as orps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
848 (define_insn "*iorv4sf3"
849 [(set (match_operand:V4SF 0 "register_operand" "=x")
850 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
851 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
852 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
853 "orps\t{%2, %0|%0, %2}"
854 [(set_attr "type" "sselog")
855 (set_attr "mode" "V4SF")])
;; V4SF bitwise-XOR expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
857 (define_expand "xorv4sf3"
858 [(set (match_operand:V4SF 0 "register_operand" "")
859 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
860 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
862 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
;; Packed V4SF bitwise XOR, emitted as xorps.  Operand 1 is tied to the
;; destination and commutative with operand 2 ("%0").
864 (define_insn "*xorv4sf3"
865 [(set (match_operand:V4SF 0 "register_operand" "=x")
866 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
867 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
868 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
869 "xorps\t{%2, %0|%0, %2}"
870 [(set_attr "type" "sselog")
871 (set_attr "mode" "V4SF")])
873 ;; Also define scalar versions. These are used for abs, neg, and
874 ;; conditional move. Using subregs into vector modes causes register
875 ;; allocation lossage. These patterns do not allow memory operands
876 ;; because the native instructions read the full 128-bits.
;; Scalar SFmode bitwise AND carried out with the packed instruction
;; andps (hence "mode" V4SF).  Both inputs must be registers.
878 (define_insn "*andsf3"
879 [(set (match_operand:SF 0 "register_operand" "=x")
880 (and:SF (match_operand:SF 1 "register_operand" "0")
881 (match_operand:SF 2 "register_operand" "x")))]
883 "andps\t{%2, %0|%0, %2}"
884 [(set_attr "type" "sselog")
885 (set_attr "mode" "V4SF")])
;; Scalar SFmode (~op1) & op2 carried out with the packed instruction
;; andnps (hence "mode" V4SF).  Both inputs must be registers.
887 (define_insn "*nandsf3"
888 [(set (match_operand:SF 0 "register_operand" "=x")
889 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
890 (match_operand:SF 2 "register_operand" "x")))]
892 "andnps\t{%2, %0|%0, %2}"
893 [(set_attr "type" "sselog")
894 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise OR carried out with the packed instruction
;; orps (hence "mode" V4SF).  Both inputs must be registers.
896 (define_insn "*iorsf3"
897 [(set (match_operand:SF 0 "register_operand" "=x")
898 (ior:SF (match_operand:SF 1 "register_operand" "0")
899 (match_operand:SF 2 "register_operand" "x")))]
901 "orps\t{%2, %0|%0, %2}"
902 [(set_attr "type" "sselog")
903 (set_attr "mode" "V4SF")])
;; Scalar SFmode bitwise XOR carried out with the packed instruction
;; xorps (hence "mode" V4SF).  Both inputs must be registers.
905 (define_insn "*xorsf3"
906 [(set (match_operand:SF 0 "register_operand" "=x")
907 (xor:SF (match_operand:SF 1 "register_operand" "0")
908 (match_operand:SF 2 "register_operand" "x")))]
910 "xorps\t{%2, %0|%0, %2}"
911 [(set_attr "type" "sselog")
912 (set_attr "mode" "V4SF")])
914 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
916 ;; Parallel single-precision floating point conversion operations
918 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Converts a V2SI value (MMX register "y" or memory) to V2SF with
;; float and combines it with operand 1, a V4SF register tied to the
;; destination.  Emitted as cvtpi2ps.
920 (define_insn "sse_cvtpi2ps"
921 [(set (match_operand:V4SF 0 "register_operand" "=x")
924 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
925 (match_operand:V4SF 1 "register_operand" "0")
928 "cvtpi2ps\t{%2, %0|%0, %2}"
929 [(set_attr "type" "ssecvt")
930 (set_attr "mode" "V4SF")])
;; Converts a V4SF source through a V4SI unspec and keeps elements 0
;; and 1 as the V2SI result in an MMX register ("=y").  Emitted as
;; cvtps2pi; the "unit" attribute is mmx.
932 (define_insn "sse_cvtps2pi"
933 [(set (match_operand:V2SI 0 "register_operand" "=y")
935 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
937 (parallel [(const_int 0) (const_int 1)])))]
939 "cvtps2pi\t{%1, %0|%0, %1}"
940 [(set_attr "type" "ssecvt")
941 (set_attr "unit" "mmx")
942 (set_attr "mode" "DI")])
;; Like sse_cvtps2pi but the conversion is expressed with the fix rtx;
;; elements 0 and 1 of the V4SI result form the V2SI value in an MMX
;; register.  Emitted as cvttps2pi.
;; NOTE(review): "mode" is SF here but DI on sse_cvtps2pi, although
;; both produce a V2SI MMX result -- confirm which is intended.
944 (define_insn "sse_cvttps2pi"
945 [(set (match_operand:V2SI 0 "register_operand" "=y")
947 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
948 (parallel [(const_int 0) (const_int 1)])))]
950 "cvttps2pi\t{%1, %0|%0, %1}"
951 [(set_attr "type" "ssecvt")
952 (set_attr "unit" "mmx")
953 (set_attr "mode" "SF")])
;; Converts an SImode integer (alternative 0: register, alternative 1:
;; memory) to SF and combines it with operand 1, a V4SF register tied
;; to the destination.  Emitted as cvtsi2ss.  athlon_decode differs by
;; alternative: vector for the register form, double for memory.
955 (define_insn "sse_cvtsi2ss"
956 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
959 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
960 (match_operand:V4SF 1 "register_operand" "0,0")
963 "cvtsi2ss\t{%2, %0|%0, %2}"
964 [(set_attr "type" "sseicvt")
965 (set_attr "athlon_decode" "vector,double")
966 (set_attr "mode" "SF")])
;; 64-bit variant of sse_cvtsi2ss: converts a DImode integer to SF,
;; emitted as cvtsi2ssq.  Only available when TARGET_64BIT.
;; NOTE(review): alternative 1 uses constraint "rm" whereas
;; sse_cvtsi2ss uses "m"; with "rm" a register source can match the
;; alternative tagged athlon_decode "double" -- confirm this is wanted.
968 (define_insn "sse_cvtsi2ssq"
969 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
972 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
973 (match_operand:V4SF 1 "register_operand" "0,0")
975 "TARGET_SSE && TARGET_64BIT"
976 "cvtsi2ssq\t{%2, %0|%0, %2}"
977 [(set_attr "type" "sseicvt")
978 (set_attr "athlon_decode" "vector,double")
979 (set_attr "mode" "SF")])
;; Converts element 0 of a V4SF operand (register or memory) to an
;; SImode general register via UNSPEC_FIX_NOTRUNC; emitted as cvtss2si.
981 (define_insn "sse_cvtss2si"
982 [(set (match_operand:SI 0 "register_operand" "=r,r")
985 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
986 (parallel [(const_int 0)]))]
987 UNSPEC_FIX_NOTRUNC))]
989 "cvtss2si\t{%1, %0|%0, %1}"
990 [(set_attr "type" "sseicvt")
991 (set_attr "athlon_decode" "double,vector")
992 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvtss2si: the result is a DImode general
;; register and the instruction is cvtss2siq.  Requires TARGET_64BIT.
994 (define_insn "sse_cvtss2siq"
995 [(set (match_operand:DI 0 "register_operand" "=r,r")
998 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
999 (parallel [(const_int 0)]))]
1000 UNSPEC_FIX_NOTRUNC))]
1001 "TARGET_SSE && TARGET_64BIT"
1002 "cvtss2siq\t{%1, %0|%0, %1}"
1003 [(set_attr "type" "sseicvt")
1004 (set_attr "athlon_decode" "double,vector")
1005 (set_attr "mode" "DI")])
;; Converts element 0 of a V4SF operand (register or memory) to an
;; SImode general register; emitted as cvttss2si.
1007 (define_insn "sse_cvttss2si"
1008 [(set (match_operand:SI 0 "register_operand" "=r,r")
1011 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1012 (parallel [(const_int 0)]))))]
1014 "cvttss2si\t{%1, %0|%0, %1}"
1015 [(set_attr "type" "sseicvt")
1016 (set_attr "athlon_decode" "double,vector")
1017 (set_attr "mode" "SI")])
;; 64-bit variant of sse_cvttss2si: the result is a DImode general
;; register and the instruction is cvttss2siq.  Requires TARGET_64BIT.
1019 (define_insn "sse_cvttss2siq"
1020 [(set (match_operand:DI 0 "register_operand" "=r,r")
1023 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1024 (parallel [(const_int 0)]))))]
1025 "TARGET_SSE && TARGET_64BIT"
1026 "cvttss2siq\t{%1, %0|%0, %1}"
1027 [(set_attr "type" "sseicvt")
1028 (set_attr "athlon_decode" "double,vector")
1029 (set_attr "mode" "DI")])
;; Converts a V4SI integer vector (register or memory) to V4SF with
;; float; emitted as cvtdq2ps.
;; The "mode" attribute is V4SF to match the V4SF result.  It
;; previously read "V2DF", although no double-precision data is
;; involved in this pattern.
1031 (define_insn "sse2_cvtdq2ps"
1032 [(set (match_operand:V4SF 0 "register_operand" "=x")
1033 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1035 "cvtdq2ps\t{%1, %0|%0, %1}"
1036 [(set_attr "type" "ssecvt")
1037 (set_attr "mode" "V4SF")])
;; Converts a V4SF vector (register or memory) to V4SI via
;; UNSPEC_FIX_NOTRUNC; emitted as cvtps2dq.
1039 (define_insn "sse2_cvtps2dq"
1040 [(set (match_operand:V4SI 0 "register_operand" "=x")
1041 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1042 UNSPEC_FIX_NOTRUNC))]
1044 "cvtps2dq\t{%1, %0|%0, %1}"
1045 [(set_attr "type" "ssecvt")
1046 (set_attr "mode" "TI")])
;; Converts a V4SF vector (register or memory) to V4SI with the fix
;; rtx; emitted as cvttps2dq.
1048 (define_insn "sse2_cvttps2dq"
1049 [(set (match_operand:V4SI 0 "register_operand" "=x")
1050 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1052 "cvttps2dq\t{%1, %0|%0, %1}"
1053 [(set_attr "type" "ssecvt")
1054 (set_attr "mode" "TI")])
1056 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1058 ;; Parallel single-precision floating point element swizzling
1060 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Three-alternative swizzle on two V4SF operands; the selection list
;; starts with element 6.  At most one of operands 1 and 2 may be
;; memory.  Per alternative the templates are:
;;   0: movhlps, register operand 2 into the destination;
;;   1: movlps, using the %H1 form of the offsettable memory operand 1;
;;   2: movhps, register operand 1 into a memory destination.
1062 (define_insn "sse_movhlps"
1063 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1066 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
1067 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
1068 (parallel [(const_int 6)
1072 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1074 movhlps\t{%2, %0|%0, %2}
1075 movlps\t{%H1, %0|%0, %H1}
1076 movhps\t{%1, %0|%0, %1}"
1077 [(set_attr "type" "ssemov")
1078 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; movlhps and its memory forms.  Operand 1 is tied to the destination in
;; every alternative:
;;   0: operand 2 in a register -> movlhps.
;;   1: operand 2 in memory     -> movhps load.
;;   2: offsettable memory destination, operand 2 in a register -> movlps
;;      store to %H0.
;; Operand validity is checked with ix86_binary_operator_ok (UNKNOWN, ...).
1080 (define_insn "sse_movlhps"
1081 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1084 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1085 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1086 (parallel [(const_int 0)
1090 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1092 movlhps\t{%2, %0|%0, %2}
1093 movhps\t{%2, %0|%0, %2}
1094 movlps\t{%2, %H0|%H0, %2}"
1095 [(set_attr "type" "ssemov")
1096 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Emits unpckhps.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory.  The selection vector picks indices 2, 6, 3, 7 of
;; the operand 1 / operand 2 pair.
1098 (define_insn "sse_unpckhps"
1099 [(set (match_operand:V4SF 0 "register_operand" "=x")
1102 (match_operand:V4SF 1 "register_operand" "0")
1103 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1104 (parallel [(const_int 2) (const_int 6)
1105 (const_int 3) (const_int 7)])))]
1107 "unpckhps\t{%2, %0|%0, %2}"
1108 [(set_attr "type" "sselog")
1109 (set_attr "mode" "V4SF")])
;; Emits unpcklps.  Operand 1 is tied to the destination; operand 2 may be
;; a register or memory.  The selection vector picks indices 0, 4, 1, 5 of
;; the operand 1 / operand 2 pair.
1111 (define_insn "sse_unpcklps"
1112 [(set (match_operand:V4SF 0 "register_operand" "=x")
1115 (match_operand:V4SF 1 "register_operand" "0")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1117 (parallel [(const_int 0) (const_int 4)
1118 (const_int 1) (const_int 5)])))]
1120 "unpcklps\t{%2, %0|%0, %2}"
1121 [(set_attr "type" "sselog")
1122 (set_attr "mode" "V4SF")])
1124 ;; These are modeled with the same vec_concat as the others so that we
1125 ;; capture users of shufps that can use the new instructions
;; Emits movshdup from a single V4SF register-or-memory source into an SSE
;; register.  The selection vector starts at index 1.
1126 (define_insn "sse3_movshdup"
1127 [(set (match_operand:V4SF 0 "register_operand" "=x")
1130 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1132 (parallel [(const_int 1)
1137 "movshdup\t{%1, %0|%0, %1}"
1138 [(set_attr "type" "sse")
1139 (set_attr "mode" "V4SF")])
;; Emits movsldup from a single V4SF register-or-memory source into an SSE
;; register.  The selection vector starts at index 0.
1141 (define_insn "sse3_movsldup"
1142 [(set (match_operand:V4SF 0 "register_operand" "=x")
1145 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1147 (parallel [(const_int 0)
1152 "movsldup\t{%1, %0|%0, %1}"
1153 [(set_attr "type" "sse")
1154 (set_attr "mode" "V4SF")])
;; Expander taking the shufps immediate as operand 3.  It splits the mask
;; into four 2-bit fields and hands them to sse_shufps_1 as separate
;; constants.  The two upper fields are biased by 4 so that they fall in
;; the 4..7 range required by sse_shufps_1's const_4_to_7_operand
;; predicates.
1156 (define_expand "sse_shufps"
1157 [(match_operand:V4SF 0 "register_operand" "")
1158 (match_operand:V4SF 1 "register_operand" "")
1159 (match_operand:V4SF 2 "nonimmediate_operand" "")
1160 (match_operand:SI 3 "const_int_operand" "")]
1163 int mask = INTVAL (operands[3]);
1164 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1165 GEN_INT ((mask >> 0) & 3),
1166 GEN_INT ((mask >> 2) & 3),
1167 GEN_INT (((mask >> 4) & 3) + 4),
1168 GEN_INT (((mask >> 6) & 3) + 4)));
;; The shufps insn proper.  Operands 3 and 4 are element indices in 0..3,
;; operands 5 and 6 are indices in 4..7.  The output code folds them back
;; into the 8-bit immediate:
;;   op3 | op4 << 2 | (op5 - 4) << 4 | (op6 - 4) << 6
;; then overwrites operands[3] with that value and prints shufps.
1172 (define_insn "sse_shufps_1"
1173 [(set (match_operand:V4SF 0 "register_operand" "=x")
1176 (match_operand:V4SF 1 "register_operand" "0")
1177 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1178 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1179 (match_operand 4 "const_0_to_3_operand" "")
1180 (match_operand 5 "const_4_to_7_operand" "")
1181 (match_operand 6 "const_4_to_7_operand" "")])))]
1185 mask |= INTVAL (operands[3]) << 0;
1186 mask |= INTVAL (operands[4]) << 2;
1187 mask |= (INTVAL (operands[5]) - 4) << 4;
1188 mask |= (INTVAL (operands[6]) - 4) << 6;
1189 operands[3] = GEN_INT (mask);
1191 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1193 [(set_attr "type" "sselog")
1194 (set_attr "mode" "V4SF")])
;; Extracts elements 2 and 3 of V4SF operand 1 into a V2SF destination.
;;   0: memory destination, register source  -> movhps store.
;;   1: register destination, register source -> movhlps.
;;   2: register destination, offsettable memory source -> movlps from %H1.
1196 (define_insn "sse_storehps"
1197 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1199 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1200 (parallel [(const_int 2) (const_int 3)])))]
1203 movhps\t{%1, %0|%0, %1}
1204 movhlps\t{%1, %0|%0, %1}
1205 movlps\t{%H1, %0|%0, %H1}"
1206 [(set_attr "type" "ssemov")
1207 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines elements 0 and 1 of operand 1 (tied to the destination) with
;; the V2SF operand 2.
;;   0: operand 2 in memory   -> movhps load.
;;   1: operand 2 in register -> movlhps.
;;   2: offsettable memory destination, operand 2 in register -> movlps
;;      store to %H0.
1209 (define_insn "sse_loadhps"
1210 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1213 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1214 (parallel [(const_int 0) (const_int 1)]))
1215 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1218 movhps\t{%2, %0|%0, %2}
1219 movlhps\t{%2, %0|%0, %2}
1220 movlps\t{%2, %H0|%H0, %2}"
1221 [(set_attr "type" "ssemov")
1222 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Extracts elements 0 and 1 of V4SF operand 1 into a V2SF destination.
;;   0: memory destination, register source   -> movlps store.
;;   1: register destination, register source -> movaps.
;;   2: register destination, memory source   -> movlps load.
1224 (define_insn "sse_storelps"
1225 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1227 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1228 (parallel [(const_int 0) (const_int 1)])))]
1231 movlps\t{%1, %0|%0, %1}
1232 movaps\t{%1, %0|%0, %1}
1233 movlps\t{%1, %0|%0, %1}"
1234 [(set_attr "type" "ssemov")
1235 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; Combines the V2SF operand 2 with elements 2 and 3 of V4SF operand 1.
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4.
;;   1: operand 1 tied, operand 2 in memory -> movlps load.
;;   2: memory destination (operand 1 tied), operand 2 in a register
;;      -> movlps store.
1237 (define_insn "sse_loadlps"
1238 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1240 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1242 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1243 (parallel [(const_int 2) (const_int 3)]))))]
1246 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1247 movlps\t{%2, %0|%0, %2}
1248 movlps\t{%2, %0|%0, %2}"
1249 [(set_attr "type" "sselog,ssemov,ssemov")
1250 (set_attr "mode" "V4SF,V2SF,V2SF")])
;; Register-to-register movss: operand 1 is tied to the destination and
;; operand 2 is the SSE register named in the emitted instruction.
1252 (define_insn "sse_movss"
1253 [(set (match_operand:V4SF 0 "register_operand" "=x")
1255 (match_operand:V4SF 2 "register_operand" "x")
1256 (match_operand:V4SF 1 "register_operand" "0")
1259 "movss\t{%2, %0|%0, %2}"
1260 [(set_attr "type" "ssemov")
1261 (set_attr "mode" "SF")])
;; The SF input is constrained to already live in the destination
;; register; the insn emits shufps with immediate 0 using that register
;; as both source operands.
1263 (define_insn "*vec_dupv4sf"
1264 [(set (match_operand:V4SF 0 "register_operand" "=x")
1266 (match_operand:SF 1 "register_operand" "0")))]
1268 "shufps\t{$0, %0, %0|%0, %0, 0}"
1269 [(set_attr "type" "sselog1")
1270 (set_attr "mode" "V4SF")])
1272 ;; ??? In theory we can match memory for the MMX alternative, but allowing
1273 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1274 ;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SF operands.  Alternatives 0-1 use SSE registers
;; (unpcklps with a register operand 2, movss load when operand 2 matches
;; the "C" constant constraint); alternatives 2-3 are the corresponding
;; MMX-register forms (punpckldq / movd), marked with "*" in the
;; constraints.
1275 (define_insn "*sse_concatv2sf"
1276 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1278 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1279 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1282 unpcklps\t{%2, %0|%0, %2}
1283 movss\t{%1, %0|%0, %1}
1284 punpckldq\t{%2, %0|%0, %2}
1285 movd\t{%1, %0|%0, %1}"
1286 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1287 (set_attr "mode" "V4SF,SF,DI,DI")])
;; Builds a V4SF from two V2SF halves.  Operand 1 is tied to the
;; destination; operand 2 is merged in with movlhps when it is a register
;; and with movhps when it is in memory.
1289 (define_insn "*sse_concatv4sf"
1290 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1292 (match_operand:V2SF 1 "register_operand" " 0,0")
1293 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1296 movlhps\t{%2, %0|%0, %2}
1297 movhps\t{%2, %0|%0, %2}"
1298 [(set_attr "type" "ssemov")
1299 (set_attr "mode" "V4SF,V2SF")])
;; V4SF vector-initialisation expander.  All work is delegated to
;; ix86_expand_vector_init, called with false as its first argument.
1301 (define_expand "vec_initv4sf"
1302 [(match_operand:V4SF 0 "register_operand" "")
1303 (match_operand 1 "" "")]
1306 ix86_expand_vector_init (false, operands[0], operands[1]);
;; Inserts SF operand 2 into V4SF operand 1.  Alternatives 0 and 1 emit
;; movss (register or memory source); alternative 2 emits movd from a
;; general register.
;; NOTE(review): the output template for the fourth (memory destination)
;; alternative is not visible in this excerpt -- presumably it is handled
;; by the split that follows; confirm.
1310 (define_insn "*vec_setv4sf_0"
1311 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1314 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1315 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1319 movss\t{%2, %0|%0, %2}
1320 movss\t{%2, %0|%0, %2}
1321 movd\t{%2, %0|%0, %2}
1323 [(set_attr "type" "ssemov")
1324 (set_attr "mode" "SF")])
;; Post-reload split (TARGET_SSE && reload_completed) for a V4SF memory
;; destination: the SF operand 1 is stored with an ordinary move into the
;; SFmode slot at offset 0 of operand 0.
1327 [(set (match_operand:V4SF 0 "memory_operand" "")
1330 (match_operand:SF 1 "nonmemory_operand" ""))
1333 "TARGET_SSE && reload_completed"
1336 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
;; V4SF element-set expander.  Operand 2 is the constant element index;
;; the work is delegated to ix86_expand_vector_set, called with false as
;; its first argument.
1340 (define_expand "vec_setv4sf"
1341 [(match_operand:V4SF 0 "register_operand" "")
1342 (match_operand:SF 1 "register_operand" "")
1343 (match_operand 2 "const_int_operand" "")]
1346 ix86_expand_vector_set (false, operands[0], operands[1],
1347 INTVAL (operands[2]));
;; Extracts element 0 of V4SF operand 1 into an SF destination; a
;; memory-to-memory combination is rejected by the condition.  After
;; reload the insn is split into a plain move from an SFmode view of
;; operand 1, built with gen_rtx_REG or gen_lowpart.
1351 (define_insn_and_split "*vec_extractv4sf_0"
1352 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1354 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1355 (parallel [(const_int 0)])))]
1356 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1358 "&& reload_completed"
1361 rtx op1 = operands[1];
1363 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1365 op1 = gen_lowpart (SFmode, op1);
1366 emit_move_insn (operands[0], op1);
;; V4SF element-extract expander.  Operand 2 is the constant element
;; index; the work is delegated to ix86_expand_vector_extract, called with
;; false as its first argument.
1370 (define_expand "vec_extractv4sf"
1371 [(match_operand:SF 0 "register_operand" "")
1372 (match_operand:V4SF 1 "register_operand" "")
1373 (match_operand 2 "const_int_operand" "")]
1376 ix86_expand_vector_extract (false, operands[0], operands[1],
1377 INTVAL (operands[2]));
1381 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1383 ;; Parallel double-precision floating point arithmetic
1385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF negation.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG, ...); DONE suppresses the template.
1387 (define_expand "negv2df2"
1388 [(set (match_operand:V2DF 0 "register_operand" "")
1389 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1391 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
;; V2DF absolute value.  Expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS, ...); DONE suppresses the template.
1393 (define_expand "absv2df2"
1394 [(set (match_operand:V2DF 0 "register_operand" "")
1395 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1397 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
;; V2DF addition expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (PLUS, ...), then the plus template
;; is emitted.
1399 (define_expand "addv2df3"
1400 [(set (match_operand:V2DF 0 "register_operand" "")
1401 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1402 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1404 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
;; addpd.  Operand 1 carries the "%" commutativity modifier and is tied to
;; the destination; operand 2 may be a register or memory.  Requires
;; TARGET_SSE2 and ix86_binary_operator_ok (PLUS, V2DFmode, operands).
1406 (define_insn "*addv2df3"
1407 [(set (match_operand:V2DF 0 "register_operand" "=x")
1408 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1409 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1410 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1411 "addpd\t{%2, %0|%0, %2}"
1412 [(set_attr "type" "sseadd")
1413 (set_attr "mode" "V2DF")])
;; addsd on V2DF operands.  Operand 1 carries the "%" commutativity
;; modifier and is tied to the destination; operand 2 may be a register or
;; memory.  The operand check is made in V2DFmode, the mode of this
;; pattern's operands, in line with *addv2df3 and sse2_vmmulv2df3.
1415 (define_insn "sse2_vmaddv2df3"
1416 [(set (match_operand:V2DF 0 "register_operand" "=x")
1418 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1419 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1422 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1423 "addsd\t{%2, %0|%0, %2}"
1424 [(set_attr "type" "sseadd")
1425 (set_attr "mode" "DF")])
;; V2DF subtraction expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (MINUS, ...).
1427 (define_expand "subv2df3"
1428 [(set (match_operand:V2DF 0 "register_operand" "")
1429 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1430 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1432 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
;; subpd.  Operand 1 must be a register tied to the destination (no "%"
;; modifier); operand 2 may be a register or memory.
1434 (define_insn "*subv2df3"
1435 [(set (match_operand:V2DF 0 "register_operand" "=x")
1436 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1437 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1439 "subpd\t{%2, %0|%0, %2}"
1440 [(set_attr "type" "sseadd")
1441 (set_attr "mode" "V2DF")])
;; subsd on V2DF operands.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.  The "mode"
;; attribute is DF.
1443 (define_insn "sse2_vmsubv2df3"
1444 [(set (match_operand:V2DF 0 "register_operand" "=x")
1446 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1447 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1451 "subsd\t{%2, %0|%0, %2}"
1452 [(set_attr "type" "sseadd")
1453 (set_attr "mode" "DF")])
;; V2DF multiplication expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (MULT, ...).
1455 (define_expand "mulv2df3"
1456 [(set (match_operand:V2DF 0 "register_operand" "")
1457 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1458 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1460 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
;; mulpd.  Operand 1 carries the "%" commutativity modifier and is tied to
;; the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (MULT, V2DFmode, operands).
1462 (define_insn "*mulv2df3"
1463 [(set (match_operand:V2DF 0 "register_operand" "=x")
1464 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1465 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1466 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1467 "mulpd\t{%2, %0|%0, %2}"
1468 [(set_attr "type" "ssemul")
1469 (set_attr "mode" "V2DF")])
;; mulsd on V2DF operands.  Operand 1 carries the "%" commutativity
;; modifier and is tied to the destination.  Same enabling condition as
;; *mulv2df3; the "mode" attribute is DF.
1471 (define_insn "sse2_vmmulv2df3"
1472 [(set (match_operand:V2DF 0 "register_operand" "=x")
1474 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1475 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1478 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1479 "mulsd\t{%2, %0|%0, %2}"
1480 [(set_attr "type" "ssemul")
1481 (set_attr "mode" "DF")])
;; V2DF division expander.  Operand 1 must already be a register here;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy (DIV, ...).
1483 (define_expand "divv2df3"
1484 [(set (match_operand:V2DF 0 "register_operand" "")
1485 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1486 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1488 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
;; divpd.  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
1490 (define_insn "*divv2df3"
1491 [(set (match_operand:V2DF 0 "register_operand" "=x")
1492 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1493 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1495 "divpd\t{%2, %0|%0, %2}"
1496 [(set_attr "type" "ssediv")
1497 (set_attr "mode" "V2DF")])
;; divsd on V2DF operands.  Operand 1 is a register tied to the
;; destination; operand 2 may be a register or memory.  The "mode"
;; attribute is DF.
1499 (define_insn "sse2_vmdivv2df3"
1500 [(set (match_operand:V2DF 0 "register_operand" "=x")
1502 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1503 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1507 "divsd\t{%1, %0|%0, %1}"
1508 [(set_attr "type" "ssediv")
1509 (set_attr "mode" "DF")])
;; sqrtpd: V2DF square root of a register or memory operand into an SSE
;; register.
1511 (define_insn "sqrtv2df2"
1512 [(set (match_operand:V2DF 0 "register_operand" "=x")
1513 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1515 "sqrtpd\t{%1, %0|%0, %1}"
1516 [(set_attr "type" "sse")
1517 (set_attr "mode" "V2DF")])
;; sqrtsd on V2DF operands.  Operand 1 is the square-root source and may be
;; an SSE register or memory, so its predicate is nonimmediate_operand to
;; agree with the "xm" constraint.  Operand 2 is tied to the destination.
;; The "mode" attribute is DF because sqrtsd is a double-precision scalar
;; operation.
1519 (define_insn "sse2_vmsqrtv2df2"
1520 [(set (match_operand:V2DF 0 "register_operand" "=x")
1522 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1523 (match_operand:V2DF 2 "register_operand" "0")
1526 "sqrtsd\t{%1, %0|%0, %1}"
1527 [(set_attr "type" "sse")
1528 (set_attr "mode" "DF")])
1530 ;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1531 ;; isn't really correct, as those rtl operators aren't defined when
1532 ;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V2DF maximum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy (SMAX, ...).
1534 (define_expand "smaxv2df3"
1535 [(set (match_operand:V2DF 0 "register_operand" "")
1536 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1537 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1540 if (!flag_finite_math_only)
1541 operands[1] = force_reg (V2DFmode, operands[1]);
1542 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
;; maxpd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok (SMAX, V2DFmode, operands).
1545 (define_insn "*smaxv2df3_finite"
1546 [(set (match_operand:V2DF 0 "register_operand" "=x")
1547 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1548 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1549 "TARGET_SSE2 && flag_finite_math_only
1550 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1551 "maxpd\t{%2, %0|%0, %2}"
1552 [(set_attr "type" "sseadd")
1553 (set_attr "mode" "V2DF")])
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" modifier.
1555 (define_insn "*smaxv2df3"
1556 [(set (match_operand:V2DF 0 "register_operand" "=x")
1557 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1558 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1560 "maxpd\t{%2, %0|%0, %2}"
1561 [(set_attr "type" "sseadd")
1562 (set_attr "mode" "V2DF")])
;; maxsd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok (SMAX, V2DFmode, operands).
1564 (define_insn "*sse2_vmsmaxv2df3_finite"
1565 [(set (match_operand:V2DF 0 "register_operand" "=x")
1567 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1568 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1571 "TARGET_SSE2 && flag_finite_math_only
1572 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1573 "maxsd\t{%2, %0|%0, %2}"
1574 [(set_attr "type" "sseadd")
1575 (set_attr "mode" "DF")])
;; maxsd, non-commutative form: operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1577 (define_insn "sse2_vmsmaxv2df3"
1578 [(set (match_operand:V2DF 0 "register_operand" "=x")
1580 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1585 "maxsd\t{%2, %0|%0, %2}"
1586 [(set_attr "type" "sseadd")
1587 (set_attr "mode" "DF")])
;; V2DF minimum expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first; the operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy (SMIN, ...).
1589 (define_expand "sminv2df3"
1590 [(set (match_operand:V2DF 0 "register_operand" "")
1591 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1595 if (!flag_finite_math_only)
1596 operands[1] = force_reg (V2DFmode, operands[1]);
1597 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
;; minpd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok (SMIN, V2DFmode, operands).
1600 (define_insn "*sminv2df3_finite"
1601 [(set (match_operand:V2DF 0 "register_operand" "=x")
1602 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1603 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1604 "TARGET_SSE2 && flag_finite_math_only
1605 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1606 "minpd\t{%2, %0|%0, %2}"
1607 [(set_attr "type" "sseadd")
1608 (set_attr "mode" "V2DF")])
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and carries no "%" modifier.
1610 (define_insn "*sminv2df3"
1611 [(set (match_operand:V2DF 0 "register_operand" "=x")
1612 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1613 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1615 "minpd\t{%2, %0|%0, %2}"
1616 [(set_attr "type" "sseadd")
1617 (set_attr "mode" "V2DF")])
;; minsd, commutative form ("%" on operand 1).  Only enabled when
;; flag_finite_math_only is set, in addition to TARGET_SSE2 and
;; ix86_binary_operator_ok (SMIN, V2DFmode, operands).
1619 (define_insn "*sse2_vmsminv2df3_finite"
1620 [(set (match_operand:V2DF 0 "register_operand" "=x")
1622 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1623 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1626 "TARGET_SSE2 && flag_finite_math_only
1627 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1628 "minsd\t{%2, %0|%0, %2}"
1629 [(set_attr "type" "sseadd")
1630 (set_attr "mode" "DF")])
;; minsd, non-commutative form: operand 1 must be a register tied to the
;; destination; operand 2 may be a register or memory.
1632 (define_insn "sse2_vmsminv2df3"
1633 [(set (match_operand:V2DF 0 "register_operand" "=x")
1635 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1636 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1640 "minsd\t{%2, %0|%0, %2}"
1641 [(set_attr "type" "sseadd")
1642 (set_attr "mode" "DF")])
;; addsubpd.  Operand 1 is a register tied to the destination; operand 2
;; may be a register or memory.  The RTL reuses both operands through
;; match_dup in a (minus:V2DF ...) arm.
1644 (define_insn "sse3_addsubv2df3"
1645 [(set (match_operand:V2DF 0 "register_operand" "=x")
1648 (match_operand:V2DF 1 "register_operand" "0")
1649 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1650 (minus:V2DF (match_dup 1) (match_dup 2))
1653 "addsubpd\t{%2, %0|%0, %2}"
1654 [(set_attr "type" "sseadd")
1655 (set_attr "mode" "V2DF")])
;; haddpd.  The RTL is built from DF selections of element 0 and element 1
;; of operand 1 (tied to the destination), and likewise of operand 2
;; (register or memory).
1657 (define_insn "sse3_haddv2df3"
1658 [(set (match_operand:V2DF 0 "register_operand" "=x")
1662 (match_operand:V2DF 1 "register_operand" "0")
1663 (parallel [(const_int 0)]))
1664 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1667 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1668 (parallel [(const_int 0)]))
1669 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1671 "haddpd\t{%2, %0|%0, %2}"
1672 [(set_attr "type" "sseadd")
1673 (set_attr "mode" "V2DF")])
;; hsubpd.  Same operand layout as sse3_haddv2df3: DF selections of
;; elements 0 and 1 of operand 1 (tied to the destination) and of
;; operand 2 (register or memory).
1675 (define_insn "sse3_hsubv2df3"
1676 [(set (match_operand:V2DF 0 "register_operand" "=x")
1680 (match_operand:V2DF 1 "register_operand" "0")
1681 (parallel [(const_int 0)]))
1682 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1685 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1686 (parallel [(const_int 0)]))
1687 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1689 "hsubpd\t{%2, %0|%0, %2}"
1690 [(set_attr "type" "sseadd")
1691 (set_attr "mode" "V2DF")])
;; V2DF sum-reduction expander.  Implemented as a single
;; sse3_haddv2df3 with operand 1 supplied as both sources.
1693 (define_expand "reduc_splus_v2df"
1694 [(match_operand:V2DF 0 "register_operand" "")
1695 (match_operand:V2DF 1 "register_operand" "")]
1698 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1702 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1704 ;; Parallel double-precision floating point comparisons
1706 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Packed double compare producing a V2DF result.  Operator 3 must satisfy
;; sse_comparison_operator and is printed through %D3 to form the
;; cmp<cond>pd mnemonic.  Operand 1 is tied to the destination.
1708 (define_insn "sse2_maskcmpv2df3"
1709 [(set (match_operand:V2DF 0 "register_operand" "=x")
1710 (match_operator:V2DF 3 "sse_comparison_operator"
1711 [(match_operand:V2DF 1 "register_operand" "0")
1712 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1714 "cmp%D3pd\t{%2, %0|%0, %2}"
1715 [(set_attr "type" "ssecmp")
1716 (set_attr "mode" "V2DF")])
;; Counterpart of sse2_maskcmpv2df3 emitting cmp<cond>sd.  Operator 3 must
;; satisfy sse_comparison_operator and is printed through %D3.  Operand 1
;; is tied to the destination; the "mode" attribute is DF.
1718 (define_insn "sse2_vmmaskcmpv2df3"
1719 [(set (match_operand:V2DF 0 "register_operand" "=x")
1721 (match_operator:V2DF 3 "sse_comparison_operator"
1722 [(match_operand:V2DF 1 "register_operand" "0")
1723 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1727 "cmp%D3sd\t{%2, %0|%0, %2}"
1728 [(set_attr "type" "ssecmp")
1729 (set_attr "mode" "DF")])
;; comisd.  Sets FLAGS_REG in CCFP mode from element 0 of operand 0
;; (register) and element 0 of operand 1 (register or memory).
1731 (define_insn "sse2_comi"
1732 [(set (reg:CCFP FLAGS_REG)
1735 (match_operand:V2DF 0 "register_operand" "x")
1736 (parallel [(const_int 0)]))
1738 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1739 (parallel [(const_int 0)]))))]
1741 "comisd\t{%1, %0|%0, %1}"
1742 [(set_attr "type" "ssecomi")
1743 (set_attr "mode" "DF")])
;; ucomisd.  Same shape as sse2_comi, but FLAGS_REG is set in CCFPU mode.
1745 (define_insn "sse2_ucomi"
1746 [(set (reg:CCFPU FLAGS_REG)
1749 (match_operand:V2DF 0 "register_operand" "x")
1750 (parallel [(const_int 0)]))
1752 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1753 (parallel [(const_int 0)]))))]
1755 "ucomisd\t{%1, %0|%0, %1}"
1756 [(set_attr "type" "ssecomi")
1757 (set_attr "mode" "DF")])
;; V2DF vector conditional expander.  Operator 3 compares operands 4 and
;; 5; operands 1 and 2 are the two V2DF values selected between.  The
;; expansion is attempted through ix86_expand_fp_vcond (operands).
;; NOTE(review): the statements that act on that call's result are not
;; visible in this excerpt.
1759 (define_expand "vcondv2df"
1760 [(set (match_operand:V2DF 0 "register_operand" "")
1762 (match_operator 3 ""
1763 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1764 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1765 (match_operand:V2DF 1 "general_operand" "")
1766 (match_operand:V2DF 2 "general_operand" "")))]
1769 if (ix86_expand_fp_vcond (operands))
1775 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1777 ;; Parallel double-precision floating point logical operations
1779 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; V2DF bitwise AND expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (AND, ...).
1781 (define_expand "andv2df3"
1782 [(set (match_operand:V2DF 0 "register_operand" "")
1783 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1784 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1786 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
;; andpd.  Operand 1 carries the "%" commutativity modifier and is tied to
;; the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (AND, V2DFmode, operands).
1788 (define_insn "*andv2df3"
1789 [(set (match_operand:V2DF 0 "register_operand" "=x")
1790 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1791 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1792 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1793 "andpd\t{%2, %0|%0, %2}"
1794 [(set_attr "type" "sselog")
1795 (set_attr "mode" "V2DF")])
;; andnpd: (and (not operand 1) operand 2).  The complemented operand 1 is
;; the register tied to the destination; operand 2 may be a register or
;; memory.
1797 (define_insn "sse2_nandv2df3"
1798 [(set (match_operand:V2DF 0 "register_operand" "=x")
1799 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1800 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1802 "andnpd\t{%2, %0|%0, %2}"
1803 [(set_attr "type" "sselog")
1804 (set_attr "mode" "V2DF")])
;; V2DF bitwise OR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (IOR, ...).
1806 (define_expand "iorv2df3"
1807 [(set (match_operand:V2DF 0 "register_operand" "")
1808 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1809 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1811 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
;; orpd.  Operand 1 carries the "%" commutativity modifier and is tied to
;; the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (IOR, V2DFmode, operands).
1813 (define_insn "*iorv2df3"
1814 [(set (match_operand:V2DF 0 "register_operand" "=x")
1815 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1816 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1817 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1818 "orpd\t{%2, %0|%0, %2}"
1819 [(set_attr "type" "sselog")
1820 (set_attr "mode" "V2DF")])
;; V2DF bitwise XOR expander.  Operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy (XOR, ...).
1822 (define_expand "xorv2df3"
1823 [(set (match_operand:V2DF 0 "register_operand" "")
1824 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1825 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1827 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
;; xorpd.  Operand 1 carries the "%" commutativity modifier and is tied to
;; the destination.  Requires TARGET_SSE2 and
;; ix86_binary_operator_ok (XOR, V2DFmode, operands).
1829 (define_insn "*xorv2df3"
1830 [(set (match_operand:V2DF 0 "register_operand" "=x")
1831 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1832 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1833 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1834 "xorpd\t{%2, %0|%0, %2}"
1835 [(set_attr "type" "sselog")
1836 (set_attr "mode" "V2DF")])
1838 ;; Also define scalar versions. These are used for abs, neg, and
1839 ;; conditional move. Using subregs into vector modes causes register
1840 ;; allocation lossage. These patterns do not allow memory operands
1841 ;; because the native instructions read the full 128-bits.
;; Scalar DF bitwise AND implemented with andpd.  Both inputs must be SSE
;; registers (no memory alternative); the "mode" attribute is V2DF.
1843 (define_insn "*anddf3"
1844 [(set (match_operand:DF 0 "register_operand" "=x")
1845 (and:DF (match_operand:DF 1 "register_operand" "0")
1846 (match_operand:DF 2 "register_operand" "x")))]
1848 "andpd\t{%2, %0|%0, %2}"
1849 [(set_attr "type" "sselog")
1850 (set_attr "mode" "V2DF")])
;; Scalar DF (and (not operand 1) operand 2) implemented with andnpd.
;; Both inputs must be SSE registers; the "mode" attribute is V2DF.
1852 (define_insn "*nanddf3"
1853 [(set (match_operand:DF 0 "register_operand" "=x")
1854 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1855 (match_operand:DF 2 "register_operand" "x")))]
1857 "andnpd\t{%2, %0|%0, %2}"
1858 [(set_attr "type" "sselog")
1859 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise OR implemented with orpd.  Both inputs must be SSE
;; registers; the "mode" attribute is V2DF.
1861 (define_insn "*iordf3"
1862 [(set (match_operand:DF 0 "register_operand" "=x")
1863 (ior:DF (match_operand:DF 1 "register_operand" "0")
1864 (match_operand:DF 2 "register_operand" "x")))]
1866 "orpd\t{%2, %0|%0, %2}"
1867 [(set_attr "type" "sselog")
1868 (set_attr "mode" "V2DF")])
;; Scalar DF bitwise XOR implemented with xorpd.  Both inputs must be SSE
;; registers; the "mode" attribute is V2DF.
1870 (define_insn "*xordf3"
1871 [(set (match_operand:DF 0 "register_operand" "=x")
1872 (xor:DF (match_operand:DF 1 "register_operand" "0")
1873 (match_operand:DF 2 "register_operand" "x")))]
1875 "xorpd\t{%2, %0|%0, %2}"
1876 [(set_attr "type" "sselog")
1877 (set_attr "mode" "V2DF")])
1879 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1881 ;; Parallel double-precision floating point conversion operations
1883 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; cvtpi2pd: (float:V2DF ...) of a V2SI source held in an MMX register
;; ("y") or in memory.  The "unit" attribute is "mmx" only for the
;; register alternative.
1885 (define_insn "sse2_cvtpi2pd"
1886 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1887 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1889 "cvtpi2pd\t{%1, %0|%0, %1}"
1890 [(set_attr "type" "ssecvt")
1891 (set_attr "unit" "mmx,*")
1892 (set_attr "mode" "V2DF")])
;; cvtpd2pi: V2DF register or memory -> V2SI in an MMX register ("y").
;; The conversion is represented as an UNSPEC_FIX_NOTRUNC unspec.
1894 (define_insn "sse2_cvtpd2pi"
1895 [(set (match_operand:V2SI 0 "register_operand" "=y")
1896 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1897 UNSPEC_FIX_NOTRUNC))]
1899 "cvtpd2pi\t{%1, %0|%0, %1}"
1900 [(set_attr "type" "ssecvt")
1901 (set_attr "unit" "mmx")
1902 (set_attr "mode" "DI")])
;; cvttpd2pi: (fix:V2SI ...) of a V2DF register or memory operand into an
;; MMX register ("y").
;; NOTE(review): the "mode" attribute here is TI whereas sse2_cvtpd2pi,
;; which has the same operand shapes, uses DI -- confirm which is intended.
1904 (define_insn "sse2_cvttpd2pi"
1905 [(set (match_operand:V2SI 0 "register_operand" "=y")
1906 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1908 "cvttpd2pi\t{%1, %0|%0, %1}"
1909 [(set_attr "type" "ssecvt")
1910 (set_attr "unit" "mmx")
1911 (set_attr "mode" "TI")])
;; cvtsi2sd: (float:DF ...) of SImode operand 2 (general register or
;; memory).  Operand 1 is the V2DF register tied to the destination.
1913 (define_insn "sse2_cvtsi2sd"
1914 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1917 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1918 (match_operand:V2DF 1 "register_operand" "0,0")
1921 "cvtsi2sd\t{%2, %0|%0, %2}"
1922 [(set_attr "type" "sseicvt")
1923 (set_attr "mode" "DF")
1924 (set_attr "athlon_decode" "double,direct")])
;; cvtsi2sdq: DImode variant of sse2_cvtsi2sd, converting DImode operand 2
;; (general register or memory).  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1926 (define_insn "sse2_cvtsi2sdq"
1927 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1930 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1931 (match_operand:V2DF 1 "register_operand" "0,0")
1933 "TARGET_SSE2 && TARGET_64BIT"
1934 "cvtsi2sdq\t{%2, %0|%0, %2}"
1935 [(set_attr "type" "sseicvt")
1936 (set_attr "mode" "DF")
1937 (set_attr "athlon_decode" "double,direct")])
;; cvtsd2si: element 0 of V2DF operand 1 (SSE register or memory) is
;; converted into an SImode general register.  The conversion is wrapped
;; in an UNSPEC_FIX_NOTRUNC unspec.
1939 (define_insn "sse2_cvtsd2si"
1940 [(set (match_operand:SI 0 "register_operand" "=r,r")
1943 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1944 (parallel [(const_int 0)]))]
1945 UNSPEC_FIX_NOTRUNC))]
1947 "cvtsd2si\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "sseicvt")
1949 (set_attr "athlon_decode" "double,vector")
1950 (set_attr "mode" "SI")])
;; cvtsd2siq: DImode-result variant of sse2_cvtsd2si, also expressed as an
;; UNSPEC_FIX_NOTRUNC unspec of element 0 of operand 1.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1952 (define_insn "sse2_cvtsd2siq"
1953 [(set (match_operand:DI 0 "register_operand" "=r,r")
1956 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1957 (parallel [(const_int 0)]))]
1958 UNSPEC_FIX_NOTRUNC))]
1959 "TARGET_SSE2 && TARGET_64BIT"
1960 "cvtsd2siq\t{%1, %0|%0, %1}"
1961 [(set_attr "type" "sseicvt")
1962 (set_attr "athlon_decode" "double,vector")
1963 (set_attr "mode" "DI")])
;; cvttsd2si: element 0 of V2DF operand 1 (SSE register or memory) is
;; converted into an SImode general register.
1965 (define_insn "sse2_cvttsd2si"
1966 [(set (match_operand:SI 0 "register_operand" "=r,r")
1969 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1970 (parallel [(const_int 0)]))))]
1972 "cvttsd2si\t{%1, %0|%0, %1}"
1973 [(set_attr "type" "sseicvt")
1974 (set_attr "mode" "SI")
1975 (set_attr "athlon_decode" "double,vector")])
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si.  Only enabled for
;; TARGET_SSE2 && TARGET_64BIT.
1977 (define_insn "sse2_cvttsd2siq"
1978 [(set (match_operand:DI 0 "register_operand" "=r,r")
1981 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1982 (parallel [(const_int 0)]))))]
1983 "TARGET_SSE2 && TARGET_64BIT"
1984 "cvttsd2siq\t{%1, %0|%0, %1}"
1985 [(set_attr "type" "sseicvt")
1986 (set_attr "mode" "DI")
1987 (set_attr "athlon_decode" "double,vector")])
;; cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 (register or
;; memory) into a V2DF register.
1989 (define_insn "sse2_cvtdq2pd"
1990 [(set (match_operand:V2DF 0 "register_operand" "=x")
1993 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1994 (parallel [(const_int 0) (const_int 1)]))))]
1996 "cvtdq2pd\t{%1, %0|%0, %1}"
1997 [(set_attr "type" "ssecvt")
1998 (set_attr "mode" "V2DF")])
;; Expander for cvtpd2dq.  It supplies operands[2] as the V2SImode zero
;; constant, which the matching insn *sse2_cvtpd2dq requires through its
;; const0_operand predicate.
2000 (define_expand "sse2_cvtpd2dq"
2001 [(set (match_operand:V4SI 0 "register_operand" "")
2003 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2007 "operands[2] = CONST0_RTX (V2SImode);")
;; cvtpd2dq insn.  The V4SI result pairs a V2SI unspec conversion of
;; operand 1 with operand 2, which must be a V2SI zero constant.
2009 (define_insn "*sse2_cvtpd2dq"
2010 [(set (match_operand:V4SI 0 "register_operand" "=x")
2012 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2014 (match_operand:V2SI 2 "const0_operand" "")))]
2016 "cvtpd2dq\t{%1, %0|%0, %1}"
2017 [(set_attr "type" "ssecvt")
2018 (set_attr "mode" "TI")])
;; Expander for cvttpd2dq.  It supplies operands[2] as the V2SImode zero
;; constant, which the matching insn *sse2_cvttpd2dq requires through its
;; const0_operand predicate.
2020 (define_expand "sse2_cvttpd2dq"
2021 [(set (match_operand:V4SI 0 "register_operand" "")
2023 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2026 "operands[2] = CONST0_RTX (V2SImode);")
;; cvttpd2dq insn.  The V4SI result pairs (fix:V2SI operand 1) with
;; operand 2, which must be a V2SI zero constant.
2028 (define_insn "*sse2_cvttpd2dq"
2029 [(set (match_operand:V4SI 0 "register_operand" "=x")
2031 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2032 (match_operand:V2SI 2 "const0_operand" "")))]
2034 "cvttpd2dq\t{%1, %0|%0, %1}"
2035 [(set_attr "type" "ssecvt")
2036 (set_attr "mode" "TI")])
;; cvtsd2ss: (float_truncate:V2SF ...) of V2DF operand 2 (register or
;; memory).  Operand 1 is the V4SF register tied to the destination.
2038 (define_insn "sse2_cvtsd2ss"
2039 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2042 (float_truncate:V2SF
2043 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2044 (match_operand:V4SF 1 "register_operand" "0,0")
2047 "cvtsd2ss\t{%2, %0|%0, %2}"
2048 [(set_attr "type" "ssecvt")
2049 (set_attr "athlon_decode" "vector,double")
2050 (set_attr "mode" "SF")])
;; cvtss2sd: uses elements 0 and 1 of V4SF operand 2 (register or memory).
;; Operand 1 is the V2DF register tied to the destination.
2052 (define_insn "sse2_cvtss2sd"
2053 [(set (match_operand:V2DF 0 "register_operand" "=x")
2057 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2058 (parallel [(const_int 0) (const_int 1)])))
2059 (match_operand:V2DF 1 "register_operand" "0")
2062 "cvtss2sd\t{%2, %0|%0, %2}"
2063 [(set_attr "type" "ssecvt")
2064 (set_attr "mode" "DF")])
;; Expander for cvtpd2ps.  It supplies operands[2] as the V2SFmode zero
;; constant, which the matching insn *sse2_cvtpd2ps requires through its
;; const0_operand predicate.
2066 (define_expand "sse2_cvtpd2ps"
2067 [(set (match_operand:V4SF 0 "register_operand" "")
2069 (float_truncate:V2SF
2070 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2073 "operands[2] = CONST0_RTX (V2SFmode);")
;; cvtpd2ps insn.  The V4SF result pairs (float_truncate:V2SF operand 1)
;; with operand 2, which must be a V2SF zero constant.
2075 (define_insn "*sse2_cvtpd2ps"
2076 [(set (match_operand:V4SF 0 "register_operand" "=x")
2078 (float_truncate:V2SF
2079 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2080 (match_operand:V2SF 2 "const0_operand" "")))]
2082 "cvtpd2ps\t{%1, %0|%0, %1}"
2083 [(set_attr "type" "ssecvt")
2084 (set_attr "mode" "V4SF")])
;; cvtps2pd: converts elements 0 and 1 of V4SF operand 1 (register or
;; memory) into a V2DF register.
2086 (define_insn "sse2_cvtps2pd"
2087 [(set (match_operand:V2DF 0 "register_operand" "=x")
2090 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2091 (parallel [(const_int 0) (const_int 1)]))))]
2093 "cvtps2pd\t{%1, %0|%0, %1}"
2094 [(set_attr "type" "ssecvt")
2095 (set_attr "mode" "V2DF")])
2097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2099 ;; Parallel double-precision floating point element swizzling
2101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unpckhpd and its memory forms.  Three alternatives:
;;   0: register destination, operand 1 tied, operand 2 in a register
;;      -> unpckhpd.
;;   1: register destination, operand 1 in offsettable memory -> movlpd
;;      loading from %H1.
;;   2: memory destination, operand 1 in a register -> movhpd store.
;; The condition rejects operands 1 and 2 both being memory.
2103 (define_insn "sse2_unpckhpd"
2104 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2107 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2108 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2109 (parallel [(const_int 1)
2111 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2113 unpckhpd\t{%2, %0|%0, %2}
2114 movlpd\t{%H1, %0|%0, %H1}
2115 movhpd\t{%1, %0|%0, %1}"
2116 [(set_attr "type" "sselog,ssemov,ssemov")
2117 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; movddup.  Alternative 0 loads from a register or memory source into an
;; SSE register; alternative 1 has an offsettable memory destination and a
;; register source.  This pattern only has operands 0 and 1, so the
;; memory-to-memory guard in the condition tests those two operands.
2119 (define_insn "*sse3_movddup"
2120 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2123 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2125 (parallel [(const_int 0)
2127 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2129 movddup\t{%1, %0|%0, %1}
2131 [(set_attr "type" "sselog,ssemov")
2132 (set_attr "mode" "V2DF")])
;; Post-reload split (TARGET_SSE3 && reload_completed) for a V2DF memory
;; destination with a register source: a DFmode view of the source
;; register is stored twice, at byte offsets 0 and 8 of operand 0.
2135 [(set (match_operand:V2DF 0 "memory_operand" "")
2138 (match_operand:V2DF 1 "register_operand" "")
2140 (parallel [(const_int 0)
2142 "TARGET_SSE3 && reload_completed"
2145 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2146 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2147 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
;; Three alternatives, all with operand 1 tied to the destination:
;; unpcklpd from a register, movhpd from memory, and movlpd storing
;; register operand 2 to %H0 of an offsettable memory destination.
2151 (define_insn "sse2_unpcklpd"
2152 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2155 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2156 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2157 (parallel [(const_int 0)
2159 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2161 unpcklpd\t{%2, %0|%0, %2}
2162 movhpd\t{%2, %0|%0, %2}
2163 movlpd\t{%2, %H0|%H0, %2}"
2164 [(set_attr "type" "sselog,ssemov,ssemov")
2165 (set_attr "mode" "V2DF,V1DF,V1DF")])
;; Expander taking the shufpd immediate as SImode const_int operand 3.
;; It forwards to gen_sse2_shufpd_1; the visible argument maps mask bit 1
;; to selector index 3 (bit set) or 2 (bit clear).
2167 (define_expand "sse2_shufpd"
2168 [(match_operand:V2DF 0 "register_operand" "")
2169 (match_operand:V2DF 1 "register_operand" "")
2170 (match_operand:V2DF 2 "nonimmediate_operand" "")
2171 (match_operand:SI 3 "const_int_operand" "")]
2174 int mask = INTVAL (operands[3]);
2175 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2177 GEN_INT (mask & 2 ? 3 : 2)));
;; Insn emitting shufpd.  Operands 3 and 4 are the two element selectors;
;; the output code folds them back into the instruction's immediate.
2181 (define_insn "sse2_shufpd_1"
2182 [(set (match_operand:V2DF 0 "register_operand" "=x")
2185 (match_operand:V2DF 1 "register_operand" "0")
2186 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2187 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2188 (match_operand 4 "const_2_to_3_operand" "")])))]
2192 mask = INTVAL (operands[3]);
/* Operand 4 is biased by 2; subtracting it yields immediate bit 1.  */
2193 mask |= (INTVAL (operands[4]) - 2) << 1;
2194 operands[3] = GEN_INT (mask);
2196 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2198 [(set_attr "type" "sselog")
2199 (set_attr "mode" "V2DF")])
;; Extracts element 1 of V2DF operand 1 into a DF destination.  The first
;; alternative is a movhpd store to memory; the condition rejects a
;; memory destination combined with a memory source.
2201 (define_insn "sse2_storehpd"
2202 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2204 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2205 (parallel [(const_int 1)])))]
2206 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2208 movhpd\t{%1, %0|%0, %1}
2211 [(set_attr "type" "ssemov,sselog1,ssemov")
2212 (set_attr "mode" "V1DF,V2DF,DF")])
;; Post-reload split: extracting element 1 from a V2DF memory operand
;; becomes a plain DF move from that memory at byte offset 8.
2215 [(set (match_operand:DF 0 "register_operand" "")
2217 (match_operand:V2DF 1 "memory_operand" "")
2218 (parallel [(const_int 1)])))]
2219 "TARGET_SSE2 && reload_completed"
2220 [(set (match_dup 0) (match_dup 1))]
2222 operands[1] = adjust_address (operands[1], DFmode, 8);
;; Extracts element 0 of V2DF operand 1 into a DF destination.  The first
;; alternative is a movlpd store to memory; the condition rejects a
;; memory destination combined with a memory source.
2225 (define_insn "sse2_storelpd"
2226 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2228 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2229 (parallel [(const_int 0)])))]
2230 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2232 movlpd\t{%1, %0|%0, %1}
2235 [(set_attr "type" "ssemov")
2236 (set_attr "mode" "V1DF,DF,DF")])
;; Post-reload split: extracting element 0 into a DF register becomes a
;; plain DF move.  The source is rewritten either as a DFmode REG with
;; operand 1's register number or through gen_lowpart.
;; NOTE(review): the test choosing between the two forms is not visible
;; in this excerpt -- presumably REG_P on operand 1; confirm.
2239 [(set (match_operand:DF 0 "register_operand" "")
2241 (match_operand:V2DF 1 "nonimmediate_operand" "")
2242 (parallel [(const_int 0)])))]
2243 "TARGET_SSE2 && reload_completed"
2246 rtx op1 = operands[1];
2248 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2250 op1 = gen_lowpart (DFmode, op1);
2251 emit_move_insn (operands[0], op1);
;; Combines element 0 of V2DF operand 1 with DF operand 2.  Four
;; alternatives; the visible templates are movhpd (operand 2 in memory),
;; unpcklpd and shufpd $1.  The condition rejects operands 1 and 2 both
;; being memory.
2255 (define_insn "sse2_loadhpd"
2256 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
2259 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2260 (parallel [(const_int 0)]))
2261 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
2262 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2264 movhpd\t{%2, %0|%0, %2}
2265 unpcklpd\t{%2, %0|%0, %2}
2266 shufpd\t{$1, %1, %0|%0, %1, 1}
2268 [(set_attr "type" "ssemov,sselog,sselog,other")
2269 (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
;; Post-reload split for a memory destination: element 0 is the
;; destination's own element 0, so only DF operand 1 has to be stored,
;; at byte offset 8.
2272 [(set (match_operand:V2DF 0 "memory_operand" "")
2274 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2275 (match_operand:DF 1 "register_operand" "")))]
2276 "TARGET_SSE2 && reload_completed"
2277 [(set (match_dup 0) (match_dup 1))]
2279 operands[0] = adjust_address (operands[0], DFmode, 8);
;; Combines DF operand 2 with element 1 of operand 1 (a vector_move_operand,
;; so the "C" constant alternative is allowed).  Six alternatives; the
;; visible templates are movsd, movlpd, shufpd $2 and movhpd from %H1.
2282 (define_insn "sse2_loadlpd"
2283 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2285 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2287 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2288 (parallel [(const_int 1)]))))]
2289 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2291 movsd\t{%2, %0|%0, %2}
2292 movlpd\t{%2, %0|%0, %2}
2293 movsd\t{%2, %0|%0, %2}
2294 shufpd\t{$2, %2, %0|%0, %2, 2}
2295 movhpd\t{%H1, %0|%0, %H1}
2297 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2298 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
;; Post-reload split for a memory destination: element 1 is the
;; destination's own element 1, so only DF operand 1 has to be stored.
;; Operand 1 is the first (low) half of the concatenation, so the store
;; goes to byte offset 0.  The previous offset of 8 overwrote the high
;; element that this pattern says is preserved.
2301 [(set (match_operand:V2DF 0 "memory_operand" "")
2303 (match_operand:DF 1 "register_operand" "")
2304 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2305 "TARGET_SSE2 && reload_completed"
2306 [(set (match_dup 0) (match_dup 1))]
2308 operands[0] = adjust_address (operands[0], DFmode, 0);
;; Two V2DF inputs (operands 2 and 1) with six constraint alternatives.
;; The templates use movsd, movlpd (load and store), shufpd $2, and movhps
;; through %H1 / %H0 for the offsettable-memory alternatives.
2311 (define_insn "sse2_movsd"
2312 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2314 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2315 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2319 movsd\t{%2, %0|%0, %2}
2320 movlpd\t{%2, %0|%0, %2}
2321 movlpd\t{%2, %0|%0, %2}
2322 shufpd\t{$2, %2, %0|%0, %2, 2}
2323 movhps\t{%H1, %0|%0, %H1}
2324 movhps\t{%1, %H0|%H0, %1}"
2325 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2326 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
;; Builds a V2DF from DF operand 1 (register or memory) with movddup.
2328 (define_insn "*vec_dupv2df_sse3"
2329 [(set (match_operand:V2DF 0 "register_operand" "=x")
2331 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2333 "movddup\t{%1, %0|%0, %1}"
2334 [(set_attr "type" "sselog1")
2335 (set_attr "mode" "DF")])
;; Register-only variant: DF operand 1 is tied to the destination ("0").
;; The output template is not visible in this excerpt.
2337 (define_insn "*vec_dupv2df"
2338 [(set (match_operand:V2DF 0 "register_operand" "=x")
2340 (match_operand:DF 1 "register_operand" "0")))]
2343 [(set_attr "type" "sselog1")
2344 (set_attr "mode" "V4SF")])
;; Emits movddup with DF operand 1 (register or memory) as the source.
2346 (define_insn "*vec_concatv2df_sse3"
2347 [(set (match_operand:V2DF 0 "register_operand" "=x")
2349 (match_operand:DF 1 "nonimmediate_operand" "xm")
2352 "movddup\t{%1, %0|%0, %1}"
2353 [(set_attr "type" "sselog1")
2354 (set_attr "mode" "DF")])
;; Builds a V2DF from DF operands 1 and 2.  Five alternatives: unpcklpd,
;; movhpd and movsd with "Y" destinations, then movlhps and movhps with
;; "x" destinations.  Alternative 2 pairs a memory operand 1 with a "C"
;; constant operand 2 and emits a single movsd load.
2356 (define_insn "*vec_concatv2df"
2357 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2359 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2360 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2363 unpcklpd\t{%2, %0|%0, %2}
2364 movhpd\t{%2, %0|%0, %2}
2365 movsd\t{%1, %0|%0, %1}
2366 movlhps\t{%2, %0|%0, %2}
2367 movhps\t{%2, %0|%0, %2}"
2368 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2369 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
;; Expander: sets the element of V2DF operand 0 whose index is const_int
;; operand 2 to DF operand 1, by calling ix86_expand_vector_set with a
;; first argument of false.
2371 (define_expand "vec_setv2df"
2372 [(match_operand:V2DF 0 "register_operand" "")
2373 (match_operand:DF 1 "register_operand" "")
2374 (match_operand 2 "const_int_operand" "")]
2377 ix86_expand_vector_set (false, operands[0], operands[1],
2378 INTVAL (operands[2]));
;; Expander: extracts the element of V2DF operand 1 whose index is
;; const_int operand 2 into DF operand 0, by calling
;; ix86_expand_vector_extract with a first argument of false.
2382 (define_expand "vec_extractv2df"
2383 [(match_operand:DF 0 "register_operand" "")
2384 (match_operand:V2DF 1 "register_operand" "")
2385 (match_operand 2 "const_int_operand" "")]
2388 ix86_expand_vector_extract (false, operands[0], operands[1],
2389 INTVAL (operands[2]));
;; Expander: initializes V2DF operand 0 from operand 1 by calling
;; ix86_expand_vector_init with a first argument of false.
2393 (define_expand "vec_initv2df"
2394 [(match_operand:V2DF 0 "register_operand" "")
2395 (match_operand 1 "" "")]
2398 ix86_expand_vector_init (false, operands[0], operands[1]);
2402 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2404 ;; Parallel integral arithmetic
2406 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Integer vector negate for every SSEMODEI mode.  The preparation
;; statement puts a zero vector of the same mode into a register as
;; operand 2.
2408 (define_expand "neg<mode>2"
2409 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2412 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2414 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
;; Integer vector add expander for every SSEMODEI mode; operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before matching.
2416 (define_expand "add<mode>3"
2417 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2418 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2419 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2421 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
;; Emits padd{b,w,d,q}; the suffix comes from the ssevecsize attribute.
;; Operand 1 is marked commutative ("%") and tied to the destination.
2423 (define_insn "*add<mode>3"
2424 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2426 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2427 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2428 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2429 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2430 [(set_attr "type" "sseiadd")
2431 (set_attr "mode" "TI")])
;; Emits padds{b,w} for the SSEMODE12 modes (V16QI, V8HI); the condition
;; validates the operands against SS_PLUS.
2433 (define_insn "sse2_ssadd<mode>3"
2434 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2436 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2437 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2438 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2439 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2440 [(set_attr "type" "sseiadd")
2441 (set_attr "mode" "TI")])
;; Emits paddus{b,w} for the SSEMODE12 modes (V16QI, V8HI); the condition
;; validates the operands against US_PLUS.
2443 (define_insn "sse2_usadd<mode>3"
2444 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2446 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2447 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2448 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2449 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2450 [(set_attr "type" "sseiadd")
2451 (set_attr "mode" "TI")])
;; Integer vector subtract expander for every SSEMODEI mode.  Operand 1
;; must be a register; operand 2 may be memory.
2453 (define_expand "sub<mode>3"
2454 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2455 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2456 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2458 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
;; Emits psub{b,w,d,q}.  Operand 1 is tied to the destination and carries
;; no "%" commutativity marker.
2460 (define_insn "*sub<mode>3"
2461 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2463 (match_operand:SSEMODEI 1 "register_operand" "0")
2464 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2466 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2467 [(set_attr "type" "sseiadd")
2468 (set_attr "mode" "TI")])
;; Emits psubs{b,w} for the SSEMODE12 modes (V16QI, V8HI).
2470 (define_insn "sse2_sssub<mode>3"
2471 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2473 (match_operand:SSEMODE12 1 "register_operand" "0")
2474 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2476 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2477 [(set_attr "type" "sseiadd")
2478 (set_attr "mode" "TI")])
;; Emits psubus{b,w} for the SSEMODE12 modes (V16QI, V8HI).
2480 (define_insn "sse2_ussub<mode>3"
2481 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2483 (match_operand:SSEMODE12 1 "register_operand" "0")
2484 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2486 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2487 [(set_attr "type" "sseiadd")
2488 (set_attr "mode" "TI")])
;; V16QI multiply built from word multiplies.  Twelve V16QI temporaries are
;; allocated; both inputs are unpacked with punpck{h,l}bw against
;; themselves, multiplied as V8HI through gen_mulv8hi3, and the result
;; bytes are gathered back with a chain of punpck{h,l}bw.
2490 (define_expand "mulv16qi3"
2491 [(set (match_operand:V16QI 0 "register_operand" "")
2492 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2493 (match_operand:V16QI 2 "register_operand" "")))]
2499 for (i = 0; i < 12; ++i)
2500 t[i] = gen_reg_rtx (V16QImode);
2502 /* Unpack data such that we've got a source byte in each low byte of
2503 each word. We don't care what goes into the high byte of each word.
2504 Rather than trying to get zero in there, most convenient is to let
2505 it be a copy of the low byte. */
2506 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2507 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2508 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2509 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2511 /* Multiply words. The end-of-line annotations here give a picture of what
2512 the output of that instruction looks like. Dot means don't care; the
2513 letters are the bytes of the result with A being the most significant. */
2514 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2515 gen_lowpart (V8HImode, t[0]),
2516 gen_lowpart (V8HImode, t[1])));
2517 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2518 gen_lowpart (V8HImode, t[2]),
2519 gen_lowpart (V8HImode, t[3])));
2521 /* Extract the relevant bytes and merge them back together. */
2522 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2523 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2524 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2525 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2526 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2527 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2530 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
;; V8HI multiply expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
2534 (define_expand "mulv8hi3"
2535 [(set (match_operand:V8HI 0 "register_operand" "")
2536 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2537 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2539 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
;; Emits pmullw for a V8HI multiply; operand 1 is commutative ("%") and
;; tied to the destination.
2541 (define_insn "*mulv8hi3"
2542 [(set (match_operand:V8HI 0 "register_operand" "=x")
2543 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2544 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2545 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2546 "pmullw\t{%2, %0|%0, %2}"
2547 [(set_attr "type" "sseimul")
2548 (set_attr "mode" "TI")])
;; Emits pmulhw on two V8HI operands.  Most of the RTL between the
;; operands is not visible in this excerpt.
2550 (define_insn "sse2_smulv8hi3_highpart"
2551 [(set (match_operand:V8HI 0 "register_operand" "=x")
2556 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2558 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2560 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2561 "pmulhw\t{%2, %0|%0, %2}"
2562 [(set_attr "type" "sseimul")
2563 (set_attr "mode" "TI")])
;; Emits pmulhuw on two V8HI operands.  Most of the RTL between the
;; operands is not visible in this excerpt.
2565 (define_insn "sse2_umulv8hi3_highpart"
2566 [(set (match_operand:V8HI 0 "register_operand" "=x")
2571 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2573 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2575 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2576 "pmulhuw\t{%2, %0|%0, %2}"
2577 [(set_attr "type" "sseimul")
2578 (set_attr "mode" "TI")])
;; Emits pmuludq: elements 0 and 2 of each V4SI input are selected and the
;; result is V2DI.  The inputs are V4SI, so the operand check is given
;; V4SImode rather than the V8HImode copied from the neighbouring word
;; multiplies.
2580 (define_insn "sse2_umulv2siv2di3"
2581 [(set (match_operand:V2DI 0 "register_operand" "=x")
2585 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2586 (parallel [(const_int 0) (const_int 2)])))
2589 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2590 (parallel [(const_int 0) (const_int 2)])))))]
2591 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2592 "pmuludq\t{%2, %0|%0, %2}"
2593 [(set_attr "type" "sseimul")
2594 (set_attr "mode" "TI")])
;; Emits pmaddwd: V8HI operands 1 and 2 give a V4SI result.  The pattern
;; takes two V4HI selections from each input, one selector list starting
;; at index 0 and the other at index 1; the remaining RTL is not visible
;; in this excerpt.
2596 (define_insn "sse2_pmaddwd"
2597 [(set (match_operand:V4SI 0 "register_operand" "=x")
2602 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2603 (parallel [(const_int 0)
2609 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2610 (parallel [(const_int 0)
2616 (vec_select:V4HI (match_dup 1)
2617 (parallel [(const_int 1)
2622 (vec_select:V4HI (match_dup 2)
2623 (parallel [(const_int 1)
2626 (const_int 7)]))))))]
2628 "pmaddwd\t{%2, %0|%0, %2}"
2629 [(set_attr "type" "sseiadd")
2630 (set_attr "mode" "TI")])
;; V4SI multiply built from two gen_sse2_umulv2siv2di3 multiplies: one on
;; the inputs as given (elements 2 and 0) and one on copies shifted right
;; by 32 bits as TImode (elements 3 and 1).  The products are rearranged
;; with pshufd and merged with punpckldq.
2632 (define_expand "mulv4si3"
2633 [(set (match_operand:V4SI 0 "register_operand" "")
2634 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2635 (match_operand:V4SI 2 "register_operand" "")))]
2638 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2644 t1 = gen_reg_rtx (V4SImode);
2645 t2 = gen_reg_rtx (V4SImode);
2646 t3 = gen_reg_rtx (V4SImode);
2647 t4 = gen_reg_rtx (V4SImode);
2648 t5 = gen_reg_rtx (V4SImode);
2649 t6 = gen_reg_rtx (V4SImode);
2650 thirtytwo = GEN_INT (32);
2652 /* Multiply elements 2 and 0. */
2653 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2655 /* Shift both input vectors down one element, so that elements 3 and 1
2656 are now in the slots for elements 2 and 0. For K8, at least, this is
2657 faster than using a shuffle. */
2658 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2659 gen_lowpart (TImode, op1), thirtytwo));
2660 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2661 gen_lowpart (TImode, op2), thirtytwo));
2663 /* Multiply elements 3 and 1. */
2664 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2666 /* Move the results in element 2 down to element 1; we don't care what
2667 goes in elements 2 and 3. */
2668 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2669 const0_rtx, const0_rtx));
2670 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2671 const0_rtx, const0_rtx));
2673 /* Merge the parts back together. */
2674 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
;; V2DI multiply built from three gen_sse2_umulv2siv2di3 products:
;; low*low, plus the two cross products (low of one input times the
;; right-shifted other input), each shifted left by 32 bits before the
;; final two additions.
2678 (define_expand "mulv2di3"
2679 [(set (match_operand:V2DI 0 "register_operand" "")
2680 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2681 (match_operand:V2DI 2 "register_operand" "")))]
2684 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2690 t1 = gen_reg_rtx (V2DImode);
2691 t2 = gen_reg_rtx (V2DImode);
2692 t3 = gen_reg_rtx (V2DImode);
2693 t4 = gen_reg_rtx (V2DImode);
2694 t5 = gen_reg_rtx (V2DImode);
2695 t6 = gen_reg_rtx (V2DImode);
2696 thirtytwo = GEN_INT (32);
2698 /* Multiply low parts. */
2699 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2700 gen_lowpart (V4SImode, op2)));
2702 /* Shift input vectors right 32 bits so we can multiply high parts. */
2703 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2704 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2706 /* Multiply high parts by low parts. */
2707 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2708 gen_lowpart (V4SImode, t3)));
2709 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2710 gen_lowpart (V4SImode, t2)));
2712 /* Shift them back. */
2713 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2714 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2716 /* Add the three parts together. */
2717 emit_insn (gen_addv2di3 (t6, t1, t4));
2718 emit_insn (gen_addv2di3 (op0, t6, t5));
;; Emits psra{w,d} for the SSEMODE24 modes (V8HI, V4SI).  The count is
;; TImode operand 2, constrained to an SSE register or an immediate.
2722 (define_insn "ashr<mode>3"
2723 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2725 (match_operand:SSEMODE24 1 "register_operand" "0")
2726 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2728 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2729 [(set_attr "type" "sseishft")
2730 (set_attr "mode" "TI")])
;; Emits psrl{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).  The
;; count is TImode operand 2, constrained to an SSE register or an
;; immediate.
2732 (define_insn "lshr<mode>3"
2733 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2734 (lshiftrt:SSEMODE248
2735 (match_operand:SSEMODE248 1 "register_operand" "0")
2736 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2738 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2739 [(set_attr "type" "sseishft")
2740 (set_attr "mode" "TI")])
;; Emits psll{w,d,q} for the SSEMODE248 modes (V8HI, V4SI, V2DI).  The
;; count is TImode operand 2, constrained to an SSE register or an
;; immediate.
2742 (define_insn "ashl<mode>3"
2743 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2745 (match_operand:SSEMODE248 1 "register_operand" "0")
2746 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2748 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2749 [(set_attr "type" "sseishft")
2750 (set_attr "mode" "TI")])
;; Whole-register left shift emitted as pslldq.  Operand 2 is checked by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the instruction's immediate.
2752 (define_insn "sse2_ashlti3"
2753 [(set (match_operand:TI 0 "register_operand" "=x")
2754 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2755 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2758 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2759 return "pslldq\t{%2, %0|%0, %2}";
2761 [(set_attr "type" "sseishft")
2762 (set_attr "mode" "TI")])
;; Whole-vector left shift for every SSEMODEI mode.  Operand 2 is tested
;; against const_0_to_255_mul_8_operand, and operands 0 and 1 are viewed
;; in TImode through gen_lowpart so the TImode shift can match.
2764 (define_expand "vec_shl_<mode>"
2765 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2766 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2767 (match_operand:SI 2 "general_operand" "")))]
2770 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2772 operands[0] = gen_lowpart (TImode, operands[0]);
2773 operands[1] = gen_lowpart (TImode, operands[1]);
;; Whole-register logical right shift emitted as psrldq.  Operand 2 is
;; checked by const_0_to_255_mul_8_operand; the output code divides it by
;; 8 before printing it as the instruction's immediate.
2776 (define_insn "sse2_lshrti3"
2777 [(set (match_operand:TI 0 "register_operand" "=x")
2778 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2779 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2782 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2783 return "psrldq\t{%2, %0|%0, %2}";
2785 [(set_attr "type" "sseishft")
2786 (set_attr "mode" "TI")])
;; Whole-vector right shift for every SSEMODEI mode.  Operand 2 is tested
;; against const_0_to_255_mul_8_operand, and operands 0 and 1 are viewed
;; in TImode through gen_lowpart so the TImode shift can match.
2788 (define_expand "vec_shr_<mode>"
2789 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2790 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2791 (match_operand:SI 2 "general_operand" "")))]
2794 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2796 operands[0] = gen_lowpart (TImode, operands[0]);
2797 operands[1] = gen_lowpart (TImode, operands[1]);
;; Unsigned V16QI maximum expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
2800 (define_expand "umaxv16qi3"
2801 [(set (match_operand:V16QI 0 "register_operand" "")
2802 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2803 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2805 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
;; Emits pmaxub for the unsigned V16QI maximum.
2807 (define_insn "*umaxv16qi3"
2808 [(set (match_operand:V16QI 0 "register_operand" "=x")
2809 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2810 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2811 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2812 "pmaxub\t{%2, %0|%0, %2}"
2813 [(set_attr "type" "sseiadd")
2814 (set_attr "mode" "TI")])
;; Signed V8HI maximum expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
2816 (define_expand "smaxv8hi3"
2817 [(set (match_operand:V8HI 0 "register_operand" "")
2818 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2819 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2821 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
;; Emits pmaxsw for the signed V8HI maximum.
2823 (define_insn "*smaxv8hi3"
2824 [(set (match_operand:V8HI 0 "register_operand" "=x")
2825 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2826 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2827 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2828 "pmaxsw\t{%2, %0|%0, %2}"
2829 [(set_attr "type" "sseiadd")
2830 (set_attr "mode" "TI")])
;; Unsigned V8HI maximum as two sets: an unsigned saturating subtract of
;; operand 2 from operand 1, followed by a plus of that result and
;; operand 2.  Operand 3 is set to the original operand 0; when operand 0
;; is the same rtx as operand 2, a fresh register replaces operand 0 for
;; the intermediate value so operand 2 is not clobbered before the add.
2832 (define_expand "umaxv8hi3"
2833 [(set (match_operand:V8HI 0 "register_operand" "=x")
2834 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2835 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2837 (plus:V8HI (match_dup 0) (match_dup 2)))]
2840 operands[3] = operands[0];
2841 if (rtx_equal_p (operands[0], operands[2]))
2842 operands[0] = gen_reg_rtx (V8HImode);
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), expressed as a
;; vector conditional: (op1 > op2) ? op1 : op2.  The xops array is laid
;; out like the vcond operands and handed to ix86_expand_int_vcond.
2845 (define_expand "smax<mode>3"
2846 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2847 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2848 (match_operand:SSEMODE14 2 "register_operand" "")))]
2854 xops[0] = operands[0];
2855 xops[1] = operands[1];
2856 xops[2] = operands[2];
2857 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2858 xops[4] = operands[1];
2859 xops[5] = operands[2];
2860 ok = ix86_expand_int_vcond (xops);
;; Unsigned V4SI maximum, expressed as a vector conditional using GTU:
;; (op1 >u op2) ? op1 : op2, expanded by ix86_expand_int_vcond.
2865 (define_expand "umaxv4si3"
2866 [(set (match_operand:V4SI 0 "register_operand" "")
2867 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2868 (match_operand:V4SI 2 "register_operand" "")))]
2874 xops[0] = operands[0];
2875 xops[1] = operands[1];
2876 xops[2] = operands[2];
2877 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2878 xops[4] = operands[1];
2879 xops[5] = operands[2];
2880 ok = ix86_expand_int_vcond (xops);
;; Unsigned V16QI minimum expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
2885 (define_expand "uminv16qi3"
2886 [(set (match_operand:V16QI 0 "register_operand" "")
2887 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2888 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2890 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
;; Emits pminub for the unsigned V16QI minimum.
2892 (define_insn "*uminv16qi3"
2893 [(set (match_operand:V16QI 0 "register_operand" "=x")
2894 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2895 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2896 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2897 "pminub\t{%2, %0|%0, %2}"
2898 [(set_attr "type" "sseiadd")
2899 (set_attr "mode" "TI")])
;; Signed V8HI minimum expander; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
2901 (define_expand "sminv8hi3"
2902 [(set (match_operand:V8HI 0 "register_operand" "")
2903 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2904 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2906 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
;; Emits pminsw for the signed V8HI minimum.
2908 (define_insn "*sminv8hi3"
2909 [(set (match_operand:V8HI 0 "register_operand" "=x")
2910 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2911 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2912 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2913 "pminsw\t{%2, %0|%0, %2}"
2914 [(set_attr "type" "sseiadd")
2915 (set_attr "mode" "TI")])
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), expressed as a
;; vector conditional with the selected values swapped relative to smax:
;; (op1 > op2) ? op2 : op1, expanded by ix86_expand_int_vcond.
2917 (define_expand "smin<mode>3"
2918 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2919 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2920 (match_operand:SSEMODE14 2 "register_operand" "")))]
2926 xops[0] = operands[0];
2927 xops[1] = operands[2];
2928 xops[2] = operands[1];
2929 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2930 xops[4] = operands[1];
2931 xops[5] = operands[2];
2932 ok = ix86_expand_int_vcond (xops);
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expressed as a
;; vector conditional using GTU: (op1 >u op2) ? op2 : op1, expanded by
;; ix86_expand_int_vcond.
2937 (define_expand "umin<mode>3"
2938 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2939 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2940 (match_operand:SSEMODE24 2 "register_operand" "")))]
2946 xops[0] = operands[0];
2947 xops[1] = operands[2];
2948 xops[2] = operands[1];
2949 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2950 xops[4] = operands[1];
2951 xops[5] = operands[2];
2952 ok = ix86_expand_int_vcond (xops);
2957 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2959 ;; Parallel integral comparisons
2961 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits pcmpeq{b,w,d} for the SSEMODE124 modes; operand 1 is commutative
;; ("%") and tied to the destination.
2963 (define_insn "sse2_eq<mode>3"
2964 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2966 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2967 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2968 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2969 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2970 [(set_attr "type" "ssecmp")
2971 (set_attr "mode" "TI")])
;; Emits pcmpgt{b,w,d} for the SSEMODE124 modes.  Operand 1 must be a
;; register tied to the destination and carries no "%" marker.
2973 (define_insn "sse2_gt<mode>3"
2974 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2976 (match_operand:SSEMODE124 1 "register_operand" "0")
2977 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2979 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2980 [(set_attr "type" "ssecmp")
2981 (set_attr "mode" "TI")])
;; Vector conditional select for the SSEMODE124 modes.  Operand 3 is the
;; comparison applied to operands 4 and 5; operands 1 and 2 are the two
;; values selected between.  Expansion is delegated to
;; ix86_expand_int_vcond, whose result is tested.
2983 (define_expand "vcond<mode>"
2984 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2985 (if_then_else:SSEMODE124
2986 (match_operator 3 ""
2987 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2988 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2989 (match_operand:SSEMODE124 1 "general_operand" "")
2990 (match_operand:SSEMODE124 2 "general_operand" "")))]
2993 if (ix86_expand_int_vcond (operands))
;; Same operand layout as vcond<mode>, under the vcondu name; expansion
;; is likewise delegated to ix86_expand_int_vcond.
2999 (define_expand "vcondu<mode>"
3000 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3001 (if_then_else:SSEMODE124
3002 (match_operator 3 ""
3003 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3004 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3005 (match_operand:SSEMODE124 1 "general_operand" "")
3006 (match_operand:SSEMODE124 2 "general_operand" "")))]
3009 if (ix86_expand_int_vcond (operands))
3015 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3017 ;; Parallel integral logical operations
3019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; One's complement as an xor with an all-ones vector.  The preparation
;; code fills every element of a CONST_VECTOR with constm1_rtx and forces
;; it into a register as operand 2.
3021 (define_expand "one_cmpl<mode>2"
3022 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3023 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3027 int i, n = GET_MODE_NUNITS (<MODE>mode);
3028 rtvec v = rtvec_alloc (n);
3030 for (i = 0; i < n; ++i)
3031 RTVEC_ELT (v, i) = constm1_rtx;
3033 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
;; Bitwise AND expander for every SSEMODEI mode; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
3036 (define_expand "and<mode>3"
3037 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3038 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3039 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3041 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
;; Emits pand for every SSEMODEI mode.
3043 (define_insn "*and<mode>3"
3044 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3046 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3047 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3048 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3049 "pand\t{%2, %0|%0, %2}"
3050 [(set_attr "type" "sselog")
3051 (set_attr "mode" "TI")])
;; Emits pandn: the complemented input is operand 1, which is tied to the
;; destination; operand 2 is used as is.
3053 (define_insn "sse2_nand<mode>3"
3054 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3056 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3057 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3059 "pandn\t{%2, %0|%0, %2}"
3060 [(set_attr "type" "sselog")
3061 (set_attr "mode" "TI")])
;; Bitwise OR expander for every SSEMODEI mode; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
3063 (define_expand "ior<mode>3"
3064 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3065 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3066 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3068 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
;; Emits por for every SSEMODEI mode.
3070 (define_insn "*ior<mode>3"
3071 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3073 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3074 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3075 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3076 "por\t{%2, %0|%0, %2}"
3077 [(set_attr "type" "sselog")
3078 (set_attr "mode" "TI")])
;; Bitwise XOR expander for every SSEMODEI mode; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before matching.
3080 (define_expand "xor<mode>3"
3081 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3082 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3083 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3085 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
;; Emits pxor for every SSEMODEI mode.
3087 (define_insn "*xor<mode>3"
3088 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3090 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3091 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3092 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3093 "pxor\t{%2, %0|%0, %2}"
3094 [(set_attr "type" "sselog")
3095 (set_attr "mode" "TI")])
3097 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3099 ;; Parallel integral element swizzling
3101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emits packsswb: two V8HI inputs produce a V16QI result.  Operand 1 is
;; tied to the destination.
3103 (define_insn "sse2_packsswb"
3104 [(set (match_operand:V16QI 0 "register_operand" "=x")
3107 (match_operand:V8HI 1 "register_operand" "0"))
3109 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3111 "packsswb\t{%2, %0|%0, %2}"
3112 [(set_attr "type" "sselog")
3113 (set_attr "mode" "TI")])
;; Emits packssdw: two V4SI inputs produce a V8HI result.  Operand 1 is
;; tied to the destination.
3115 (define_insn "sse2_packssdw"
3116 [(set (match_operand:V8HI 0 "register_operand" "=x")
3119 (match_operand:V4SI 1 "register_operand" "0"))
3121 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3123 "packssdw\t{%2, %0|%0, %2}"
3124 [(set_attr "type" "sselog")
3125 (set_attr "mode" "TI")])
;; Emits packuswb: two V8HI inputs produce a V16QI result.  Operand 1 is
;; tied to the destination.
3127 (define_insn "sse2_packuswb"
3128 [(set (match_operand:V16QI 0 "register_operand" "=x")
3131 (match_operand:V8HI 1 "register_operand" "0"))
3133 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3135 "packuswb\t{%2, %0|%0, %2}"
3136 [(set_attr "type" "sselog")
3137 (set_attr "mode" "TI")])
;; Emits punpckhbw.  The selector alternates indices 8..15 with 24..31,
;; pairing each upper byte of operand 1 with the one at the same position
;; 16 elements later.
3139 (define_insn "sse2_punpckhbw"
3140 [(set (match_operand:V16QI 0 "register_operand" "=x")
3143 (match_operand:V16QI 1 "register_operand" "0")
3144 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3145 (parallel [(const_int 8) (const_int 24)
3146 (const_int 9) (const_int 25)
3147 (const_int 10) (const_int 26)
3148 (const_int 11) (const_int 27)
3149 (const_int 12) (const_int 28)
3150 (const_int 13) (const_int 29)
3151 (const_int 14) (const_int 30)
3152 (const_int 15) (const_int 31)])))]
3154 "punpckhbw\t{%2, %0|%0, %2}"
3155 [(set_attr "type" "sselog")
3156 (set_attr "mode" "TI")])
;; Emits punpcklbw.  The selector alternates indices 0..7 with 16..23.
3158 (define_insn "sse2_punpcklbw"
3159 [(set (match_operand:V16QI 0 "register_operand" "=x")
3162 (match_operand:V16QI 1 "register_operand" "0")
3163 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3164 (parallel [(const_int 0) (const_int 16)
3165 (const_int 1) (const_int 17)
3166 (const_int 2) (const_int 18)
3167 (const_int 3) (const_int 19)
3168 (const_int 4) (const_int 20)
3169 (const_int 5) (const_int 21)
3170 (const_int 6) (const_int 22)
3171 (const_int 7) (const_int 23)])))]
3173 "punpcklbw\t{%2, %0|%0, %2}"
3174 [(set_attr "type" "sselog")
3175 (set_attr "mode" "TI")])
;; sse2_punpckhwd: emits "punpckhwd" on V8HI operands.  The selection list
;; alternates indices 4..7 with indices 12..15.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3177 (define_insn "sse2_punpckhwd"
3178 [(set (match_operand:V8HI 0 "register_operand" "=x")
3181 (match_operand:V8HI 1 "register_operand" "0")
3182 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3183 (parallel [(const_int 4) (const_int 12)
3184 (const_int 5) (const_int 13)
3185 (const_int 6) (const_int 14)
3186 (const_int 7) (const_int 15)])))]
3188 "punpckhwd\t{%2, %0|%0, %2}"
3189 [(set_attr "type" "sselog")
3190 (set_attr "mode" "TI")])
;; sse2_punpcklwd: emits "punpcklwd" on V8HI operands.  The selection list
;; alternates indices 0..3 with indices 8..11.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3192 (define_insn "sse2_punpcklwd"
3193 [(set (match_operand:V8HI 0 "register_operand" "=x")
3196 (match_operand:V8HI 1 "register_operand" "0")
3197 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3198 (parallel [(const_int 0) (const_int 8)
3199 (const_int 1) (const_int 9)
3200 (const_int 2) (const_int 10)
3201 (const_int 3) (const_int 11)])))]
3203 "punpcklwd\t{%2, %0|%0, %2}"
3204 [(set_attr "type" "sselog")
3205 (set_attr "mode" "TI")])
;; sse2_punpckhdq: emits "punpckhdq" on V4SI operands.  The selection list
;; is 2, 6, 3, 7.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3207 (define_insn "sse2_punpckhdq"
3208 [(set (match_operand:V4SI 0 "register_operand" "=x")
3211 (match_operand:V4SI 1 "register_operand" "0")
3212 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3213 (parallel [(const_int 2) (const_int 6)
3214 (const_int 3) (const_int 7)])))]
3216 "punpckhdq\t{%2, %0|%0, %2}"
3217 [(set_attr "type" "sselog")
3218 (set_attr "mode" "TI")])
;; sse2_punpckldq: emits "punpckldq" on V4SI operands.  The selection list
;; is 0, 4, 1, 5.  Operand 1 is tied to the destination; operand 2 may be a
;; register or memory.
3220 (define_insn "sse2_punpckldq"
3221 [(set (match_operand:V4SI 0 "register_operand" "=x")
3224 (match_operand:V4SI 1 "register_operand" "0")
3225 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3226 (parallel [(const_int 0) (const_int 4)
3227 (const_int 1) (const_int 5)])))]
3229 "punpckldq\t{%2, %0|%0, %2}"
3230 [(set_attr "type" "sselog")
3231 (set_attr "mode" "TI")])
;; sse2_punpckhqdq: emits "punpckhqdq" on V2DI operands.  The visible part
;; of the selection list starts at index 1.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3233 (define_insn "sse2_punpckhqdq"
3234 [(set (match_operand:V2DI 0 "register_operand" "=x")
3237 (match_operand:V2DI 1 "register_operand" "0")
3238 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3239 (parallel [(const_int 1)
3242 "punpckhqdq\t{%2, %0|%0, %2}"
3243 [(set_attr "type" "sselog")
3244 (set_attr "mode" "TI")])
;; sse2_punpcklqdq: emits "punpcklqdq" on V2DI operands.  The visible part
;; of the selection list starts at index 0.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
3246 (define_insn "sse2_punpcklqdq"
3247 [(set (match_operand:V2DI 0 "register_operand" "=x")
3250 (match_operand:V2DI 1 "register_operand" "0")
3251 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3252 (parallel [(const_int 0)
3255 "punpcklqdq\t{%2, %0|%0, %2}"
3256 [(set_attr "type" "sselog")
3257 (set_attr "mode" "TI")])
;; sse2_pinsrw expander.  Takes the value to insert as an SImode operand 2
;; and the element number as operand 3 (predicate const_0_to_7_operand).
;; The preparation code narrows operand 2 to HImode with gen_lowpart and
;; rewrites operand 3 from an element number N into the one-bit mask
;; 1 << N.
3259 (define_expand "sse2_pinsrw"
3260 [(set (match_operand:V8HI 0 "register_operand" "")
3263 (match_operand:SI 2 "nonimmediate_operand" ""))
3264 (match_operand:V8HI 1 "register_operand" "")
3265 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3268 operands[2] = gen_lowpart (HImode, operands[2]);
3269 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
;; *sse2_pinsrw: matching insn for "pinsrw".  Operand 3 arrives as a
;; power-of-two mask (predicate const_pow2_1_to_128_operand); the output
;; code converts it back to an element number with exact_log2 before
;; printing.  Operand 2 is HImode but is printed with the %k modifier.
3272 (define_insn "*sse2_pinsrw"
3273 [(set (match_operand:V8HI 0 "register_operand" "=x")
3276 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3277 (match_operand:V8HI 1 "register_operand" "0")
3278 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3281 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3282 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3284 [(set_attr "type" "sselog")
3285 (set_attr "mode" "TI")])
;; sse2_pextrw: emits "pextrw".  Selects the V8HI element of operand 1
;; named by the immediate operand 2 (predicate const_0_to_7_operand) and
;; places the result in an SImode general register ("=r").
3287 (define_insn "sse2_pextrw"
3288 [(set (match_operand:SI 0 "register_operand" "=r")
3291 (match_operand:V8HI 1 "register_operand" "x")
3292 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3294 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3295 [(set_attr "type" "sselog")
3296 (set_attr "mode" "TI")])
;; sse2_pshufd expander.  Operand 2 is the combined shuffle immediate.  It
;; is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each passed
;; as a separate constant to gen_sse2_pshufd_1.
3298 (define_expand "sse2_pshufd"
3299 [(match_operand:V4SI 0 "register_operand" "")
3300 (match_operand:V4SI 1 "nonimmediate_operand" "")
3301 (match_operand:SI 2 "const_int_operand" "")]
3304 int mask = INTVAL (operands[2]);
3305 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3306 GEN_INT ((mask >> 0) & 3),
3307 GEN_INT ((mask >> 2) & 3),
3308 GEN_INT ((mask >> 4) & 3),
3309 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshufd_1: matching insn for "pshufd".  Operands 2..5 are the four
;; per-element selectors (predicate const_0_to_3_operand).  The output
;; code packs them back into one immediate (2 bits each, operand 2 in the
;; low bits), stores it in operands[2] and prints the instruction.
3313 (define_insn "sse2_pshufd_1"
3314 [(set (match_operand:V4SI 0 "register_operand" "=x")
3316 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3317 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3318 (match_operand 3 "const_0_to_3_operand" "")
3319 (match_operand 4 "const_0_to_3_operand" "")
3320 (match_operand 5 "const_0_to_3_operand" "")])))]
3324 mask |= INTVAL (operands[2]) << 0;
3325 mask |= INTVAL (operands[3]) << 2;
3326 mask |= INTVAL (operands[4]) << 4;
3327 mask |= INTVAL (operands[5]) << 6;
3328 operands[2] = GEN_INT (mask);
3330 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3332 [(set_attr "type" "sselog1")
3333 (set_attr "mode" "TI")])
;; sse2_pshuflw expander.  Operand 2 is the combined shuffle immediate.  It
;; is split into four 2-bit fields (bits 0-1, 2-3, 4-5, 6-7), each passed
;; as a separate constant to gen_sse2_pshuflw_1.
3335 (define_expand "sse2_pshuflw"
3336 [(match_operand:V8HI 0 "register_operand" "")
3337 (match_operand:V8HI 1 "nonimmediate_operand" "")
3338 (match_operand:SI 2 "const_int_operand" "")]
3341 int mask = INTVAL (operands[2]);
3342 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3343 GEN_INT ((mask >> 0) & 3),
3344 GEN_INT ((mask >> 2) & 3),
3345 GEN_INT ((mask >> 4) & 3),
3346 GEN_INT ((mask >> 6) & 3)));
;; sse2_pshuflw_1: matching insn for "pshuflw".  Operands 2..5 are the
;; first four selectors of the selection list (predicate
;; const_0_to_3_operand); the rest of the list is not visible here.  The
;; output code packs the four selectors into one immediate (2 bits each,
;; operand 2 in the low bits), stores it in operands[2] and prints the
;; instruction.
3350 (define_insn "sse2_pshuflw_1"
3351 [(set (match_operand:V8HI 0 "register_operand" "=x")
3353 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3354 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3355 (match_operand 3 "const_0_to_3_operand" "")
3356 (match_operand 4 "const_0_to_3_operand" "")
3357 (match_operand 5 "const_0_to_3_operand" "")
3365 mask |= INTVAL (operands[2]) << 0;
3366 mask |= INTVAL (operands[3]) << 2;
3367 mask |= INTVAL (operands[4]) << 4;
3368 mask |= INTVAL (operands[5]) << 6;
3369 operands[2] = GEN_INT (mask);
3371 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3373 [(set_attr "type" "sselog")
3374 (set_attr "mode" "TI")])
;; sse2_pshufhw expander.  Operand 2 is the combined shuffle immediate.  It
;; is split into four 2-bit fields and 4 is added to each, so the
;; constants handed to gen_sse2_pshufhw_1 lie in the range 4..7.
3376 (define_expand "sse2_pshufhw"
3377 [(match_operand:V8HI 0 "register_operand" "")
3378 (match_operand:V8HI 1 "nonimmediate_operand" "")
3379 (match_operand:SI 2 "const_int_operand" "")]
3382 int mask = INTVAL (operands[2]);
3383 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3384 GEN_INT (((mask >> 0) & 3) + 4),
3385 GEN_INT (((mask >> 2) & 3) + 4),
3386 GEN_INT (((mask >> 4) & 3) + 4),
3387 GEN_INT (((mask >> 6) & 3) + 4)));
;; sse2_pshufhw_1: matching insn for "pshufhw".  The selection list begins
;; with fixed indices (only the leading 0 is visible here) followed by
;; operands 2..5, each constrained by const_4_to_7_operand.  The output
;; code subtracts 4 from each selector, packs the results into one
;; immediate (2 bits each, operand 2 in the low bits), stores it in
;; operands[2] and prints the instruction.
3391 (define_insn "sse2_pshufhw_1"
3392 [(set (match_operand:V8HI 0 "register_operand" "=x")
3394 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3395 (parallel [(const_int 0)
3399 (match_operand 2 "const_4_to_7_operand" "")
3400 (match_operand 3 "const_4_to_7_operand" "")
3401 (match_operand 4 "const_4_to_7_operand" "")
3402 (match_operand 5 "const_4_to_7_operand" "")])))]
3406 mask |= (INTVAL (operands[2]) - 4) << 0;
3407 mask |= (INTVAL (operands[3]) - 4) << 2;
3408 mask |= (INTVAL (operands[4]) - 4) << 4;
3409 mask |= (INTVAL (operands[5]) - 4) << 6;
3410 operands[2] = GEN_INT (mask);
3412 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3414 [(set_attr "type" "sselog")
3415 (set_attr "mode" "TI")])
;; sse2_loadd expander.  Builds a V4SI value from the SImode operand 1.
;; The preparation statement sets operands[2] to the V4SImode zero
;; constant (CONST0_RTX).
3417 (define_expand "sse2_loadd"
3418 [(set (match_operand:V4SI 0 "register_operand" "")
3421 (match_operand:SI 1 "nonimmediate_operand" ""))
3425 "operands[2] = CONST0_RTX (V4SImode);")
;; sse2_loadld: three alternatives.
;;   0: "movd"  -- operand 2 from memory or a general register, operand 1
;;                 is the zero constant ("C").
;;   1: "movss" -- operand 2 from memory, operand 1 is the zero constant.
;;   2: "movss" -- operand 2 from an "x" register, operand 1 tied to the
;;                 destination.
;; The per-alternative "mode" attribute is TI, V4SF, SF respectively.
3427 (define_insn "sse2_loadld"
3428 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3431 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3432 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3436 movd\t{%2, %0|%0, %2}
3437 movss\t{%2, %0|%0, %2}
3438 movss\t{%2, %0|%0, %2}"
3439 [(set_attr "type" "ssemov")
3440 (set_attr "mode" "TI,V4SF,SF")])
;; sse2_stored: stores element 0 of the V4SI operand 1 into the SImode
;; operand 0 (memory or "x" register).  Once reload has completed the insn
;; is split into a plain SImode move whose source is the same hard
;; register as operand 1, re-expressed in SImode.
3442 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3443 ;; be taken into account, and movdi isn't fully populated even without.
3444 (define_insn_and_split "sse2_stored"
3445 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3447 (match_operand:V4SI 1 "register_operand" "x")
3448 (parallel [(const_int 0)])))]
3451 "&& reload_completed"
3452 [(set (match_dup 0) (match_dup 1))]
3454 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
;; sse_storeq expander: generates the pattern that stores element 0 of the
;; V2DI operand 1 into the DImode operand 0.  No preparation code is
;; visible here.
3457 (define_expand "sse_storeq"
3458 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3460 (match_operand:V2DI 1 "register_operand" "")
3461 (parallel [(const_int 0)])))]
;; *sse2_storeq: matching insn for storing element 0 of the V2DI operand 1
;; into the DImode operand 0 (memory or "x" register).  The condition and
;; output template are not visible here.
3465 ;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3466 ;; be taken into account, and movdi isn't fully populated even without.
3467 (define_insn "*sse2_storeq"
3468 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3470 (match_operand:V2DI 1 "register_operand" "x")
3471 (parallel [(const_int 0)])))]
;; NOTE(review): the opening form of this definition is not visible here;
;; it appears to be the post-reload split for the V2DI element-0 store
;; above -- confirm.  When TARGET_SSE holds and reload has completed, the
;; extraction is replaced by a plain DImode move whose source is the same
;; hard register as operand 1, re-expressed in DImode.
3476 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3478 (match_operand:V2DI 1 "register_operand" "")
3479 (parallel [(const_int 0)])))]
3480 "TARGET_SSE && reload_completed"
3481 [(set (match_dup 0) (match_dup 1))]
3483 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
;; *vec_extractv2di_1_sse2: extracts element 1 of a V2DI value when
;; TARGET_SSE2 is set.  Memory-to-memory is rejected by the condition.
;;   0: "movhps" -- register source, memory destination.
;;   1: "psrldq" by 8 -- source tied to the destination register.
;;   2: "movq"   -- offsettable memory source ("o"), printed with %H1.
3486 (define_insn "*vec_extractv2di_1_sse2"
3487 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3489 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3490 (parallel [(const_int 1)])))]
3491 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3493 movhps\t{%1, %0|%0, %1}
3494 psrldq\t{$8, %0|%0, 8}
3495 movq\t{%H1, %0|%0, %H1}"
3496 [(set_attr "type" "ssemov,sseishft,ssemov")
3497 (set_attr "memory" "*,none,*")
3498 (set_attr "mode" "V2SF,TI,TI")])
;; *vec_extractv2di_1_sse: extracts element 1 of a V2DI value when
;; TARGET_SSE is set but TARGET_SSE2 is not.  Memory-to-memory is rejected
;; by the condition.
;;   0: "movhps"  -- register source, memory destination.
;;   1: "movhlps" -- register source, register destination.
;;   2: "movlps"  -- offsettable memory source ("o"), printed with %H1.
3500 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
3501 (define_insn "*vec_extractv2di_1_sse"
3502 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3504 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3505 (parallel [(const_int 1)])))]
3506 "!TARGET_SSE2 && TARGET_SSE
3507 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3509 movhps\t{%1, %0|%0, %1}
3510 movhlps\t{%1, %0|%0, %1}
3511 movlps\t{%H1, %0|%0, %H1}"
3512 [(set_attr "type" "ssemov")
3513 (set_attr "mode" "V2SF,V4SF,V2SF")])
;; *vec_dupv4si: builds a V4SI value from the SImode register operand 1.
;;   0: "pshufd" with immediate 0 -- source in a "Y" register.
;;   1: "shufps" with immediate 0 -- source tied to the destination, which
;;      is used as both shufps inputs.
3515 (define_insn "*vec_dupv4si"
3516 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3518 (match_operand:SI 1 "register_operand" " Y,0")))]
3521 pshufd\t{$0, %1, %0|%0, %1, 0}
3522 shufps\t{$0, %0, %0|%0, %0, 0}"
3523 [(set_attr "type" "sselog1")
3524 (set_attr "mode" "TI,V4SF")])
;; *vec_dupv2di: builds a V2DI value from the DImode register operand 1,
;; which is tied to the destination in both alternatives.  The output
;; templates are not visible here; the attributes mark alternative 0 as
;; sselog1/TI and alternative 1 as ssemov/V4SF.
3526 (define_insn "*vec_dupv2di"
3527 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3529 (match_operand:DI 1 "register_operand" " 0,0")))]
3534 [(set_attr "type" "sselog1,ssemov")
3535 (set_attr "mode" "TI,V4SF")])
;; *sse2_concatv2si: forms a V2SI value from the SImode operands 1 and 2.
;;   0: "punpckldq" -- "Y" registers, operand 1 tied to the destination.
;;   1: "movd"      -- operand 2 is the zero constant ("C"), "Y" dest.
;;   2: "punpckldq" -- "y" registers, operand 1 tied to the destination.
;;   3: "movd"      -- operand 2 is the zero constant, "y" destination.
3537 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3538 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3539 ;; alternatives pretty much forces the MMX alternative to be chosen.
3540 (define_insn "*sse2_concatv2si"
3541 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3543 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3544 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3547 punpckldq\t{%2, %0|%0, %2}
3548 movd\t{%1, %0|%0, %1}
3549 punpckldq\t{%2, %0|%0, %2}
3550 movd\t{%1, %0|%0, %1}"
3551 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3552 (set_attr "mode" "TI,TI,DI,DI")])
;; *sse1_concatv2si: forms a V2SI value from the SImode operands 1 and 2.
;;   0: "unpcklps"  -- "x" registers, operand 1 tied to the destination.
;;   1: "movss"     -- operand 1 from memory, operand 2 is zero ("C").
;;   2: "punpckldq" -- "y" registers, operand 1 tied to the destination.
;;   3: "movd"      -- operand 2 is the zero constant, "y" destination.
3554 (define_insn "*sse1_concatv2si"
3555 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3557 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3558 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3561 unpcklps\t{%2, %0|%0, %2}
3562 movss\t{%1, %0|%0, %1}
3563 punpckldq\t{%2, %0|%0, %2}
3564 movd\t{%1, %0|%0, %1}"
3565 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3566 (set_attr "mode" "V4SF,V4SF,DI,DI")])
;; *vec_concatv4si_1: forms a V4SI value from two V2SI operands.  Operand 1
;; is tied to the destination in every alternative.
;;   0: "punpcklqdq" -- operand 2 in a "Y" register.
;;   1: "movlhps"    -- operand 2 in an "x" register.
;;   2: "movhps"     -- operand 2 in memory.
3568 (define_insn "*vec_concatv4si_1"
3569 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3571 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3572 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3575 punpcklqdq\t{%2, %0|%0, %2}
3576 movlhps\t{%2, %0|%0, %2}
3577 movhps\t{%2, %0|%0, %2}"
3578 [(set_attr "type" "sselog,ssemov,ssemov")
3579 (set_attr "mode" "TI,V4SF,V2SF")])
;; *vec_concatv2di: forms a V2DI value from the DImode operands 1 and 2.
;;   0: "movq"       -- operand 1 from memory, operand 2 zero ("C").
;;   1: "movq2dq"    -- operand 1 from a "y" register, operand 2 zero.
;;   2: "punpcklqdq" -- operand 1 tied to dest, operand 2 in "Y".
;;   3: "movlhps"    -- operand 1 tied to dest, operand 2 in "x".
;;   4: "movhps"     -- operand 1 tied to dest, operand 2 in memory.
;;   5: "movlps"     -- operand 2 tied to dest, operand 1 from memory.
3581 (define_insn "*vec_concatv2di"
3582 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3584 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3585 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3588 movq\t{%1, %0|%0, %1}
3589 movq2dq\t{%1, %0|%0, %1}
3590 punpcklqdq\t{%2, %0|%0, %2}
3591 movlhps\t{%2, %0|%0, %2}
3592 movhps\t{%2, %0|%0, %2}
3593 movlps\t{%1, %0|%0, %1}"
3594 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3595 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
;; vec_setv2di expander: hands the V2DI destination (operand 0), the DImode
;; value (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_set, with false as its first argument.
3597 (define_expand "vec_setv2di"
3598 [(match_operand:V2DI 0 "register_operand" "")
3599 (match_operand:DI 1 "register_operand" "")
3600 (match_operand 2 "const_int_operand" "")]
3603 ix86_expand_vector_set (false, operands[0], operands[1],
3604 INTVAL (operands[2]));
;; vec_extractv2di expander: hands the DImode destination (operand 0), the
;; V2DI source (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_extract, with false as its first argument.
3608 (define_expand "vec_extractv2di"
3609 [(match_operand:DI 0 "register_operand" "")
3610 (match_operand:V2DI 1 "register_operand" "")
3611 (match_operand 2 "const_int_operand" "")]
3614 ix86_expand_vector_extract (false, operands[0], operands[1],
3615 INTVAL (operands[2]));
;; vec_initv2di expander: hands the V2DI destination (operand 0) and the
;; unconstrained initializer (operand 1) to ix86_expand_vector_init, with
;; false as its first argument.
3619 (define_expand "vec_initv2di"
3620 [(match_operand:V2DI 0 "register_operand" "")
3621 (match_operand 1 "" "")]
3624 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv4si expander: hands the V4SI destination (operand 0), the SImode
;; value (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_set, with false as its first argument.
3628 (define_expand "vec_setv4si"
3629 [(match_operand:V4SI 0 "register_operand" "")
3630 (match_operand:SI 1 "register_operand" "")
3631 (match_operand 2 "const_int_operand" "")]
3634 ix86_expand_vector_set (false, operands[0], operands[1],
3635 INTVAL (operands[2]));
;; vec_extractv4si expander: hands the SImode destination (operand 0), the
;; V4SI source (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_extract, with false as its first argument.
3639 (define_expand "vec_extractv4si"
3640 [(match_operand:SI 0 "register_operand" "")
3641 (match_operand:V4SI 1 "register_operand" "")
3642 (match_operand 2 "const_int_operand" "")]
3645 ix86_expand_vector_extract (false, operands[0], operands[1],
3646 INTVAL (operands[2]));
;; vec_initv4si expander: hands the V4SI destination (operand 0) and the
;; unconstrained initializer (operand 1) to ix86_expand_vector_init, with
;; false as its first argument.
3650 (define_expand "vec_initv4si"
3651 [(match_operand:V4SI 0 "register_operand" "")
3652 (match_operand 1 "" "")]
3655 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv8hi expander: hands the V8HI destination (operand 0), the HImode
;; value (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_set, with false as its first argument.
3659 (define_expand "vec_setv8hi"
3660 [(match_operand:V8HI 0 "register_operand" "")
3661 (match_operand:HI 1 "register_operand" "")
3662 (match_operand 2 "const_int_operand" "")]
3665 ix86_expand_vector_set (false, operands[0], operands[1],
3666 INTVAL (operands[2]));
;; vec_extractv8hi expander: hands the HImode destination (operand 0), the
;; V8HI source (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_extract, with false as its first argument.
3670 (define_expand "vec_extractv8hi"
3671 [(match_operand:HI 0 "register_operand" "")
3672 (match_operand:V8HI 1 "register_operand" "")
3673 (match_operand 2 "const_int_operand" "")]
3676 ix86_expand_vector_extract (false, operands[0], operands[1],
3677 INTVAL (operands[2]));
;; vec_initv8hi expander: hands the V8HI destination (operand 0) and the
;; unconstrained initializer (operand 1) to ix86_expand_vector_init, with
;; false as its first argument.
3681 (define_expand "vec_initv8hi"
3682 [(match_operand:V8HI 0 "register_operand" "")
3683 (match_operand 1 "" "")]
3686 ix86_expand_vector_init (false, operands[0], operands[1]);
;; vec_setv16qi expander: hands the V16QI destination (operand 0), the
;; QImode value (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_set, with false as its first argument.
3690 (define_expand "vec_setv16qi"
3691 [(match_operand:V16QI 0 "register_operand" "")
3692 (match_operand:QI 1 "register_operand" "")
3693 (match_operand 2 "const_int_operand" "")]
3696 ix86_expand_vector_set (false, operands[0], operands[1],
3697 INTVAL (operands[2]));
;; vec_extractv16qi expander: hands the QImode destination (operand 0), the
;; V16QI source (operand 1) and the constant element number (operand 2) to
;; ix86_expand_vector_extract, with false as its first argument.
3701 (define_expand "vec_extractv16qi"
3702 [(match_operand:QI 0 "register_operand" "")
3703 (match_operand:V16QI 1 "register_operand" "")
3704 (match_operand 2 "const_int_operand" "")]
3707 ix86_expand_vector_extract (false, operands[0], operands[1],
3708 INTVAL (operands[2]));
;; vec_initv16qi expander: hands the V16QI destination (operand 0) and the
;; unconstrained initializer (operand 1) to ix86_expand_vector_init, with
;; false as its first argument.
3712 (define_expand "vec_initv16qi"
3713 [(match_operand:V16QI 0 "register_operand" "")
3714 (match_operand 1 "" "")]
3717 ix86_expand_vector_init (false, operands[0], operands[1]);
3721 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3725 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sse2_uavgv16qi3: emits "pavgb" on V16QI operands.  The pattern includes
;; a V16QI constant vector of all ones.  Operand 1 carries the "%"
;; commutativity marker and is tied to the destination; the condition
;; additionally requires ix86_binary_operator_ok for PLUS in V16QImode.
3727 (define_insn "sse2_uavgv16qi3"
3728 [(set (match_operand:V16QI 0 "register_operand" "=x")
3734 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3736 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3737 (const_vector:V16QI [(const_int 1) (const_int 1)
3738 (const_int 1) (const_int 1)
3739 (const_int 1) (const_int 1)
3740 (const_int 1) (const_int 1)
3741 (const_int 1) (const_int 1)
3742 (const_int 1) (const_int 1)
3743 (const_int 1) (const_int 1)
3744 (const_int 1) (const_int 1)]))
3746 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3747 "pavgb\t{%2, %0|%0, %2}"
3748 [(set_attr "type" "sseiadd")
3749 (set_attr "mode" "TI")])
;; sse2_uavgv8hi3: emits "pavgw" on V8HI operands.  The pattern includes a
;; V8HI constant vector of all ones.  Operand 1 carries the "%"
;; commutativity marker and is tied to the destination; the condition
;; additionally requires ix86_binary_operator_ok for PLUS in V8HImode.
3751 (define_insn "sse2_uavgv8hi3"
3752 [(set (match_operand:V8HI 0 "register_operand" "=x")
3758 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3760 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3761 (const_vector:V8HI [(const_int 1) (const_int 1)
3762 (const_int 1) (const_int 1)
3763 (const_int 1) (const_int 1)
3764 (const_int 1) (const_int 1)]))
3766 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3767 "pavgw\t{%2, %0|%0, %2}"
3768 [(set_attr "type" "sseiadd")
3769 (set_attr "mode" "TI")])
;; sse2_psadbw: emits "psadbw".  The operation is modelled as an opaque
;; V2DI unspec over the two V16QI inputs rather than spelled out in RTL
;; (see the remark below).  Operand 1 is tied to the destination.
3771 ;; The correct representation for this is absolutely enormous, and
3772 ;; surely not generally useful.
3773 (define_insn "sse2_psadbw"
3774 [(set (match_operand:V2DI 0 "register_operand" "=x")
3775 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3776 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3779 "psadbw\t{%2, %0|%0, %2}"
3780 [(set_attr "type" "sseiadd")
3781 (set_attr "mode" "TI")])
;; sse_movmskps: emits "movmskps".  Modelled as an SImode unspec of the
;; V4SF register operand 1; the result goes to a general register ("=r").
3783 (define_insn "sse_movmskps"
3784 [(set (match_operand:SI 0 "register_operand" "=r")
3785 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3788 "movmskps\t{%1, %0|%0, %1}"
3789 [(set_attr "type" "ssecvt")
3790 (set_attr "mode" "V4SF")])
;; sse2_movmskpd: emits "movmskpd".  Modelled as an SImode unspec of the
;; V2DF register operand 1; the result goes to a general register ("=r").
3792 (define_insn "sse2_movmskpd"
3793 [(set (match_operand:SI 0 "register_operand" "=r")
3794 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3797 "movmskpd\t{%1, %0|%0, %1}"
3798 [(set_attr "type" "ssecvt")
3799 (set_attr "mode" "V2DF")])
;; sse2_pmovmskb: emits "pmovmskb".  Modelled as an SImode unspec of the
;; V16QI register operand 1; the result goes to a general register ("=r").
;; NOTE(review): the "mode" attribute is V2DF although the operand is an
;; integer vector.  Attribute computations defined elsewhere may depend on
;; this value, so confirm before changing it.
3801 (define_insn "sse2_pmovmskb"
3802 [(set (match_operand:SI 0 "register_operand" "=r")
3803 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3806 "pmovmskb\t{%1, %0|%0, %1}"
3807 [(set_attr "type" "ssecvt")
3808 (set_attr "mode" "V2DF")])
;; sse2_maskmovdqu expander: sets the V16QI memory operand 0 from an unspec
;; over the V16QI register operands 1 and 2.  The remainder of the pattern
;; and its condition are not visible here.
3810 (define_expand "sse2_maskmovdqu"
3811 [(set (match_operand:V16QI 0 "memory_operand" "")
3812 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3813 (match_operand:V16QI 2 "register_operand" "x")
;; *sse2_maskmovdqu: non-64-bit form of "maskmovdqu" (condition
;; TARGET_SSE2 && !TARGET_64BIT).  The destination address is the SImode
;; register operand 0, constrained to "D".  The unspec also reads the old
;; contents of that memory.  Only operands 1 and 2 appear in the output
;; template.
3819 (define_insn "*sse2_maskmovdqu"
3820 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3821 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3822 (match_operand:V16QI 2 "register_operand" "x")
3823 (mem:V16QI (match_dup 0))]
3825 "TARGET_SSE2 && !TARGET_64BIT"
3826 ;; @@@ check ordering of operands in intel/nonintel syntax
3827 "maskmovdqu\t{%2, %1|%1, %2}"
3828 [(set_attr "type" "ssecvt")
3829 (set_attr "mode" "TI")])
;; *sse2_maskmovdqu_rex64: 64-bit form of "maskmovdqu" (condition
;; TARGET_SSE2 && TARGET_64BIT).  Same as *sse2_maskmovdqu, except that the
;; address register operand 0 is DImode.
3831 (define_insn "*sse2_maskmovdqu_rex64"
3832 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3833 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3834 (match_operand:V16QI 2 "register_operand" "x")
3835 (mem:V16QI (match_dup 0))]
3837 "TARGET_SSE2 && TARGET_64BIT"
3838 ;; @@@ check ordering of operands in intel/nonintel syntax
3839 "maskmovdqu\t{%2, %1|%1, %2}"
3840 [(set_attr "type" "ssecvt")
3841 (set_attr "mode" "TI")])
;; sse_ldmxcsr: an unspec_volatile taking the SImode memory operand 0; the
;; "memory" attribute is "load".  The unspec code, condition and output
;; template are not visible here.
3843 (define_insn "sse_ldmxcsr"
3844 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3848 [(set_attr "type" "sse")
3849 (set_attr "memory" "load")])
;; sse_stmxcsr: sets the SImode memory operand 0 from the volatile unspec
;; UNSPECV_STMXCSR; the "memory" attribute is "store".  The condition and
;; output template are not visible here.
3851 (define_insn "sse_stmxcsr"
3852 [(set (match_operand:SI 0 "memory_operand" "=m")
3853 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3856 [(set_attr "type" "sse")
3857 (set_attr "memory" "store")])
;; sse_sfence expander, enabled for TARGET_SSE or TARGET_3DNOW_A.  The
;; preparation code creates operand 0 as a BLKmode MEM whose address is a
;; Pmode SCRATCH and marks it volatile; the pattern applies UNSPEC_SFENCE
;; to that operand.
3859 (define_expand "sse_sfence"
3861 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3862 "TARGET_SSE || TARGET_3DNOW_A"
3864 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3865 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse_sfence: matching insn for the UNSPEC_SFENCE pattern over a BLKmode
;; operand, enabled for TARGET_SSE or TARGET_3DNOW_A.  The "memory"
;; attribute is "unknown".  The output template is not visible here.
3868 (define_insn "*sse_sfence"
3869 [(set (match_operand:BLK 0 "" "")
3870 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3871 "TARGET_SSE || TARGET_3DNOW_A"
3873 [(set_attr "type" "sse")
3874 (set_attr "memory" "unknown")])
;; sse2_clflush: an unspec_volatile taking an address operand (predicate
;; address_operand, constraint "p"); the "memory" attribute is "unknown".
;; The unspec code, condition and output template are not visible here.
3876 (define_insn "sse2_clflush"
3877 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3881 [(set_attr "type" "sse")
3882 (set_attr "memory" "unknown")])
;; sse2_mfence expander.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; pattern applies UNSPEC_MFENCE to that operand.  The condition is not
;; visible here.
3884 (define_expand "sse2_mfence"
3886 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3889 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3890 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_mfence: matching insn for the UNSPEC_MFENCE pattern over a BLKmode
;; operand.  The "memory" attribute is "unknown".  The condition and
;; output template are not visible here.
3893 (define_insn "*sse2_mfence"
3894 [(set (match_operand:BLK 0 "" "")
3895 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3898 [(set_attr "type" "sse")
3899 (set_attr "memory" "unknown")])
;; sse2_lfence expander.  The preparation code creates operand 0 as a
;; BLKmode MEM whose address is a Pmode SCRATCH and marks it volatile; the
;; pattern applies UNSPEC_LFENCE to that operand.  The condition is not
;; visible here.
3901 (define_expand "sse2_lfence"
3903 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3906 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3907 MEM_VOLATILE_P (operands[0]) = 1;
;; *sse2_lfence: matching insn for the UNSPEC_LFENCE pattern over a BLKmode
;; operand.  The "memory" attribute is "unknown".  The condition and
;; output template are not visible here.
3910 (define_insn "*sse2_lfence"
3911 [(set (match_operand:BLK 0 "" "")
3912 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3915 [(set_attr "type" "sse")
3916 (set_attr "memory" "unknown")])
;; sse3_mwait: an unspec_volatile over two SImode register operands,
;; constrained to "a" and "c" respectively.  The "length" attribute is
;; fixed at 3.  The unspec code, condition and output template are not
;; visible here.
3918 (define_insn "sse3_mwait"
3919 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3920 (match_operand:SI 1 "register_operand" "c")]
3924 [(set_attr "length" "3")])
;; sse3_monitor: an unspec_volatile over three SImode register operands,
;; constrained to "a", "c" and "d" respectively, printed as
;; "monitor %0, %1, %2".  The "length" attribute is fixed at 3.  The unspec
;; code and condition are not visible here.
3926 (define_insn "sse3_monitor"
3927 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3928 (match_operand:SI 1 "register_operand" "c")
3929 (match_operand:SI 2 "register_operand" "d")]
3932 "monitor\t%0, %1, %2"
3933 [(set_attr "length" "3")])