1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64
; add_4i32: add a constant vector to each argument, then add the two results.
; The two constants cancel lane-wise (1 + -1, -2 + 2, 3 + -3, -4 + 4), and the
; visible checks for both prefixes expect a single paddd of the two arguments
; with no constant operand.
5 define <4 x i32> @add_4i32(<4 x i32> %a0, <4 x i32> %a1) {
8 ; X86-NEXT: paddd %xmm1, %xmm0
11 ; X64-LABEL: add_4i32:
13 ; X64-NEXT: paddd %xmm1, %xmm0
15 %1 = add <4 x i32> %a0, <i32 1, i32 -2, i32 3, i32 -4>
16 %2 = add <4 x i32> %a1, <i32 -1, i32 2, i32 -3, i32 4>
; Sum of the two partial results; this is where the constants meet.
17 %3 = add <4 x i32> %1, %2
; add_4i32_commute: same computation as add_4i32, but each constant vector is
; the left-hand operand of its add. The visible checks expect the same single
; paddd of the two arguments for both prefixes.
21 define <4 x i32> @add_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
22 ; X86-LABEL: add_4i32_commute:
24 ; X86-NEXT: paddd %xmm1, %xmm0
27 ; X64-LABEL: add_4i32_commute:
29 ; X64-NEXT: paddd %xmm1, %xmm0
31 %1 = add <4 x i32> <i32 1, i32 -2, i32 3, i32 -4>, %a0
32 %2 = add <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, %a1
; Sum of the two partial results; the constants cancel lane-wise.
33 %3 = add <4 x i32> %1, %2
; mul_4i32: multiply each argument by a constant vector, then multiply the two
; results. The visible checks expect one pmulld of the two arguments followed
; by one pmulld with a constant-pool operand.
; NOTE(review): the lane-wise product of the two constants is <4, 6, 6, 4>;
; presumably that is the pooled constant, but its contents are not checked here.
37 define <4 x i32> @mul_4i32(<4 x i32> %a0, <4 x i32> %a1) {
38 ; X86-LABEL: mul_4i32:
40 ; X86-NEXT: pmulld %xmm1, %xmm0
41 ; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
44 ; X64-LABEL: mul_4i32:
46 ; X64-NEXT: pmulld %xmm1, %xmm0
47 ; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
49 %1 = mul <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4>
50 %2 = mul <4 x i32> %a1, <i32 4, i32 3, i32 2, i32 1>
; Product of the two partial results.
51 %3 = mul <4 x i32> %1, %2
; mul_4i32_commute: same computation as mul_4i32, but each constant vector is
; the left-hand operand of its mul. The visible checks expect the same two
; pmulld instructions (arguments first, then a constant-pool operand).
55 define <4 x i32> @mul_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
56 ; X86-LABEL: mul_4i32_commute:
58 ; X86-NEXT: pmulld %xmm1, %xmm0
59 ; X86-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
62 ; X64-LABEL: mul_4i32_commute:
64 ; X64-NEXT: pmulld %xmm1, %xmm0
65 ; X64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
67 %1 = mul <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %a0
68 %2 = mul <4 x i32> <i32 4, i32 3, i32 2, i32 1>, %a1
; Product of the two partial results.
69 %3 = mul <4 x i32> %1, %2
; and_4i32: AND each argument with a constant vector, then AND the two results.
; The visible checks expect one andps of the two arguments followed by one
; andps with a constant-pool operand.
; NOTE(review): the lane-wise AND of the two constants is <-2, -2, 1, 1>;
; presumably that is the pooled constant, but its contents are not checked here.
73 define <4 x i32> @and_4i32(<4 x i32> %a0, <4 x i32> %a1) {
74 ; X86-LABEL: and_4i32:
76 ; X86-NEXT: andps %xmm1, %xmm0
77 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
80 ; X64-LABEL: and_4i32:
82 ; X64-NEXT: andps %xmm1, %xmm0
83 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
85 %1 = and <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
86 %2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
; AND of the two masked values.
87 %3 = and <4 x i32> %1, %2
; and_4i32_commute: same computation as and_4i32, but each constant vector is
; the left-hand operand of its and. The visible checks expect the same two
; andps instructions (arguments first, then a constant-pool operand).
91 define <4 x i32> @and_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
92 ; X86-LABEL: and_4i32_commute:
94 ; X86-NEXT: andps %xmm1, %xmm0
95 ; X86-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
98 ; X64-LABEL: and_4i32_commute:
100 ; X64-NEXT: andps %xmm1, %xmm0
101 ; X64-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
103 %1 = and <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
104 %2 = and <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
; AND of the two masked values.
105 %3 = and <4 x i32> %1, %2
; or_4i32: OR each argument with a constant vector, then OR the two results.
; The visible checks expect one orps of the two arguments followed by one orps
; with a constant-pool operand.
; NOTE(review): the lane-wise OR of the two constants is <-1, -1, 3, 3>;
; presumably that is the pooled constant, but its contents are not checked here.
109 define <4 x i32> @or_4i32(<4 x i32> %a0, <4 x i32> %a1) {
110 ; X86-LABEL: or_4i32:
112 ; X86-NEXT: orps %xmm1, %xmm0
113 ; X86-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
116 ; X64-LABEL: or_4i32:
118 ; X64-NEXT: orps %xmm1, %xmm0
119 ; X64-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
121 %1 = or <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
122 %2 = or <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
; OR of the two partial results.
123 %3 = or <4 x i32> %1, %2
; or_4i32_commute: same computation as or_4i32, but each constant vector is the
; left-hand operand of its or. The visible checks expect the same two orps
; instructions (arguments first, then a constant-pool operand).
127 define <4 x i32> @or_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
128 ; X86-LABEL: or_4i32_commute:
130 ; X86-NEXT: orps %xmm1, %xmm0
131 ; X86-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
134 ; X64-LABEL: or_4i32_commute:
136 ; X64-NEXT: orps %xmm1, %xmm0
137 ; X64-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
139 %1 = or <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
140 %2 = or <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
; OR of the two partial results.
141 %3 = or <4 x i32> %1, %2
; xor_4i32: XOR each argument with a constant vector, then XOR the two results.
; The visible checks expect one xorps of the two arguments followed by one
; xorps with a constant-pool operand.
; NOTE(review): the lane-wise XOR of the two constants is <1, 1, 2, 2>;
; presumably that is the pooled constant, but its contents are not checked here.
145 define <4 x i32> @xor_4i32(<4 x i32> %a0, <4 x i32> %a1) {
146 ; X86-LABEL: xor_4i32:
148 ; X86-NEXT: xorps %xmm1, %xmm0
149 ; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
152 ; X64-LABEL: xor_4i32:
154 ; X64-NEXT: xorps %xmm1, %xmm0
155 ; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
157 %1 = xor <4 x i32> %a0, <i32 -2, i32 -2, i32 3, i32 3>
158 %2 = xor <4 x i32> %a1, <i32 -1, i32 -1, i32 1, i32 1>
; XOR of the two partial results.
159 %3 = xor <4 x i32> %1, %2
; xor_4i32_commute: same computation as xor_4i32, but each constant vector is
; the left-hand operand of its xor. The visible checks expect the same two
; xorps instructions (arguments first, then a constant-pool operand).
163 define <4 x i32> @xor_4i32_commute(<4 x i32> %a0, <4 x i32> %a1) {
164 ; X86-LABEL: xor_4i32_commute:
166 ; X86-NEXT: xorps %xmm1, %xmm0
167 ; X86-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
170 ; X64-LABEL: xor_4i32_commute:
172 ; X64-NEXT: xorps %xmm1, %xmm0
173 ; X64-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
175 %1 = xor <4 x i32> <i32 -2, i32 -2, i32 3, i32 3>, %a0
176 %2 = xor <4 x i32> <i32 -1, i32 -1, i32 1, i32 1>, %a1
; XOR of the two partial results.
177 %3 = xor <4 x i32> %1, %2