1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-pc-linux -mattr=mmx < %s | FileCheck %s
4 ; MMX packed sub opcodes were wrongly marked as commutative.
5 ; This test checks that the operands of packed sub instructions are
6 ; never interchanged by the "Two-Address instruction pass".
; External operand producers. Each returns a { i64, double } pair; the tests
; in this file only read field 0 (the i64) and use it as a raw 64-bit MMX
; operand. Two separate calls are used per test so that the first result must
; stay live across the second call.
8 declare { i64, double } @getFirstParam()
9 declare { i64, double } @getSecondParam()
; test_psubb: operand order of the packed byte subtract (psub.b intrinsic).
; The value obtained from @getFirstParam is intrinsic operand 0 and the value
; from @getSecondParam is operand 1. The expected assembly below keeps the
; first value in %mm0 and the second in %mm1 and requires
; "psubb %mm1, %mm0" (AT&T order: source, destination), i.e. first - second.
11 define i64 @test_psubb() {
12 ; CHECK-LABEL: test_psubb:
13 ; CHECK: # %bb.0: # %entry
14 ; CHECK-NEXT: pushq %rbx
15 ; CHECK-NEXT: .cfi_def_cfa_offset 16
16 ; CHECK-NEXT: .cfi_offset %rbx, -16
17 ; CHECK-NEXT: callq getFirstParam
18 ; CHECK-NEXT: movq %rax, %rbx
19 ; CHECK-NEXT: callq getSecondParam
20 ; CHECK-NEXT: movq %rbx, %mm0
21 ; CHECK-NEXT: movq %rax, %mm1
22 ; CHECK-NEXT: psubb %mm1, %mm0
23 ; CHECK-NEXT: movq %mm0, %rax
24 ; CHECK-NEXT: popq %rbx
25 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
28 %call = tail call { i64, double } @getFirstParam()
29 %0 = extractvalue { i64, double } %call, 0
30 %call2 = tail call { i64, double } @getSecondParam()
31 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <8 x i8> and x86_mmx.
32 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
33 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
34 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
35 %3 = bitcast <8 x i8> %2 to x86_mmx
36 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
37 %5 = bitcast <8 x i8> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
38 %6 = tail call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
39 %7 = bitcast x86_mmx %6 to <8 x i8>
40 %8 = bitcast <8 x i8> %7 to <1 x i64>
41 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
42 ret i64 %retval.0.extract.i15
; test_psubw: operand order of the packed 16-bit subtract (psub.w intrinsic).
; The value obtained from @getFirstParam is intrinsic operand 0 and the value
; from @getSecondParam is operand 1. The expected assembly below keeps the
; first value in %mm0 and the second in %mm1 and requires
; "psubw %mm1, %mm0" (AT&T order: source, destination), i.e. first - second.
45 define i64 @test_psubw() {
46 ; CHECK-LABEL: test_psubw:
47 ; CHECK: # %bb.0: # %entry
48 ; CHECK-NEXT: pushq %rbx
49 ; CHECK-NEXT: .cfi_def_cfa_offset 16
50 ; CHECK-NEXT: .cfi_offset %rbx, -16
51 ; CHECK-NEXT: callq getFirstParam
52 ; CHECK-NEXT: movq %rax, %rbx
53 ; CHECK-NEXT: callq getSecondParam
54 ; CHECK-NEXT: movq %rbx, %mm0
55 ; CHECK-NEXT: movq %rax, %mm1
56 ; CHECK-NEXT: psubw %mm1, %mm0
57 ; CHECK-NEXT: movq %mm0, %rax
58 ; CHECK-NEXT: popq %rbx
59 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
62 %call = tail call { i64, double } @getFirstParam()
63 %0 = extractvalue { i64, double } %call, 0
64 %call2 = tail call { i64, double } @getSecondParam()
65 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <4 x i16> and x86_mmx.
66 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
67 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
68 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
69 %3 = bitcast <4 x i16> %2 to x86_mmx
70 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
71 %5 = bitcast <4 x i16> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
72 %6 = tail call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
73 %7 = bitcast x86_mmx %6 to <4 x i16>
74 %8 = bitcast <4 x i16> %7 to <1 x i64>
75 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
76 ret i64 %retval.0.extract.i15
; test_psubd: operand order of the packed 32-bit subtract (psub.d intrinsic).
; The value obtained from @getFirstParam is intrinsic operand 0 and the value
; from @getSecondParam is operand 1. The expected assembly below keeps the
; first value in %mm0 and the second in %mm1 and requires
; "psubd %mm1, %mm0" (AT&T order: source, destination), i.e. first - second.
79 define i64 @test_psubd() {
80 ; CHECK-LABEL: test_psubd:
81 ; CHECK: # %bb.0: # %entry
82 ; CHECK-NEXT: pushq %rbx
83 ; CHECK-NEXT: .cfi_def_cfa_offset 16
84 ; CHECK-NEXT: .cfi_offset %rbx, -16
85 ; CHECK-NEXT: callq getFirstParam
86 ; CHECK-NEXT: movq %rax, %rbx
87 ; CHECK-NEXT: callq getSecondParam
88 ; CHECK-NEXT: movq %rbx, %mm0
89 ; CHECK-NEXT: movq %rax, %mm1
90 ; CHECK-NEXT: psubd %mm1, %mm0
91 ; CHECK-NEXT: movq %mm0, %rax
92 ; CHECK-NEXT: popq %rbx
93 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
96 %call = tail call { i64, double } @getFirstParam()
97 %0 = extractvalue { i64, double } %call, 0
98 %call2 = tail call { i64, double } @getSecondParam()
99 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <2 x i32> and x86_mmx.
100 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
101 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
102 %2 = bitcast <1 x i64> %__m1.0.insert.i to <2 x i32>
103 %3 = bitcast <2 x i32> %2 to x86_mmx
104 %4 = bitcast <1 x i64> %__m2.0.insert.i to <2 x i32>
105 %5 = bitcast <2 x i32> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
106 %6 = tail call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
107 %7 = bitcast x86_mmx %6 to <2 x i32>
108 %8 = bitcast <2 x i32> %7 to <1 x i64>
109 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
110 ret i64 %retval.0.extract.i15
; test_psubsb: operand order of the signed-saturating packed byte subtract
; (psubs.b intrinsic). The value obtained from @getFirstParam is intrinsic
; operand 0 and the value from @getSecondParam is operand 1. The expected
; assembly below keeps the first value in %mm0 and the second in %mm1 and
; requires "psubsb %mm1, %mm0" (AT&T order: source, destination).
113 define i64 @test_psubsb() {
114 ; CHECK-LABEL: test_psubsb:
115 ; CHECK: # %bb.0: # %entry
116 ; CHECK-NEXT: pushq %rbx
117 ; CHECK-NEXT: .cfi_def_cfa_offset 16
118 ; CHECK-NEXT: .cfi_offset %rbx, -16
119 ; CHECK-NEXT: callq getFirstParam
120 ; CHECK-NEXT: movq %rax, %rbx
121 ; CHECK-NEXT: callq getSecondParam
122 ; CHECK-NEXT: movq %rbx, %mm0
123 ; CHECK-NEXT: movq %rax, %mm1
124 ; CHECK-NEXT: psubsb %mm1, %mm0
125 ; CHECK-NEXT: movq %mm0, %rax
126 ; CHECK-NEXT: popq %rbx
127 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
130 %call = tail call { i64, double } @getFirstParam()
131 %0 = extractvalue { i64, double } %call, 0
132 %call2 = tail call { i64, double } @getSecondParam()
133 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <8 x i8> and x86_mmx.
134 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
135 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
136 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
137 %3 = bitcast <8 x i8> %2 to x86_mmx
138 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
139 %5 = bitcast <8 x i8> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
140 %6 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
141 %7 = bitcast x86_mmx %6 to <8 x i8>
142 %8 = bitcast <8 x i8> %7 to <1 x i64>
143 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
144 ret i64 %retval.0.extract.i15
; test_psubswv: operand order of the signed-saturating packed 16-bit subtract
; (psubs.w intrinsic). The value obtained from @getFirstParam is intrinsic
; operand 0 and the value from @getSecondParam is operand 1. The expected
; assembly below keeps the first value in %mm0 and the second in %mm1 and
; requires "psubsw %mm1, %mm0" (AT&T order: source, destination).
; NOTE(review): the trailing "v" in the function name does not match the
; instruction mnemonic (psubsw) or the naming of test_psubb/test_psubsb;
; confirm whether it is intentional before relying on the name.
147 define i64 @test_psubswv() {
148 ; CHECK-LABEL: test_psubswv:
149 ; CHECK: # %bb.0: # %entry
150 ; CHECK-NEXT: pushq %rbx
151 ; CHECK-NEXT: .cfi_def_cfa_offset 16
152 ; CHECK-NEXT: .cfi_offset %rbx, -16
153 ; CHECK-NEXT: callq getFirstParam
154 ; CHECK-NEXT: movq %rax, %rbx
155 ; CHECK-NEXT: callq getSecondParam
156 ; CHECK-NEXT: movq %rbx, %mm0
157 ; CHECK-NEXT: movq %rax, %mm1
158 ; CHECK-NEXT: psubsw %mm1, %mm0
159 ; CHECK-NEXT: movq %mm0, %rax
160 ; CHECK-NEXT: popq %rbx
161 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
164 %call = tail call { i64, double } @getFirstParam()
165 %0 = extractvalue { i64, double } %call, 0
166 %call2 = tail call { i64, double } @getSecondParam()
167 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <4 x i16> and x86_mmx.
168 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
169 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
170 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
171 %3 = bitcast <4 x i16> %2 to x86_mmx
172 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
173 %5 = bitcast <4 x i16> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
174 %6 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
175 %7 = bitcast x86_mmx %6 to <4 x i16>
176 %8 = bitcast <4 x i16> %7 to <1 x i64>
177 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
178 ret i64 %retval.0.extract.i15
; test_psubusbv: operand order of the unsigned-saturating packed byte subtract
; (psubus.b intrinsic). The value obtained from @getFirstParam is intrinsic
; operand 0 and the value from @getSecondParam is operand 1. The expected
; assembly below keeps the first value in %mm0 and the second in %mm1 and
; requires "psubusb %mm1, %mm0" (AT&T order: source, destination).
; NOTE(review): the trailing "v" in the function name does not match the
; instruction mnemonic (psubusb) or the naming of test_psubb/test_psubsb;
; confirm whether it is intentional before relying on the name.
181 define i64 @test_psubusbv() {
182 ; CHECK-LABEL: test_psubusbv:
183 ; CHECK: # %bb.0: # %entry
184 ; CHECK-NEXT: pushq %rbx
185 ; CHECK-NEXT: .cfi_def_cfa_offset 16
186 ; CHECK-NEXT: .cfi_offset %rbx, -16
187 ; CHECK-NEXT: callq getFirstParam
188 ; CHECK-NEXT: movq %rax, %rbx
189 ; CHECK-NEXT: callq getSecondParam
190 ; CHECK-NEXT: movq %rbx, %mm0
191 ; CHECK-NEXT: movq %rax, %mm1
192 ; CHECK-NEXT: psubusb %mm1, %mm0
193 ; CHECK-NEXT: movq %mm0, %rax
194 ; CHECK-NEXT: popq %rbx
195 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
198 %call = tail call { i64, double } @getFirstParam()
199 %0 = extractvalue { i64, double } %call, 0
200 %call2 = tail call { i64, double } @getSecondParam()
201 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <8 x i8> and x86_mmx.
202 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
203 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
204 %2 = bitcast <1 x i64> %__m1.0.insert.i to <8 x i8>
205 %3 = bitcast <8 x i8> %2 to x86_mmx
206 %4 = bitcast <1 x i64> %__m2.0.insert.i to <8 x i8>
207 %5 = bitcast <8 x i8> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
208 %6 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
209 %7 = bitcast x86_mmx %6 to <8 x i8>
210 %8 = bitcast <8 x i8> %7 to <1 x i64>
211 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
212 ret i64 %retval.0.extract.i15
; test_psubuswv: operand order of the unsigned-saturating packed 16-bit
; subtract (psubus.w intrinsic). The value obtained from @getFirstParam is
; intrinsic operand 0 and the value from @getSecondParam is operand 1. The
; expected assembly below keeps the first value in %mm0 and the second in
; %mm1 and requires "psubusw %mm1, %mm0" (AT&T order: source, destination).
; NOTE(review): the trailing "v" in the function name does not match the
; instruction mnemonic (psubusw) or the naming of test_psubb/test_psubsb;
; confirm whether it is intentional before relying on the name.
215 define i64 @test_psubuswv() {
216 ; CHECK-LABEL: test_psubuswv:
217 ; CHECK: # %bb.0: # %entry
218 ; CHECK-NEXT: pushq %rbx
219 ; CHECK-NEXT: .cfi_def_cfa_offset 16
220 ; CHECK-NEXT: .cfi_offset %rbx, -16
221 ; CHECK-NEXT: callq getFirstParam
222 ; CHECK-NEXT: movq %rax, %rbx
223 ; CHECK-NEXT: callq getSecondParam
224 ; CHECK-NEXT: movq %rbx, %mm0
225 ; CHECK-NEXT: movq %rax, %mm1
226 ; CHECK-NEXT: psubusw %mm1, %mm0
227 ; CHECK-NEXT: movq %mm0, %rax
228 ; CHECK-NEXT: popq %rbx
229 ; CHECK-NEXT: .cfi_def_cfa_offset 8
; Fetch both operands; only field 0 (the i64) of each returned pair is used.
232 %call = tail call { i64, double } @getFirstParam()
233 %0 = extractvalue { i64, double } %call, 0
234 %call2 = tail call { i64, double } @getSecondParam()
235 %1 = extractvalue { i64, double } %call2, 0
; Wrap each i64 in a <1 x i64>, then reinterpret as <4 x i16> and x86_mmx.
236 %__m1.0.insert.i = insertelement <1 x i64> undef, i64 %0, i32 0
237 %__m2.0.insert.i = insertelement <1 x i64> undef, i64 %1, i32 0
238 %2 = bitcast <1 x i64> %__m1.0.insert.i to <4 x i16>
239 %3 = bitcast <4 x i16> %2 to x86_mmx
240 %4 = bitcast <1 x i64> %__m2.0.insert.i to <4 x i16>
241 %5 = bitcast <4 x i16> %4 to x86_mmx
; Operand order under test: %3 (first call) minus %5 (second call).
242 %6 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %3, x86_mmx %5) nounwind
; Reinterpret the x86_mmx result back to a scalar i64 and return it.
243 %7 = bitcast x86_mmx %6 to <4 x i16>
244 %8 = bitcast <4 x i16> %7 to <1 x i64>
245 %retval.0.extract.i15 = extractelement <1 x i64> %8, i32 0
246 ret i64 %retval.0.extract.i15
; Declarations of the MMX packed-subtract intrinsics exercised by the tests
; in this file. Each takes two x86_mmx operands and returns an x86_mmx
; result, and is declared nounwind readnone. Suffixes: psub = plain subtract,
; psubs = signed-saturating, psubus = unsigned-saturating; .b/.w/.d select
; 8-, 16- and 32-bit lanes (matching the <8 x i8>, <4 x i16> and <2 x i32>
; bitcasts used by the callers).
249 declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
251 declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
253 declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
255 declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
257 declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
259 declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
261 declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone