; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64
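
; Each test loads three 3DNow! operands, chains two calls of the same binary
; intrinsic, and stores the result to %a2, checking that both memory operands
; get folded (commuting the operation where that is legal).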
define void @commute_m_pfadd(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfadd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfadd (%eax), %mm0
; X32-NEXT:    pfadd (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfadd:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfadd (%rsi), %mm0
; X64-NEXT:    pfadd (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx)

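; PFSUB is not commutative on its own; to fold %a2 as the left-hand operand of
; the second subtraction, the backend switches to PFSUBR (reversed subtract).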
define void @commute_m_pfsub(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsub (%eax), %mm0
; X32-NEXT:    pfsubr (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsub:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsub (%rsi), %mm0
; X64-NEXT:    pfsubr (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx)

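; PFSUBR is the reverse case: the second fold switches back to PFSUB.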
define void @commute_m_pfsubr(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfsubr:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfsubr (%eax), %mm0
; X32-NEXT:    pfsub (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfsubr:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfsubr (%rsi), %mm0
; X64-NEXT:    pfsub (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx)

define void @commute_m_pfmul(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmul:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfmul (%eax), %mm0
; X32-NEXT:    pfmul (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmul:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfmul (%rsi), %mm0
; X64-NEXT:    pfmul (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx)

; PFMAX can't commute without fast-math.
define void @commute_m_pfmax(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmax:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmax (%eax), %mm0
; X32-NEXT:    pfmax %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmax:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmax (%rsi), %mm0
; X64-NEXT:    pfmax %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx)

; PFMIN can't commute without fast-math.
define void @commute_m_pfmin(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfmin:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    movq (%ecx), %mm1
; X32-NEXT:    pfmin (%eax), %mm0
; X32-NEXT:    pfmin %mm0, %mm1
; X32-NEXT:    movq %mm1, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfmin:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    movq (%rdx), %mm1
; X64-NEXT:    pfmin (%rsi), %mm0
; X64-NEXT:    pfmin %mm0, %mm1
; X64-NEXT:    movq %mm1, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx)

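; PFCMPEQ, PAVGUSB and PMULHRW below are fully commutative, so both memory
; operands fold with the same opcode.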
define void @commute_m_pfcmpeq(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pfcmpeq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pfcmpeq (%eax), %mm0
; X32-NEXT:    pfcmpeq (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pfcmpeq:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pfcmpeq (%rsi), %mm0
; X64-NEXT:    pfcmpeq (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx)

define void @commute_m_pavgusb(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pavgusb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pavgusb (%eax), %mm0
; X32-NEXT:    pavgusb (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pavgusb:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pavgusb (%rsi), %mm0
; X64-NEXT:    pavgusb (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx)

define void @commute_m_pmulhrw(x86_mmx *%a0, x86_mmx *%a1, x86_mmx *%a2) nounwind {
; X32-LABEL: commute_m_pmulhrw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movq (%edx), %mm0
; X32-NEXT:    pmulhrw (%eax), %mm0
; X32-NEXT:    pmulhrw (%ecx), %mm0
; X32-NEXT:    movq %mm0, (%ecx)
; X32-NEXT:    retl
;
; X64-LABEL: commute_m_pmulhrw:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %mm0
; X64-NEXT:    pmulhrw (%rsi), %mm0
; X64-NEXT:    pmulhrw (%rdx), %mm0
; X64-NEXT:    movq %mm0, (%rdx)
; X64-NEXT:    retq
  %1 = load x86_mmx, x86_mmx* %a0
  %2 = load x86_mmx, x86_mmx* %a1
  %3 = load x86_mmx, x86_mmx* %a2
  %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
  %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4)
  store x86_mmx %5, x86_mmx* %a2
  ret void
}
declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx)