; RUN: llc < %s -march=x86 -mattr=+mmx

;; A basic sanity check to make sure that MMX arithmetic actually compiles.
;; First is a straight translation of the original with bitcasts as needed.
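
;; Illustrative sketch, not part of the original test: each generic-IR block below
;; bitcasts the x86_mmx operands to a plain vector type, applies the LLVM operator,
;; and bitcasts the result back to x86_mmx before storing it. The function name
;; @bitcast_pattern_sketch is made up for illustration only.
define void @bitcast_pattern_sketch(x86_mmx* %A, x86_mmx* %B) {
entry:
  %a = load x86_mmx* %A
  %b = load x86_mmx* %B
  %av = bitcast x86_mmx %a to <8 x i8>        ; reinterpret as 8 x i8 lanes
  %bv = bitcast x86_mmx %b to <8 x i8>
  %sum = add <8 x i8> %av, %bv                ; generic vector add, no intrinsic
  %res = bitcast <8 x i8> %sum to x86_mmx     ; back to the MMX register type
  store x86_mmx %res, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}
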
define void @foo(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
  %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
  %tmp4 = add <8 x i8> %tmp1a, %tmp3a ; <<8 x i8>> [#uses=2]
  %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
  %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
  %tmp28 = sub <8 x i8> %tmp21a, %tmp27a ; <<8 x i8>> [#uses=2]
  %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp45a, %tmp51a ; <<8 x i8>> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
  %tmp58 = and <8 x i8> %tmp52, %tmp57a ; <<8 x i8>> [#uses=2]
  %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
  store x86_mmx %tmp58a, x86_mmx* %A
  %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
  %tmp64 = or <8 x i8> %tmp58, %tmp63a ; <<8 x i8>> [#uses=2]
  %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
  store x86_mmx %tmp64a, x86_mmx* %A
  %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
  %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
  %tmp70 = xor <8 x i8> %tmp64b, %tmp69a ; <<8 x i8>> [#uses=1]
  %tmp70a = bitcast <8 x i8> %tmp70 to x86_mmx
  store x86_mmx %tmp70a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @baz(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
  %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
  %tmp4 = add <2 x i32> %tmp1a, %tmp3a ; <<2 x i32>> [#uses=2]
  %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
  %tmp10 = sub <2 x i32> %tmp4, %tmp9a ; <<2 x i32>> [#uses=2]
  %tmp10a = bitcast <2 x i32> %tmp10 to x86_mmx
  store x86_mmx %tmp10a, x86_mmx* %A
  %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10b, %tmp15a ; <<2 x i32>> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
  %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
  %tmp22 = and <2 x i32> %tmp16b, %tmp21a ; <<2 x i32>> [#uses=2]
  %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
  store x86_mmx %tmp22a, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
  %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
  %tmp28 = or <2 x i32> %tmp22b, %tmp27a ; <<2 x i32>> [#uses=2]
  %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
  %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
  %tmp34 = xor <2 x i32> %tmp28b, %tmp33a ; <<2 x i32>> [#uses=1]
  %tmp34a = bitcast <2 x i32> %tmp34 to x86_mmx
  store x86_mmx %tmp34a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @bar(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
  %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
  %tmp4 = add <4 x i16> %tmp1a, %tmp3a ; <<4 x i16>> [#uses=2]
  %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
  store x86_mmx %tmp4a, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4a, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
  %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
  %tmp28 = sub <4 x i16> %tmp21a, %tmp27a ; <<4 x i16>> [#uses=2]
  %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
  store x86_mmx %tmp28a, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28a, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
  %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
  %tmp52 = mul <4 x i16> %tmp45a, %tmp51a ; <<4 x i16>> [#uses=2]
  %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
  store x86_mmx %tmp52a, x86_mmx* %A
  %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52a, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
  %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
  %tmp76 = and <4 x i16> %tmp70a, %tmp75a ; <<4 x i16>> [#uses=2]
  %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
  store x86_mmx %tmp76a, x86_mmx* %A
  %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
  %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
  %tmp82 = or <4 x i16> %tmp76b, %tmp81a ; <<4 x i16>> [#uses=2]
  %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
  store x86_mmx %tmp82a, x86_mmx* %A
  %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
  %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
  %tmp88 = xor <4 x i16> %tmp82b, %tmp87a ; <<4 x i16>> [#uses=1]
  %tmp88a = bitcast <4 x i16> %tmp88 to x86_mmx
  store x86_mmx %tmp88a, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

;; The following is modified to use MMX intrinsics everywhere they work.
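
;; Illustrative sketch, not part of the original test: where an MMX intrinsic
;; exists, the operation is expressed as a direct call on x86_mmx values, with no
;; bitcasts through a vector type. The function name @intrinsic_pattern_sketch is
;; made up for illustration only.
define void @intrinsic_pattern_sketch(x86_mmx* %A, x86_mmx* %B) {
entry:
  %a = load x86_mmx* %A
  %b = load x86_mmx* %B
  %sum = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %a, x86_mmx %b ) ; lowers to paddb
  store x86_mmx %sum, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}
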
define void @fooa(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.b( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.b( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp51a = bitcast x86_mmx %tmp51 to i64
  %tmp51aa = bitcast i64 %tmp51a to <8 x i8>
  %tmp51b = bitcast x86_mmx %tmp45 to <8 x i8>
  %tmp52 = mul <8 x i8> %tmp51b, %tmp51aa ; <<8 x i8>> [#uses=2]
  %tmp52a = bitcast <8 x i8> %tmp52 to i64
  %tmp52aa = bitcast i64 %tmp52a to x86_mmx
  store x86_mmx %tmp52aa, x86_mmx* %A
  %tmp57 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp58 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp52aa, x86_mmx %tmp57 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp58, x86_mmx* %A
  %tmp63 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp64 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp58, x86_mmx %tmp63 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp64, x86_mmx* %A
  %tmp69 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp70 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp64, x86_mmx %tmp69 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @baza(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.d( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp9 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10 = tail call x86_mmx @llvm.x86.mmx.psub.d( x86_mmx %tmp4, x86_mmx %tmp9 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp10, x86_mmx* %A
  %tmp15 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp10a = bitcast x86_mmx %tmp10 to <2 x i32>
  %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
  %tmp16 = mul <2 x i32> %tmp10a, %tmp15a ; <<2 x i32>> [#uses=2]
  %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
  store x86_mmx %tmp16a, x86_mmx* %A
  %tmp21 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp22 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp16a, x86_mmx %tmp21 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp22, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp22, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp33 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp34 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp28, x86_mmx %tmp33 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp34, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

define void @bara(x86_mmx* %A, x86_mmx* %B) {
entry:
  %tmp1 = load x86_mmx* %A ; <x86_mmx> [#uses=1]
  %tmp3 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp4 = tail call x86_mmx @llvm.x86.mmx.padd.w( x86_mmx %tmp1, x86_mmx %tmp3 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp4, x86_mmx* %A
  %tmp7 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w( x86_mmx %tmp4, x86_mmx %tmp7 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp12, x86_mmx* %A
  %tmp16 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w( x86_mmx %tmp12, x86_mmx %tmp16 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp21, x86_mmx* %A
  %tmp27 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp28 = tail call x86_mmx @llvm.x86.mmx.psub.w( x86_mmx %tmp21, x86_mmx %tmp27 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp28, x86_mmx* %A
  %tmp31 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w( x86_mmx %tmp28, x86_mmx %tmp31 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp36, x86_mmx* %A
  %tmp40 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w( x86_mmx %tmp36, x86_mmx %tmp40 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp45, x86_mmx* %A
  %tmp51 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp52 = tail call x86_mmx @llvm.x86.mmx.pmull.w( x86_mmx %tmp45, x86_mmx %tmp51 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp52, x86_mmx* %A
  %tmp55 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w( x86_mmx %tmp52, x86_mmx %tmp55 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp60, x86_mmx* %A
  %tmp64 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd( x86_mmx %tmp60, x86_mmx %tmp64 ) ; <x86_mmx> [#uses=1]
  %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp70, x86_mmx* %A
  %tmp75 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp76 = tail call x86_mmx @llvm.x86.mmx.pand( x86_mmx %tmp70, x86_mmx %tmp75 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp76, x86_mmx* %A
  %tmp81 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp82 = tail call x86_mmx @llvm.x86.mmx.por( x86_mmx %tmp76, x86_mmx %tmp81 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp82, x86_mmx* %A
  %tmp87 = load x86_mmx* %B ; <x86_mmx> [#uses=1]
  %tmp88 = tail call x86_mmx @llvm.x86.mmx.pxor( x86_mmx %tmp82, x86_mmx %tmp87 ) ; <x86_mmx> [#uses=2]
  store x86_mmx %tmp88, x86_mmx* %A
  tail call void @llvm.x86.mmx.emms( )
  ret void
}

declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx)
declare void @llvm.x86.mmx.emms()
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.padds.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psubs.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx)
declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx)