// sound/softsynth/mt32/i386.cpp
/* Copyright (c) 2003-2005 Various contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "mt32emu.h"

#ifdef MT32EMU_HAVE_X86

namespace MT32Emu {

#ifndef _MSC_VER

#define eflag(value) __asm__ __volatile__("pushfl \n popfl \n" : : "a"(value))
#define cpuid_flag (1 << 21)

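// The helpers below are the GCC inline-asm counterparts of the MSVC __asm blocks
// further down. atti386_DetectCPUID() follows the classic x86 convention: a CPU
// supports the CPUID instruction if software can toggle the ID flag (bit 21) of
// EFLAGS, which is what the eflag() macro and the set/clear tests below aim at.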
static inline bool atti386_DetectCPUID() {
	unsigned int result;

	// Is there a cpuid?
	result = cpuid_flag; // set test
	eflag(result);
	if (!(result & cpuid_flag))
		return false;

	result = 0; // clear test
	eflag(result);
	if (result & cpuid_flag)
		return false;

	return true;
}

static inline bool atti386_DetectSIMD() {
	unsigned int result;

	if (atti386_DetectCPUID() == false)
		return false;

	// CPUID leaf 1: feature flags come back in EDX
	__asm__ __volatile__(
		"pushl %%ebx \n" \
		"movl $1, %%eax \n" \
		"cpuid \n" \
		"movl %%edx, %0 \n" \
		"popl %%ebx \n" \
		: "=r"(result) : : "eax", "ecx", "edx");

	// EDX bit 25 signals SSE support
	if (result & (1 << 25))
		return true;

	return false;
}

static inline bool atti386_Detect3DNow() {
	unsigned int result;

	if (atti386_DetectCPUID() == false)
		return false;

	// CPUID extended leaf 0x80000001: AMD feature flags come back in EDX
	__asm__ __volatile__(
		"pushl %%ebx \n" \
		"movl $0x80000001, %%eax \n" \
		"cpuid \n" \
		"movl %%edx, %0 \n" \
		"popl %%ebx \n" \
		: "=r"(result) : : "eax", "ecx", "edx");

	// EDX bit 31 signals 3DNow! support
	if (result & 0x80000000)
		return true;

	return false;
}

static inline float atti386_iir_filter_sse(float *output, float *hist1_ptr, float *coef_ptr) {
	__asm__ __volatile__ (
		"pushl %1 \n" \
		"pushl %2 \n" \
		"movss 0(%0), %%xmm1 \n" \
		"movups 0(%1), %%xmm2 \n" \
		"movlps 0(%2), %%xmm3 \n" \
		" \n" \
		"shufps $0x44, %%xmm3, %%xmm3 \n" \
		" \n" \
		"mulps %%xmm3, %%xmm2 \n" \
		" \n" \
		"subss %%xmm2, %%xmm1 \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"subss %%xmm2, %%xmm1 \n" \
		" \n" \
		"movss %%xmm1, 0(%2) \n" \
		" \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"addss %%xmm2, %%xmm1 \n" \
		" \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"addss %%xmm2, %%xmm1 \n" \
		" \n" \
		"movss %%xmm3, 4(%2) \n" \
		" \n" \
		"addl $16, %1 \n" \
		"addl $8, %2 \n" \
		" \n" \
		"movups 0(%1), %%xmm2 \n" \
		" \n" \
		"movlps 0(%2), %%xmm3 \n" \
		"shufps $0x44, %%xmm3, %%xmm3 \n" \
		" \n" \
		"mulps %%xmm3, %%xmm2 \n" \
		" \n" \
		"subss %%xmm2, %%xmm1 \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"subss %%xmm2, %%xmm1 \n" \
		" \n" \
		"movss %%xmm1, 0(%2) \n" \
		" \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"addss %%xmm2, %%xmm1 \n" \
		" \n" \
		"shufps $0x39, %%xmm2, %%xmm2 \n" \
		"addss %%xmm2, %%xmm1 \n" \
		" \n" \
		"movss %%xmm3, 4(%2) \n" \
		"movss %%xmm1, 0(%0) \n" \
		"popl %2 \n" \
		"popl %1 \n" \
		: : "r"(output), "r"(coef_ptr), "r"(hist1_ptr)
		: "memory"
#ifdef __SSE__
		, "xmm1", "xmm2", "xmm3"
#endif
	);

	return *output;
}

static inline float atti386_iir_filter_3DNow(float output, float *hist1_ptr, float *coef_ptr) {
	float tmp;

	__asm__ __volatile__ (
		"movq %0, %%mm1 \n" \
		" \n" \
		"movl %1, %%edi \n" \
		"movq 0(%%edi), %%mm2 \n" \
		" \n" \
		"movl %2, %%eax \n" \
		"movq 0(%%eax), %%mm3 \n" \
		" \n" \
		"pfmul %%mm3, %%mm2 \n" \
		"pfsub %%mm2, %%mm1 \n" \
		" \n" \
		"psrlq $32, %%mm2 \n" \
		"pfsub %%mm2, %%mm1 \n" \
		" \n" \
		"movd %%mm1, %3 \n" \
		" \n" \
		"addl $8, %%edi \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"movq 0(%%eax), %%mm3 \n" \
		" \n" \
		"pfmul %%mm3, %%mm2 \n" \
		"pfadd %%mm2, %%mm1 \n" \
		" \n" \
		"psrlq $32, %%mm2 \n" \
		"pfadd %%mm2, %%mm1 \n" \
		" \n" \
		"pushl %3 \n" \
		"popl 0(%%eax) \n" \
		" \n" \
		"movd %%mm3, 4(%%eax) \n" \
		" \n" \
		"addl $8, %%edi \n" \
		"addl $8, %%eax \n" \
		" \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"movq 0(%%eax), %%mm3 \n" \
		" \n" \
		"pfmul %%mm3, %%mm2 \n" \
		"pfsub %%mm2, %%mm1 \n" \
		" \n" \
		"psrlq $32, %%mm2 \n" \
		"pfsub %%mm2, %%mm1 \n" \
		" \n" \
		"movd %%mm1, %3 \n" \
		" \n" \
		"addl $8, %%edi \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"movq 0(%%eax), %%mm3 \n" \
		" \n" \
		"pfmul %%mm3, %%mm2 \n" \
		"pfadd %%mm2, %%mm1 \n" \
		" \n" \
		"psrlq $32, %%mm2 \n" \
		"pfadd %%mm2, %%mm1 \n" \
		" \n" \
		"pushl %3 \n" \
		"popl 0(%%eax) \n" \
		"movd %%mm3, 4(%%eax) \n" \
		" \n" \
		"movd %%mm1, %0 \n" \
		"femms \n" \
		: "=m"(output) : "g"(coef_ptr), "g"(hist1_ptr), "m"(tmp)
		: "eax", "edi", "memory"
#ifdef __MMX__
		, "mm1", "mm2", "mm3"
#endif
	);

	return output;
}

static inline void atti386_produceOutput1(int tmplen, Bit16s myvolume, Bit16s *useBuf, Bit16s *snd) {
	__asm__ __volatile__(
		"movl %0, %%ecx \n" \
		"movw %1, %%ax \n" \
		"shll $16, %%eax \n" \
		"movw %1, %%ax \n" \
		"movd %%eax, %%mm3 \n" \
		"movd %%eax, %%mm2 \n" \
		"psllq $32, %%mm3 \n" \
		"por %%mm2, %%mm3 \n" \
		"movl %2, %%esi \n" \
		"movl %3, %%edi \n" \
		"1: \n" \
		"movq 0(%%esi), %%mm1 \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"pmulhw %%mm3, %%mm1 \n" \
		"paddw %%mm2, %%mm1 \n" \
		"movq %%mm1, 0(%%edi) \n" \
		" \n" \
		"addl $8, %%esi \n" \
		"addl $8, %%edi \n" \
		" \n" \
		"decl %%ecx \n" \
		"cmpl $0, %%ecx \n" \
		"jg 1b \n" \
		"emms \n" \
		: : "g"(tmplen), "g"(myvolume), "g"(useBuf), "g"(snd)
		: "eax", "ecx", "edi", "esi", "memory"
#ifdef __MMX__
		, "mm1", "mm2", "mm3"
#endif
	);
}

static inline void atti386_produceOutput2(Bit32u len, Bit16s *snd, float *sndbufl, float *sndbufr, float *multFactor) {
	__asm__ __volatile__(
		"movl %4, %%ecx \n" \
		"shrl $1, %%ecx \n" \
		"addl $4, %%ecx \n" \
		"pushl %%ecx \n" \
		" \n" \
		"movl %0, %%esi \n" \
		"movups 0(%%esi), %%xmm1 \n" \
		" \n" \
		"movl %1, %%esi \n" \
		"movl %2, %%edi \n" \
		"1: \n" \
		"xorl %%eax, %%eax \n" \
		"movw 0(%1), %%ax \n" \
		"cwde \n" \
		"incl %1 \n" \
		"incl %1 \n" \
		"movd %%eax, %%mm1 \n" \
		"psrlq $32, %%mm1 \n" \
		"movw 0(%1), %%ax \n" \
		"incl %1 \n" \
		"incl %1 \n" \
		"movd %%eax, %%mm2 \n" \
		"por %%mm2, %%mm1 \n" \
		" \n" \
		"decl %%ecx \n" \
		"jnz 1b \n" \
		" \n" \
		"popl %%ecx \n" \
		"movl %1, %%esi \n" \
		"movl %3, %%edi \n" \
		"incl %%esi \n" \
		"2: \n" \
		"decl %%ecx \n" \
		"jnz 2b \n" \
		: : "g"(multFactor), "r"(snd), "g"(sndbufl), "g"(sndbufr), "g"(len)
		: "eax", "ecx", "edi", "esi", "mm1", "mm2", "xmm1", "memory");
}

static inline void atti386_mixBuffers(Bit16s *buf1, Bit16s *buf2, int len) {
	__asm__ __volatile__(
		"movl %0, %%ecx \n" \
		"movl %1, %%esi \n" \
		"movl %2, %%edi \n" \
		"1: \n" \
		"movq 0(%%edi), %%mm1 \n" \
		"movq 0(%%esi), %%mm2 \n" \
		"paddw %%mm2, %%mm1 \n" \
		"movq %%mm1, 0(%%esi) \n" \
		"addl $8, %%edi \n" \
		"addl $8, %%esi \n" \
		"decl %%ecx \n" \
		"cmpl $0, %%ecx \n" \
		"jg 1b \n" \
		"emms \n" \
		: : "g"(len), "g"(buf1), "g"(buf2)
		: "ecx", "edi", "esi", "memory"
#ifdef __MMX__
		, "mm1", "mm2"
#endif
	);
}

static inline void atti386_mixBuffersRingMix(Bit16s *buf1, Bit16s *buf2, int len) {
	__asm__ __volatile__(
		"movl %0, %%ecx \n" \
		"movl %1, %%esi \n" \
		"movl %2, %%edi \n" \
		"1: \n" \
		"movq 0(%%esi), %%mm1 \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"movq %%mm1, %%mm3 \n" \
		"pmulhw %%mm2, %%mm1 \n" \
		"paddw %%mm3, %%mm1 \n" \
		"movq %%mm1, 0(%%esi) \n" \
		"addl $8, %%edi \n" \
		"addl $8, %%esi \n" \
		"decl %%ecx \n" \
		"cmpl $0, %%ecx \n" \
		"jg 1b \n" \
		"emms \n" \
		: : "g"(len), "g"(buf1), "g"(buf2)
		: "ecx", "edi", "esi", "memory"
#ifdef __MMX__
		, "mm1", "mm2", "mm3"
#endif
	);
}

static inline void atti386_mixBuffersRing(Bit16s *buf1, Bit16s *buf2, int len) {
	__asm__ __volatile__(
		"movl %0, %%ecx \n" \
		"movl %1, %%esi \n" \
		"movl %2, %%edi \n" \
		"1: \n" \
		"movq 0(%%esi), %%mm1 \n" \
		"movq 0(%%edi), %%mm2 \n" \
		"pmulhw %%mm2, %%mm1 \n" \
		"movq %%mm1, 0(%%esi) \n" \
		"addl $8, %%edi \n" \
		"addl $8, %%esi \n" \
		"decl %%ecx \n" \
		"cmpl $0, %%ecx \n" \
		"jg 1b \n" \
		"emms \n" \
		: : "g"(len), "g"(buf1), "g"(buf2)
		: "ecx", "edi", "esi", "memory"
#ifdef __MMX__
		, "mm1", "mm2"
#endif
	);
}

static inline void atti386_partialProductOutput(int quadlen, Bit16s leftvol, Bit16s rightvol, Bit16s *partialBuf, Bit16s *p1buf) {
	__asm__ __volatile__(
		"movl %0, %%ecx \n" \
		"movw %1, %%ax \n" \
		"shll $16, %%eax \n" \
		"movw %2, %%ax \n" \
		"movd %%eax, %%mm1 \n" \
		"movd %%eax, %%mm2 \n" \
		"psllq $32, %%mm1 \n" \
		"por %%mm2, %%mm1 \n" \
		"movl %3, %%edi \n" \
		"movl %4, %%esi \n" \
		"pushl %%ebx \n" \
		"1: \n" \
		"movw 0(%%esi), %%bx \n" \
		"addl $2, %%esi \n" \
		"movw 0(%%esi), %%dx \n" \
		"addl $2, %%esi \n" \
		"" \
		"movw %%dx, %%ax \n" \
		"shll $16, %%eax \n" \
		"movw %%dx, %%ax \n" \
		"movd %%eax, %%mm2 \n" \
		"psllq $32, %%mm2 \n" \
		"movw %%bx, %%ax \n" \
		"shll $16, %%eax \n" \
		"movw %%bx, %%ax \n" \
		"movd %%eax, %%mm3 \n" \
		"por %%mm3, %%mm2 \n" \
		"" \
		"pmulhw %%mm1, %%mm2 \n" \
		"movq %%mm2, 0(%%edi) \n" \
		"addl $8, %%edi \n" \
		"" \
		"decl %%ecx \n" \
		"cmpl $0, %%ecx \n" \
		"jg 1b \n" \
		"emms \n" \
		"popl %%ebx \n" \
		: : "g"(quadlen), "g"(leftvol), "g"(rightvol), "g"(partialBuf), "g"(p1buf)
		: "eax", "ecx", "edx", "edi", "esi", "memory"
#ifdef __MMX__
		, "mm1", "mm2", "mm3"
#endif
	);
}

#endif // !_MSC_VER

bool DetectSIMD() {
#ifdef _MSC_VER
	bool found_simd;
	__asm {
		pushfd
		pop eax // get EFLAGS into eax
		mov ebx,eax // keep a copy
		xor eax,0x200000
		// toggle CPUID bit

		push eax
		popfd // set new EFLAGS
		pushfd
		pop eax // EFLAGS back into eax

		xor eax,ebx
		// have we changed the ID bit?

		je NO_SIMD
		// No, no CPUID instruction

		// we could toggle the
		// ID bit so CPUID is present
		mov eax,1

		cpuid // get processor features
		test edx,1<<25 // check the SIMD bit
		jz NO_SIMD
		mov found_simd,1
		jmp DONE
	NO_SIMD:
		mov found_simd,0
	DONE:
	}
	return found_simd;
#else
	return atti386_DetectSIMD();
#endif
}

bool Detect3DNow() {
#ifdef _MSC_VER
	bool found3D = false;
	__asm {
		pushfd
		pop eax
		mov edx, eax
		xor eax, 00200000h
		push eax
		popfd
		pushfd
		pop eax
		xor eax, edx
		jz NO_3DNOW

		mov eax, 80000000h
		cpuid

		cmp eax, 80000000h
		jbe NO_3DNOW

		mov eax, 80000001h
		cpuid
		test edx, 80000000h
		jz NO_3DNOW
		mov found3D, 1
	NO_3DNOW:
	}
	return found3D;
#else
	return atti386_Detect3DNow();
#endif
}

float iir_filter_sse(float input, float *hist1_ptr, float *coef_ptr) {
	float output;

	// 1st number of coefficients array is overall input scale factor, or filter gain
	output = input * (*coef_ptr++);

#ifdef _MSC_VER
	__asm {
		movss xmm1, output

		mov eax, coef_ptr
		movups xmm2, [eax]

		mov eax, hist1_ptr
		movlps xmm3, [eax]
		shufps xmm3, xmm3, 44h
		// hist1_ptr+1, hist1_ptr, hist1_ptr+1, hist1_ptr

		mulps xmm2, xmm3

		subss xmm1, xmm2
		// Rotate elements right
		shufps xmm2, xmm2, 39h
		subss xmm1, xmm2

		// Store new_hist
		movss DWORD PTR [eax], xmm1

		// Rotate elements right
		shufps xmm2, xmm2, 39h
		addss xmm1, xmm2

		// Rotate elements right
		shufps xmm2, xmm2, 39h
		addss xmm1, xmm2

		// Store previous hist
		movss DWORD PTR [eax+4], xmm3

		add coef_ptr, 16
		add hist1_ptr, 8

		mov eax, coef_ptr
		movups xmm2, [eax]

		mov eax, hist1_ptr
		movlps xmm3, [eax]
		shufps xmm3, xmm3, 44h
		// hist1_ptr+1, hist1_ptr, hist1_ptr+1, hist1_ptr

		mulps xmm2, xmm3

		subss xmm1, xmm2
		// Rotate elements right
		shufps xmm2, xmm2, 39h
		subss xmm1, xmm2

		// Store new_hist
		movss DWORD PTR [eax], xmm1

		// Rotate elements right
		shufps xmm2, xmm2, 39h
		addss xmm1, xmm2

		// Rotate elements right
		shufps xmm2, xmm2, 39h
		addss xmm1, xmm2

		// Store previous hist
		movss DWORD PTR [eax+4], xmm3

		movss output, xmm1
	}
#else
	output = atti386_iir_filter_sse(&output, hist1_ptr, coef_ptr);
#endif
	return output;
}

float iir_filter_3dnow(float input, float *hist1_ptr, float *coef_ptr) {
	float output;

	// 1st number of coefficients array is overall input scale factor, or filter gain
	output = input * (*coef_ptr++);

	// I find it very sad that 3DNow requires twice as many instructions as Intel's SSE
	// Intel does have the upper hand here.
#ifdef _MSC_VER
	float tmp;
	__asm {
		movq mm1, output
		mov ebx, coef_ptr
		movq mm2, [ebx]

		mov eax, hist1_ptr
		movq mm3, [eax]

		pfmul mm2, mm3
		pfsub mm1, mm2

		psrlq mm2, 32
		pfsub mm1, mm2

		// Store new hist
		movd tmp, mm1

		add ebx, 8
		movq mm2, [ebx]
		movq mm3, [eax]

		pfmul mm2, mm3
		pfadd mm1, mm2

		psrlq mm2, 32
		pfadd mm1, mm2

		push tmp
		pop DWORD PTR [eax]

		movd DWORD PTR [eax+4], mm3

		add ebx, 8
		add eax, 8

		movq mm2, [ebx]
		movq mm3, [eax]

		pfmul mm2, mm3
		pfsub mm1, mm2

		psrlq mm2, 32
		pfsub mm1, mm2

		// Store new hist
		movd tmp, mm1

		add ebx, 8
		movq mm2, [ebx]
		movq mm3, [eax]

		pfmul mm2, mm3
		pfadd mm1, mm2

		psrlq mm2, 32
		pfadd mm1, mm2

		push tmp
		pop DWORD PTR [eax]
		movd DWORD PTR [eax+4], mm3

		movd output, mm1

		femms
	}
#else
	output = atti386_iir_filter_3DNow(output, hist1_ptr, coef_ptr);
#endif
	return output;
}

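// For readers following the SIMD register shuffling above, here is a plain C sketch of the
// filter that both iir_filter_sse() and iir_filter_3dnow() implement: two cascaded
// second-order (biquad) sections, with four coefficients and two history values per section.
// This reference function is an illustration added for this write-up; it is not part of the
// original mt32emu sources and is never called, and the coefficient/history layout is
// inferred from the assembly above.
static inline float iir_filter_reference(float input, float *hist1_ptr, float *coef_ptr) {
	float output = input * (*coef_ptr++); // first coefficient is the overall gain

	for (int section = 0; section < 2; section++) {
		// Feedback (pole) part: subtract the scaled history values.
		float new_hist = output - coef_ptr[0] * hist1_ptr[0] - coef_ptr[1] * hist1_ptr[1];
		// Feedforward (zero) part: add the scaled history values back in.
		output = new_hist + coef_ptr[2] * hist1_ptr[0] + coef_ptr[3] * hist1_ptr[1];
		// Shift the history: the newest value goes in front, the old front value ages.
		hist1_ptr[1] = hist1_ptr[0];
		hist1_ptr[0] = new_hist;
		coef_ptr += 4;
		hist1_ptr += 2;
	}
	return output;
}
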
#if MT32EMU_USE_MMX > 0

int i386_partialProductOutput(int len, Bit16s leftvol, Bit16s rightvol, Bit16s *partialBuf, Bit16s *mixedBuf) {
	int tmplen = len >> 1;
	if (tmplen == 0) {
		return 0;
	}
#ifdef _MSC_VER
	__asm {
		mov ecx, tmplen
		mov ax, leftvol
		shl eax, 16
		mov ax, rightvol
		movd mm1, eax
		movd mm2, eax
		psllq mm1, 32
		por mm1, mm2
		mov edi, partialBuf
		mov esi, mixedBuf
	mmxloop1:
		mov bx, [esi]
		add esi, 2
		mov dx, [esi]
		add esi, 2

		mov ax, dx
		shl eax, 16
		mov ax, dx
		movd mm2, eax
		psllq mm2, 32
		mov ax, bx
		shl eax, 16
		mov ax, bx
		movd mm3, eax
		por mm2, mm3

		pmulhw mm2, mm1
		movq [edi], mm2
		add edi, 8

		dec ecx
		cmp ecx, 0
		jg mmxloop1
		emms
	}
#else
	atti386_partialProductOutput(tmplen, leftvol, rightvol, partialBuf, mixedBuf);
#endif
	return tmplen << 1;
}

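// Per-sample view of i386_partialProductOutput() (illustrative comment, not original code):
// each mono sample s read from mixedBuf is expanded into a pair of words in partialBuf, one
// scaled by leftvol and one by rightvol, each computed pmulhw-style as (s * vol) >> 16.
// Every loop iteration consumes two input samples and writes four output words, so
// (len >> 1) * 2 input samples are processed and the routine returns that count.
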
int i386_mixBuffers(Bit16s *buf1, Bit16s *buf2, int len) {
	int tmplen = len >> 2;
	if (tmplen == 0) {
		return 0;
	}
#ifdef _MSC_VER
	__asm {
		mov ecx, tmplen
		mov esi, buf1
		mov edi, buf2

	mixloop1:
		movq mm1, [edi]
		movq mm2, [esi]
		paddw mm1, mm2
		movq [esi], mm1
		add edi, 8
		add esi, 8

		dec ecx
		cmp ecx, 0
		jg mixloop1
		emms
	}
#else
	atti386_mixBuffers(buf1, buf2, tmplen);
#endif
	return tmplen << 2;
}

int i386_mixBuffersRingMix(Bit16s *buf1, Bit16s *buf2, int len) {
	int tmplen = len >> 2;
	if (tmplen == 0) {
		return 0;
	}
#ifdef _MSC_VER
	__asm {
		mov ecx, tmplen
		mov esi, buf1
		mov edi, buf2

	mixloop2:
		movq mm1, [esi]
		movq mm2, [edi]
		movq mm3, mm1
		pmulhw mm1, mm2
		paddw mm1, mm3
		movq [esi], mm1
		add edi, 8
		add esi, 8

		dec ecx
		cmp ecx, 0
		jg mixloop2
		emms
	}
#else
	atti386_mixBuffersRingMix(buf1, buf2, tmplen);
#endif
	return tmplen << 2;
}

int i386_mixBuffersRing(Bit16s *buf1, Bit16s *buf2, int len) {
	int tmplen = len >> 2;
	if (tmplen == 0) {
		return 0;
	}
#ifdef _MSC_VER
	__asm {
		mov ecx, tmplen
		mov esi, buf1
		mov edi, buf2

	mixloop3:
		movq mm1, [esi]
		movq mm2, [edi]
		pmulhw mm1, mm2
		movq [esi], mm1
		add edi, 8
		add esi, 8

		dec ecx
		cmp ecx, 0
		jg mixloop3
		emms
	}
#else
	atti386_mixBuffersRing(buf1, buf2, tmplen);
#endif
	return tmplen << 2;
}

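// Per-sample view of the three MMX mixers above (illustrative comment, not original code;
// pmulhw keeps the high 16 bits of the signed 16x16 product, paddw wraps rather than
// saturates, and each routine touches (len >> 2) * 4 samples):
//   i386_mixBuffers:        buf1[i] = buf1[i] + buf2[i]
//   i386_mixBuffersRingMix: buf1[i] = buf1[i] + ((buf1[i] * buf2[i]) >> 16)
//   i386_mixBuffersRing:    buf1[i] = (buf1[i] * buf2[i]) >> 16
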
int i386_produceOutput1(Bit16s *useBuf, Bit16s *stream, Bit32u len, Bit16s volume) {
	int tmplen = (len >> 1);
	if (tmplen == 0) {
		return 0;
	}
#ifdef _MSC_VER
	__asm {
		mov ecx, tmplen
		mov ax, volume
		shl eax, 16
		mov ax, volume
		movd mm3, eax
		movd mm2, eax
		psllq mm3, 32
		por mm3, mm2
		mov esi, useBuf
		mov edi, stream
	mixloop4:
		movq mm1, [esi]
		movq mm2, [edi]
		pmulhw mm1, mm3
		paddw mm1, mm2
		movq [edi], mm1

		add esi, 8
		add edi, 8

		dec ecx
		cmp ecx, 0
		jg mixloop4
		emms
	}
#else
	atti386_produceOutput1(tmplen, volume, useBuf, stream);
#endif
	return tmplen << 1;
}

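// A scalar sketch of what i386_produceOutput1() computes, added here as an illustration
// (not part of the original sources and never called): every 16-bit sample from useBuf is
// scaled by volume via the high word of the signed 16x16 product (the pmulhw step) and
// mixed into the output stream with wrap-around addition. The MMX loop handles four
// samples per iteration, hence the (len >> 1) * 4 element count.
static inline void produceOutput1_reference(Bit16s *useBuf, Bit16s *stream, Bit32u len, Bit16s volume) {
	int tmplen = len >> 1;
	for (int i = 0; i < tmplen * 4; i++)
		stream[i] = (Bit16s)(stream[i] + (((Bit32s)useBuf[i] * volume) >> 16));
}
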
#endif // MT32EMU_USE_MMX > 0

} // namespace MT32Emu

#endif // MT32EMU_HAVE_X86