1 ;******************************************************************************
2 ;* MMX optimized DSP utils
3 ;* Copyright (c) 2008 Loren Merritt
5 ;* This file is part of FFmpeg.
7 ;* FFmpeg is free software; you can redistribute it and/or
8 ;* modify it under the terms of the GNU Lesser General Public
9 ;* License as published by the Free Software Foundation; either
10 ;* version 2.1 of the License, or (at your option) any later version.
12 ;* FFmpeg is distributed in the hope that it will be useful,
13 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;* Lesser General Public License for more details.
17 ;* You should have received a copy of the GNU Lesser General Public
18 ;* License along with FFmpeg; if not, write to the Free Software
19 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 ;******************************************************************************
; Everything that follows is assembled into the .text section, which is
; requested with 16-byte alignment.
24 section .text
align=16
;------------------------------------------------------------------------------
; FLOAT_TO_INT16_INTERLEAVE6 <suffix>
;
; Takes one macro parameter (%1) and appends it to the exported function name,
; producing float_to_int16_interleave6_<suffix>. The C prototype comment below
; gives the arguments as dst, src (an array of source pointers) and len.
;
; The conversion mnemonic cvtps2pi is re-%define'd to pf2id before the 3dnow
; and 3dn2 instantiations, so the same body is reused with a different
; float->int instruction per variant.
;
; NOTE(review): only part of the body is visible in this excerpt. The embedded
; line numbering skips 38-41, 43, 49-56 and 62-81, so the loop, the stores to
; dst and the return are not shown here.
; NOTE(review): "2,7,0" is presumably x86inc's "args loaded into registers,
; GPRs used, XMM registers used" triple -- confirm against x86inc.asm.
;------------------------------------------------------------------------------
35 %macro FLOAT_TO_INT16_INTERLEAVE6
1
36 ; void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
37 cglobal float_to_int16_interleave6_
%1, 2,7,0, dst
, src
, src1
, src2
, src3
, src4
, src5
; lend names the length operand as a dword accessed through r2m.
; NOTE(review): r2m is presumably x86inc's memory-operand alias for argument
; index 2 (len) -- confirm against x86inc.asm.
42 %define lend
dword r2m
; Fetch entries 1..5 of the pointer array at srcq into src1q..src5q. The index
; is scaled by gprsize (presumably the size of one pointer in bytes).
44 mov src1q
, [srcq
+1*gprsize
]
45 mov src2q
, [srcq
+2*gprsize
]
46 mov src3q
, [srcq
+3*gprsize
]
47 mov src4q
, [srcq
+4*gprsize
]
48 mov src5q
, [srcq
+5*gprsize
]
; Convert packed floats read from [srcq+srcNq] into mm1..mm5, one register per
; source 1..5.
; NOTE(review): src1q..src5q are added to srcq here although they were loaded
; above as array entries; the code that rebases them relative to srcq, and the
; conversion for source 0, are not in this excerpt -- confirm.
57 cvtps2pi mm1
, [srcq
+src1q
]
58 cvtps2pi mm2
, [srcq
+src2q
]
59 cvtps2pi mm3
, [srcq
+src3q
]
60 cvtps2pi mm4
, [srcq
+src4q
]
61 cvtps2pi mm5
, [srcq
+src5q
]
82 %endmacro
; FLOAT_TO_INT16_INTERLEAVE6
; Instantiate float_to_int16_interleave6_sse with pswapd expanding to
; PSWAPD_SSE (defined outside this excerpt) and cvtps2pi left as the real
; instruction.
84 %define pswapd PSWAPD_SSE
85 FLOAT_TO_INT16_INTERLEAVE6 sse
; For the 3DNow! variants, cvtps2pi is replaced by pf2id and pswapd by
; PSWAPD_3DN1 (defined outside this excerpt) before instantiating
; float_to_int16_interleave6_3dnow.
86 %define cvtps2pi pf2id
87 %define pswapd PSWAPD_3DN1
88 FLOAT_TO_INT16_INTERLEAVE6
3dnow
; NOTE(review): the embedded numbering skips line 89. As visible here, the 3dn2
; variant is instantiated with the same defines as 3dnow (pf2id and
; PSWAPD_3DN1) -- confirm whether pswapd is meant to be changed or undefined
; before this instantiation.
90 FLOAT_TO_INT16_INTERLEAVE6
3dn2
95 ; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *top, uint8_t *diff, int w, int *left, int *left_top)
96 cglobal add_hfyu_median_prediction_mmx2
, 6,6,0, dst
, top
, diff
, w
, left
, left_top
104 psubb mm0
, mm4
; t-tl
116 psubb mm0
, mm4
; t-tl
122 paddb mm4
, mm3
; t-tl+l
127 pmaxub mm3
, mm5
; median
128 paddb mm3
, mm2
; +residual
148 movzx r2d
, byte [dstq
-1]
150 movzx r2d
, byte [topq
-1]