1 ;*****************************************************************************
2 ;* x86-optimized functions for v360 filter
4 ;* This file is part of FFmpeg.
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
22 %include "libavutil/x86/x86util.asm"
24 %if HAVE_AVX2_EXTERNAL
; Constant tables loaded by the remap*_line routines in this file.
;
; pb_mask: 16 bytes - byte indices 0,4,8,12 followed by twelve -1 entries.
;          Loaded into m3 with VBROADCASTI128 by remap1_8bit_line.
;          NOTE(review): presumably a byte-shuffle control that keeps the low
;          byte of each dword; the consuming shuffle is not visible - confirm.
28 pb_mask: db 0,4,8,12,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
; pw_mask: 16 bytes - byte-index pairs (0,1) (4,5) (8,9) (12,13) followed by
;          eight -1 entries. Loaded into m3 with VBROADCASTI128 by
;          remap1_16bit_line.
;          NOTE(review): presumably the 16-bit counterpart of pb_mask - confirm.
29 pw_mask: db 0,1,4, 5, 8, 9,12,13,-1,-1,-1,-1,-1,-1,-1,-1
; pd_255: four dwords, each 255. Broadcast into m6 (vpbroadcastd) by
;         remap2_8bit_line, remap3_8bit_line and remap4_8bit_line.
30 pd_255: times
4 dd 255
; pd_65535: four dwords, each 65535. Broadcast into m6 (vpbroadcastd) by
;           remap2_16bit_line.
31 pd_65535: times
4 dd 65535
35 ; void ff_remap2_8bit_line_avx2(uint8_t *dst, int width, const uint8_t *src, ptrdiff_t in_linesize,
36 ; const uint16_t *u, const uint16_t *v, const int16_t *ker);
; NOTE(review): the prototype above names remap2 and lists a ker argument, but
; the routine that follows is remap1_8bit_line, whose cglobal line declares
; six arguments (dst, width, src, in_linesize, u, v). u and v are also read
; with pmovsxwd (sign-extending word loads) although they are typed uint16_t
; here. Confirm the intended prototype against the C-side declaration.
;
; remap1_8bit_line
;   cglobal: 6 arguments, 7 general-purpose registers (x is the extra scratch
;   name), 6 vector registers.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below (index setup, offset arithmetic, stores, return) are not shown.
39 cglobal remap1_8bit_line
, 6, 7, 6, dst
, width, src
, in_linesize
, u
, v
, x
; x86inc helper: sign-extend the 32-bit width argument into widthq.
40 movsxdifnidn widthq
, widthd
; Low 32 bits of in_linesize -> low dword of xm0.
42 movd xm0
, in_linesized
; m3 = pb_mask replicated into each 128-bit lane.
44 VBROADCASTI128 m3
, [pb_mask
]
; m1 = words at v + x*2 bytes, sign-extended to dwords.
48 pmovsxwd m1
, [vq
+ xq
* 2]
; m2 = words at u + x*2 bytes, sign-extended to dwords.
49 pmovsxwd m2
, [uq
+ xq
* 2]
; Gather dwords from src + per-lane byte offsets in m1 into m5; m2 is the
; gather mask.
; NOTE(review): m2 held the u values just above; the statements that fold u/v
; into m1 and rebuild m2 as a mask are not visible - confirm.
54 vpgatherdd m5
, [srcq
+ m1
], m2
; xm2 = upper 128-bit lane of m1 (so m1 is a 256-bit register here).
56 vextracti128 xm2
, m1
, 1
; remap1_16bit_line
;   cglobal: 6 arguments (dst, width, src, in_linesize, u, v), 7
;   general-purpose registers (x is the extra scratch name), 6 vector
;   registers. Same visible structure as remap1_8bit_line, but uses pw_mask
;   and addresses dst at x*2 bytes.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below are not shown.
66 cglobal remap1_16bit_line
, 6, 7, 6, dst
, width, src
, in_linesize
, u
, v
, x
; x86inc helper: sign-extend the 32-bit width argument into widthq.
67 movsxdifnidn widthq
, widthd
; Low 32 bits of in_linesize -> low dword of xm0.
69 movd xm0
, in_linesized
; m3 = pw_mask replicated into each 128-bit lane.
71 VBROADCASTI128 m3
, [pw_mask
]
; m1 = words at v + x*2 bytes, sign-extended to dwords.
75 pmovsxwd m1
, [vq
+ xq
* 2]
; m2 = words at u + x*2 bytes, sign-extended to dwords.
76 pmovsxwd m2
, [uq
+ xq
* 2]
; Gather dwords from src + per-lane byte offsets in m1 into m5; m2 is the
; gather mask.
; NOTE(review): the statements that compute the offsets and rebuild m2 as a
; mask are not visible - confirm.
82 vpgatherdd m5
, [srcq
+ m1
], m2
; xm2 = upper 128-bit lane of m1.
84 vextracti128 xm2
, m1
, 1
; Store the low 8 bytes of xm2 at dst + x*2 + 8.
86 movq
[dstq
+xq
*2+8], xm2
; remap2_8bit_line
;   cglobal: 7 arguments (dst, width, src, in_linesize, u, v, ker), 8
;   general-purpose registers, 8 vector registers.
;   u, v and ker are indexed at x*8 bytes, i.e. four 16-bit entries per x.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below (offset arithmetic, weighting, loop control, return) are not
;   shown.
94 cglobal remap2_8bit_line
, 7, 8, 8, dst
, width, src
, in_linesize
, u
, v
, ker
, x
; x86inc helper: sign-extend the 32-bit width argument into widthq.
95 movsxdifnidn widthq
, widthd
; Copy in_linesize into xm0 before its register name is reassigned below.
96 movd xm0
, in_linesized
; Rename the argument registers positionally: the 4th register (previously
; in_linesize) is now called x; the other names keep their registers.
98 DEFINE_ARGS dst
, width, src
, x
, u
, v
, ker
; m6 = 255 in every dword lane.
103 vpbroadcastd m6
, [pd_255
]
; m1 = words at ker + x*8 bytes, sign-extended to dwords.
106 pmovsxwd m1
, [kerq
+ xq
* 8]
; m2 = words at v + x*8 bytes, sign-extended to dwords.
107 pmovsxwd m2
, [vq
+ xq
* 8]
; m3 = words at u + x*8 bytes, sign-extended to dwords.
108 pmovsxwd m3
, [uq
+ xq
* 8]
; Gather dwords from src + per-lane byte offsets in m4 into m2; m3 is the
; gather mask.
; NOTE(review): the statements that produce m4 and turn m3 into a mask are
; not visible - confirm.
113 vpgatherdd m2
, [srcq
+ m4
], m3
; xm2 = upper 128-bit lane of m1.
119 vextracti128 xm2
, m1
, 1
; dst[x] = byte 0 of xm1.
121 pextrb
[dstq
+xq
], xm1
, 0
; dst[x + 1] = byte 0 of xm2.
122 pextrb
[dstq
+xq
+1], xm2
, 0
; remap2_16bit_line
;   cglobal: 7 arguments (dst, width, src, in_linesize, u, v, ker), 8
;   general-purpose registers, 8 vector registers.
;   Same visible structure as remap2_8bit_line, but uses pd_65535 and stores
;   16-bit results at dst + x*2.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below are not shown.
130 cglobal remap2_16bit_line
, 7, 8, 8, dst
, width, src
, in_linesize
, u
, v
, ker
, x
; x86inc helper: sign-extend the 32-bit width argument into widthq.
131 movsxdifnidn widthq
, widthd
; Copy in_linesize into xm0 before its register name is reassigned below.
132 movd xm0
, in_linesized
; Rename the argument registers positionally: the 4th register (previously
; in_linesize) is now called x; the other names keep their registers.
134 DEFINE_ARGS dst
, width, src
, x
, u
, v
, ker
; m6 = 65535 in every dword lane.
139 vpbroadcastd m6
, [pd_65535
]
; m1 = words at ker + x*8 bytes, sign-extended to dwords.
142 pmovsxwd m1
, [kerq
+ xq
* 8]
; m2 = words at v + x*8 bytes, sign-extended to dwords.
143 pmovsxwd m2
, [vq
+ xq
* 8]
; m3 = words at u + x*8 bytes, sign-extended to dwords.
144 pmovsxwd m3
, [uq
+ xq
* 8]
; Gather dwords from src + per-lane byte offsets in m4 into m2; m3 is the
; gather mask.
; NOTE(review): the statements that produce m4 and turn m3 into a mask are
; not visible - confirm.
150 vpgatherdd m2
, [srcq
+ m4
], m3
; xm2 = upper 128-bit lane of m1.
156 vextracti128 xm2
, m1
, 1
; 16-bit store: word 0 of xm1 -> dst + x*2.
158 pextrw
[dstq
+xq
*2], xm1
, 0
; 16-bit store: word 0 of xm2 -> dst + x*2 + 2.
159 pextrw
[dstq
+xq
*2+2], xm2
, 0
; remap3_8bit_line
;   cglobal: 7 arguments (dst, width, src, in_linesize, u, v, ker), 11
;   general-purpose registers (x, y, tmp, z are the extra scratch names),
;   8 vector registers.
;   u, v and ker are addressed at byte offset y. The vector loads cover the
;   first 16 bytes of each table; the entry at byte offset 16 is read with
;   scalar code.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below (how y is derived, offset arithmetic, weighting, loop control,
;   return) are not shown.
169 cglobal remap3_8bit_line
, 7, 11, 8, dst
, width, src
, in_linesize
, u
, v
, ker
, x
, y
, tmp
, z
; x86inc helper: sign-extend the 32-bit width argument into widthq.
170 movsxdifnidn widthq
, widthd
; Low 32 bits of in_linesize -> low dword of xm0.
174 movd xm0
, in_linesized
; m6 = 255 in every dword lane.
177 vpbroadcastd m6
, [pd_255
]
; m1 = words at ker + y, sign-extended to dwords.
180 pmovsxwd m1
, [kerq
+ yq
]
; m2 = words at v + y, sign-extended to dwords.
181 pmovsxwd m2
, [vq
+ yq
]
; m3 = words at u + y, sign-extended to dwords.
182 pmovsxwd m3
, [uq
+ yq
]
; Gather dwords from src + per-lane byte offsets in m4 into m2; m3 is the
; gather mask.
; NOTE(review): the statements that produce m4 and turn m3 into a mask are
; not visible - confirm.
187 vpgatherdd m2
, [srcq
+ m4
], m3
; Scalar handling of the table entry at byte offset 16.
; tmp = zero-extended word at v + y + 16.
; NOTE(review): the vector path sign-extends (pmovsxwd) while this path
; zero-extends (movzx); confirm the entries are never negative.
191 movzx tmpq
, word [vq
+ yq
+ 16]
; tmp *= in_linesize.
192 imul tmpq
, in_linesizeq
; z = zero-extended word at v's counterpart table u, offset y + 16.
193 movzx zq
, word [uq
+ yq
+ 16]
; z = byte at src + tmp.
; NOTE(review): as visible, z is overwritten without having been used; a
; statement combining z into tmp presumably sits between these two - confirm.
195 movzx zq
, byte [srcq
+ tmpq
]
; tmp = zero-extended word at ker + y + 16.
196 movzx tmpq
, word [kerq
+ yq
+ 16]
; dst[x] = byte 0 of xm2.
203 pextrb
[dstq
+xq
], xm2
, 0
; remap4_8bit_line
;   cglobal: 7 arguments (dst, width, src, in_linesize, u, v, ker), 9
;   general-purpose registers (x, y are the extra scratch names), 11 vector
;   registers (m0..m10; m10 is used as a gather index below).
;   u, v and ker are each loaded as two 16-byte halves, at byte offsets y and
;   y + 16, and two gathers are issued per step.
;   NOTE(review): only part of the body is visible; statements between the
;   ones below (how y is derived, offset arithmetic, weighting, loop control,
;   return) are not shown.
212 cglobal remap4_8bit_line
, 7, 9, 11, dst
, width, src
, in_linesize
, u
, v
, ker
, x
, y
; x86inc helper: sign-extend the 32-bit width argument into widthq.
213 movsxdifnidn widthq
, widthd
; Low 32 bits of in_linesize -> low dword of xm0.
216 movd xm0
, in_linesized
; m6 = 255 in every dword lane.
219 vpbroadcastd m6
, [pd_255
]
; m1 / m5 = words at ker + y and ker + y + 16, sign-extended to dwords.
222 pmovsxwd m1
, [kerq
+ yq
]
223 pmovsxwd m5
, [kerq
+ yq
+ 16]
; m2 / m8 = words at v + y and v + y + 16, sign-extended to dwords.
224 pmovsxwd m2
, [vq
+ yq
]
225 pmovsxwd m8
, [vq
+ yq
+ 16]
; m3 / m9 = words at u + y and u + y + 16, sign-extended to dwords.
226 pmovsxwd m3
, [uq
+ yq
]
227 pmovsxwd m9
, [uq
+ yq
+ 16]
; First gather: dwords from src + byte offsets in m4 -> m2, mask m3.
; NOTE(review): the statements that produce m4/m10 and turn m3 into a mask
; are not visible - confirm.
234 vpgatherdd m2
, [srcq
+ m4
], m3
; Second gather: dwords from src + byte offsets in m10 -> m4, mask m3.
; m4 is the index of the first gather and the destination of this one.
; NOTE(review): a gather clears its mask register as it completes, so m3 has
; to be re-initialised between the two gathers; that statement is not
; visible - confirm.
236 vpgatherdd m4
, [srcq
+ m10
], m3
; dst[x] = byte 0 of xm2.
247 pextrb
[dstq
+xq
], xm2
, 0