7 #define RTCD_EXTERN extern
14 #include "vpx/vpx_integer.h"
15 #include "vp9/common/vp9_common.h"
16 #include "vp9/common/vp9_enums.h"
/*
 * Encoder forward decls
 *
 * Incomplete struct tags, declared here so that prototypes can refer to
 * these types by pointer without needing their full definitions.
 */
struct vp9_variance_vtable;
struct search_site_config;
struct yv12_buffer_config;
32 unsigned int vp9_avg_4x4_c(const uint8_t *, int p
);
33 unsigned int vp9_avg_4x4_sse2(const uint8_t *, int p
);
34 RTCD_EXTERN
unsigned int (*vp9_avg_4x4
)(const uint8_t *, int p
);
36 unsigned int vp9_avg_8x8_c(const uint8_t *, int p
);
37 unsigned int vp9_avg_8x8_sse2(const uint8_t *, int p
);
38 RTCD_EXTERN
unsigned int (*vp9_avg_8x8
)(const uint8_t *, int p
);
40 int64_t vp9_block_error_c(const tran_low_t
*coeff
, const tran_low_t
*dqcoeff
, intptr_t block_size
, int64_t *ssz
);
41 int64_t vp9_block_error_sse2(const tran_low_t
*coeff
, const tran_low_t
*dqcoeff
, intptr_t block_size
, int64_t *ssz
);
42 int64_t vp9_block_error_avx2(const tran_low_t
*coeff
, const tran_low_t
*dqcoeff
, intptr_t block_size
, int64_t *ssz
);
43 RTCD_EXTERN
int64_t (*vp9_block_error
)(const tran_low_t
*coeff
, const tran_low_t
*dqcoeff
, intptr_t block_size
, int64_t *ssz
);
45 int64_t vp9_block_error_fp_c(const int16_t *coeff
, const int16_t *dqcoeff
, int block_size
);
46 int64_t vp9_block_error_fp_sse2(const int16_t *coeff
, const int16_t *dqcoeff
, int block_size
);
47 RTCD_EXTERN
int64_t (*vp9_block_error_fp
)(const int16_t *coeff
, const int16_t *dqcoeff
, int block_size
);
49 int vp9_denoiser_filter_c(const uint8_t *sig
, int sig_stride
, const uint8_t *mc_avg
, int mc_avg_stride
, uint8_t *avg
, int avg_stride
, int increase_denoising
, BLOCK_SIZE bs
, int motion_magnitude
);
50 int vp9_denoiser_filter_sse2(const uint8_t *sig
, int sig_stride
, const uint8_t *mc_avg
, int mc_avg_stride
, uint8_t *avg
, int avg_stride
, int increase_denoising
, BLOCK_SIZE bs
, int motion_magnitude
);
51 RTCD_EXTERN
int (*vp9_denoiser_filter
)(const uint8_t *sig
, int sig_stride
, const uint8_t *mc_avg
, int mc_avg_stride
, uint8_t *avg
, int avg_stride
, int increase_denoising
, BLOCK_SIZE bs
, int motion_magnitude
);
/*
 * vp9_diamond_search_sad: only a C variant is declared, so the public name
 * is a compile-time alias for it rather than a run-time dispatch pointer.
 */
int vp9_diamond_search_sad_c(const struct macroblock *x,
    const struct search_site_config *cfg, struct mv *ref_mv,
    struct mv *best_mv, int search_param, int sad_per_bit, int *num00,
    const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_diamond_search_sad vp9_diamond_search_sad_c
56 void vp9_fdct8x8_quant_c(const int16_t *input
, int stride
, tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
57 void vp9_fdct8x8_quant_sse2(const int16_t *input
, int stride
, tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
58 void vp9_fdct8x8_quant_ssse3(const int16_t *input
, int stride
, tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
59 RTCD_EXTERN
void (*vp9_fdct8x8_quant
)(const int16_t *input
, int stride
, tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
61 void vp9_fht16x16_c(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
62 void vp9_fht16x16_sse2(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
63 RTCD_EXTERN
void (*vp9_fht16x16
)(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
65 void vp9_fht4x4_c(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
66 void vp9_fht4x4_sse2(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
67 RTCD_EXTERN
void (*vp9_fht4x4
)(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
69 void vp9_fht8x8_c(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
70 void vp9_fht8x8_sse2(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
71 RTCD_EXTERN
void (*vp9_fht8x8
)(const int16_t *input
, tran_low_t
*output
, int stride
, int tx_type
);
73 void vp9_filter_by_weight16x16_c(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
74 void vp9_filter_by_weight16x16_sse2(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
75 RTCD_EXTERN
void (*vp9_filter_by_weight16x16
)(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
77 void vp9_filter_by_weight8x8_c(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
78 void vp9_filter_by_weight8x8_sse2(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
79 RTCD_EXTERN
void (*vp9_filter_by_weight8x8
)(const uint8_t *src
, int src_stride
, uint8_t *dst
, int dst_stride
, int src_weight
);
/*
 * vp9_full_range_search: only a C variant is declared, so the public name
 * is a compile-time alias for it rather than a run-time dispatch pointer.
 */
int vp9_full_range_search_c(const struct macroblock *x,
    const struct search_site_config *cfg, struct mv *ref_mv,
    struct mv *best_mv, int search_param, int sad_per_bit, int *num00,
    const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
#define vp9_full_range_search vp9_full_range_search_c
84 int vp9_full_search_sad_c(const struct macroblock
*x
, const struct mv
*ref_mv
, int sad_per_bit
, int distance
, const struct vp9_variance_vtable
*fn_ptr
, const struct mv
*center_mv
, struct mv
*best_mv
);
85 int vp9_full_search_sadx3(const struct macroblock
*x
, const struct mv
*ref_mv
, int sad_per_bit
, int distance
, const struct vp9_variance_vtable
*fn_ptr
, const struct mv
*center_mv
, struct mv
*best_mv
);
86 int vp9_full_search_sadx8(const struct macroblock
*x
, const struct mv
*ref_mv
, int sad_per_bit
, int distance
, const struct vp9_variance_vtable
*fn_ptr
, const struct mv
*center_mv
, struct mv
*best_mv
);
87 RTCD_EXTERN
int (*vp9_full_search_sad
)(const struct macroblock
*x
, const struct mv
*ref_mv
, int sad_per_bit
, int distance
, const struct vp9_variance_vtable
*fn_ptr
, const struct mv
*center_mv
, struct mv
*best_mv
);
89 void vp9_fwht4x4_c(const int16_t *input
, tran_low_t
*output
, int stride
);
90 void vp9_fwht4x4_mmx(const int16_t *input
, tran_low_t
*output
, int stride
);
91 RTCD_EXTERN
void (*vp9_fwht4x4
)(const int16_t *input
, tran_low_t
*output
, int stride
);
93 void vp9_hadamard_16x16_c(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
94 void vp9_hadamard_16x16_sse2(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
95 RTCD_EXTERN
void (*vp9_hadamard_16x16
)(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
97 void vp9_hadamard_8x8_c(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
98 void vp9_hadamard_8x8_sse2(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
99 RTCD_EXTERN
void (*vp9_hadamard_8x8
)(int16_t const *src_diff
, int src_stride
, int16_t *coeff
);
101 void vp9_iht16x16_256_add_c(const tran_low_t
*input
, uint8_t *output
, int pitch
, int tx_type
);
102 void vp9_iht16x16_256_add_sse2(const tran_low_t
*input
, uint8_t *output
, int pitch
, int tx_type
);
103 RTCD_EXTERN
void (*vp9_iht16x16_256_add
)(const tran_low_t
*input
, uint8_t *output
, int pitch
, int tx_type
);
105 void vp9_iht4x4_16_add_c(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
106 void vp9_iht4x4_16_add_sse2(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
107 RTCD_EXTERN
void (*vp9_iht4x4_16_add
)(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
109 void vp9_iht8x8_64_add_c(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
110 void vp9_iht8x8_64_add_sse2(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
111 RTCD_EXTERN
void (*vp9_iht8x8_64_add
)(const tran_low_t
*input
, uint8_t *dest
, int dest_stride
, int tx_type
);
113 int16_t vp9_int_pro_col_c(uint8_t const *ref
, const int width
);
114 int16_t vp9_int_pro_col_sse2(uint8_t const *ref
, const int width
);
115 RTCD_EXTERN
int16_t (*vp9_int_pro_col
)(uint8_t const *ref
, const int width
);
117 void vp9_int_pro_row_c(int16_t *hbuf
, uint8_t const *ref
, const int ref_stride
, const int height
);
118 void vp9_int_pro_row_sse2(int16_t *hbuf
, uint8_t const *ref
, const int ref_stride
, const int height
);
119 RTCD_EXTERN
void (*vp9_int_pro_row
)(int16_t *hbuf
, uint8_t const *ref
, const int ref_stride
, const int height
);
121 void vp9_mbpost_proc_across_ip_c(uint8_t *src
, int pitch
, int rows
, int cols
, int flimit
);
122 void vp9_mbpost_proc_across_ip_xmm(uint8_t *src
, int pitch
, int rows
, int cols
, int flimit
);
123 RTCD_EXTERN
void (*vp9_mbpost_proc_across_ip
)(uint8_t *src
, int pitch
, int rows
, int cols
, int flimit
);
125 void vp9_mbpost_proc_down_c(uint8_t *dst
, int pitch
, int rows
, int cols
, int flimit
);
126 void vp9_mbpost_proc_down_xmm(uint8_t *dst
, int pitch
, int rows
, int cols
, int flimit
);
127 RTCD_EXTERN
void (*vp9_mbpost_proc_down
)(uint8_t *dst
, int pitch
, int rows
, int cols
, int flimit
);
129 void vp9_minmax_8x8_c(const uint8_t *s
, int p
, const uint8_t *d
, int dp
, int *min
, int *max
);
130 void vp9_minmax_8x8_sse2(const uint8_t *s
, int p
, const uint8_t *d
, int dp
, int *min
, int *max
);
131 RTCD_EXTERN
void (*vp9_minmax_8x8
)(const uint8_t *s
, int p
, const uint8_t *d
, int dp
, int *min
, int *max
);
133 void vp9_plane_add_noise_c(uint8_t *Start
, char *noise
, char blackclamp
[16], char whiteclamp
[16], char bothclamp
[16], unsigned int Width
, unsigned int Height
, int Pitch
);
134 void vp9_plane_add_noise_wmt(uint8_t *Start
, char *noise
, char blackclamp
[16], char whiteclamp
[16], char bothclamp
[16], unsigned int Width
, unsigned int Height
, int Pitch
);
135 RTCD_EXTERN
void (*vp9_plane_add_noise
)(uint8_t *Start
, char *noise
, char blackclamp
[16], char whiteclamp
[16], char bothclamp
[16], unsigned int Width
, unsigned int Height
, int Pitch
);
137 void vp9_post_proc_down_and_across_c(const uint8_t *src_ptr
, uint8_t *dst_ptr
, int src_pixels_per_line
, int dst_pixels_per_line
, int rows
, int cols
, int flimit
);
138 void vp9_post_proc_down_and_across_xmm(const uint8_t *src_ptr
, uint8_t *dst_ptr
, int src_pixels_per_line
, int dst_pixels_per_line
, int rows
, int cols
, int flimit
);
139 RTCD_EXTERN
void (*vp9_post_proc_down_and_across
)(const uint8_t *src_ptr
, uint8_t *dst_ptr
, int src_pixels_per_line
, int dst_pixels_per_line
, int rows
, int cols
, int flimit
);
141 void vp9_quantize_fp_c(const tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
142 void vp9_quantize_fp_sse2(const tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
143 RTCD_EXTERN
void (*vp9_quantize_fp
)(const tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
145 void vp9_quantize_fp_32x32_c(const tran_low_t
*coeff_ptr
, intptr_t n_coeffs
, int skip_block
, const int16_t *zbin_ptr
, const int16_t *round_ptr
, const int16_t *quant_ptr
, const int16_t *quant_shift_ptr
, tran_low_t
*qcoeff_ptr
, tran_low_t
*dqcoeff_ptr
, const int16_t *dequant_ptr
, uint16_t *eob_ptr
, const int16_t *scan
, const int16_t *iscan
);
146 #define vp9_quantize_fp_32x32 vp9_quantize_fp_32x32_c
148 int16_t vp9_satd_c(const int16_t *coeff
, int length
);
149 int16_t vp9_satd_sse2(const int16_t *coeff
, int length
);
150 RTCD_EXTERN
int16_t (*vp9_satd
)(const int16_t *coeff
, int length
);
152 void vp9_temporal_filter_apply_c(uint8_t *frame1
, unsigned int stride
, uint8_t *frame2
, unsigned int block_width
, unsigned int block_height
, int strength
, int filter_weight
, unsigned int *accumulator
, uint16_t *count
);
153 void vp9_temporal_filter_apply_sse2(uint8_t *frame1
, unsigned int stride
, uint8_t *frame2
, unsigned int block_width
, unsigned int block_height
, int strength
, int filter_weight
, unsigned int *accumulator
, uint16_t *count
);
154 RTCD_EXTERN
void (*vp9_temporal_filter_apply
)(uint8_t *frame1
, unsigned int stride
, uint8_t *frame2
, unsigned int block_width
, unsigned int block_height
, int strength
, int filter_weight
, unsigned int *accumulator
, uint16_t *count
);
156 int vp9_vector_var_c(int16_t const *ref
, int16_t const *src
, const int bwl
);
157 int vp9_vector_var_sse2(int16_t const *ref
, int16_t const *src
, const int bwl
);
158 RTCD_EXTERN
int (*vp9_vector_var
)(int16_t const *ref
, int16_t const *src
, const int bwl
);
163 #include "vpx_ports/x86.h"
164 static void setup_rtcd_internal(void)
166 int flags
= x86_simd_caps();
170 vp9_avg_4x4
= vp9_avg_4x4_c
;
171 if (flags
& HAS_SSE2
) vp9_avg_4x4
= vp9_avg_4x4_sse2
;
172 vp9_avg_8x8
= vp9_avg_8x8_c
;
173 if (flags
& HAS_SSE2
) vp9_avg_8x8
= vp9_avg_8x8_sse2
;
174 vp9_block_error
= vp9_block_error_c
;
175 if (flags
& HAS_SSE2
) vp9_block_error
= vp9_block_error_sse2
;
176 if (flags
& HAS_AVX2
) vp9_block_error
= vp9_block_error_avx2
;
177 vp9_block_error_fp
= vp9_block_error_fp_c
;
178 if (flags
& HAS_SSE2
) vp9_block_error_fp
= vp9_block_error_fp_sse2
;
179 vp9_denoiser_filter
= vp9_denoiser_filter_c
;
180 if (flags
& HAS_SSE2
) vp9_denoiser_filter
= vp9_denoiser_filter_sse2
;
181 vp9_fdct8x8_quant
= vp9_fdct8x8_quant_c
;
182 if (flags
& HAS_SSE2
) vp9_fdct8x8_quant
= vp9_fdct8x8_quant_sse2
;
183 if (flags
& HAS_SSSE3
) vp9_fdct8x8_quant
= vp9_fdct8x8_quant_ssse3
;
184 vp9_fht16x16
= vp9_fht16x16_c
;
185 if (flags
& HAS_SSE2
) vp9_fht16x16
= vp9_fht16x16_sse2
;
186 vp9_fht4x4
= vp9_fht4x4_c
;
187 if (flags
& HAS_SSE2
) vp9_fht4x4
= vp9_fht4x4_sse2
;
188 vp9_fht8x8
= vp9_fht8x8_c
;
189 if (flags
& HAS_SSE2
) vp9_fht8x8
= vp9_fht8x8_sse2
;
190 vp9_filter_by_weight16x16
= vp9_filter_by_weight16x16_c
;
191 if (flags
& HAS_SSE2
) vp9_filter_by_weight16x16
= vp9_filter_by_weight16x16_sse2
;
192 vp9_filter_by_weight8x8
= vp9_filter_by_weight8x8_c
;
193 if (flags
& HAS_SSE2
) vp9_filter_by_weight8x8
= vp9_filter_by_weight8x8_sse2
;
194 vp9_full_search_sad
= vp9_full_search_sad_c
;
195 if (flags
& HAS_SSE3
) vp9_full_search_sad
= vp9_full_search_sadx3
;
196 if (flags
& HAS_SSE4_1
) vp9_full_search_sad
= vp9_full_search_sadx8
;
197 vp9_fwht4x4
= vp9_fwht4x4_c
;
198 if (flags
& HAS_MMX
) vp9_fwht4x4
= vp9_fwht4x4_mmx
;
199 vp9_hadamard_16x16
= vp9_hadamard_16x16_c
;
200 if (flags
& HAS_SSE2
) vp9_hadamard_16x16
= vp9_hadamard_16x16_sse2
;
201 vp9_hadamard_8x8
= vp9_hadamard_8x8_c
;
202 if (flags
& HAS_SSE2
) vp9_hadamard_8x8
= vp9_hadamard_8x8_sse2
;
203 vp9_iht16x16_256_add
= vp9_iht16x16_256_add_c
;
204 if (flags
& HAS_SSE2
) vp9_iht16x16_256_add
= vp9_iht16x16_256_add_sse2
;
205 vp9_iht4x4_16_add
= vp9_iht4x4_16_add_c
;
206 if (flags
& HAS_SSE2
) vp9_iht4x4_16_add
= vp9_iht4x4_16_add_sse2
;
207 vp9_iht8x8_64_add
= vp9_iht8x8_64_add_c
;
208 if (flags
& HAS_SSE2
) vp9_iht8x8_64_add
= vp9_iht8x8_64_add_sse2
;
209 vp9_int_pro_col
= vp9_int_pro_col_c
;
210 if (flags
& HAS_SSE2
) vp9_int_pro_col
= vp9_int_pro_col_sse2
;
211 vp9_int_pro_row
= vp9_int_pro_row_c
;
212 if (flags
& HAS_SSE2
) vp9_int_pro_row
= vp9_int_pro_row_sse2
;
213 vp9_mbpost_proc_across_ip
= vp9_mbpost_proc_across_ip_c
;
214 if (flags
& HAS_SSE2
) vp9_mbpost_proc_across_ip
= vp9_mbpost_proc_across_ip_xmm
;
215 vp9_mbpost_proc_down
= vp9_mbpost_proc_down_c
;
216 if (flags
& HAS_SSE2
) vp9_mbpost_proc_down
= vp9_mbpost_proc_down_xmm
;
217 vp9_minmax_8x8
= vp9_minmax_8x8_c
;
218 if (flags
& HAS_SSE2
) vp9_minmax_8x8
= vp9_minmax_8x8_sse2
;
219 vp9_plane_add_noise
= vp9_plane_add_noise_c
;
220 if (flags
& HAS_SSE2
) vp9_plane_add_noise
= vp9_plane_add_noise_wmt
;
221 vp9_post_proc_down_and_across
= vp9_post_proc_down_and_across_c
;
222 if (flags
& HAS_SSE2
) vp9_post_proc_down_and_across
= vp9_post_proc_down_and_across_xmm
;
223 vp9_quantize_fp
= vp9_quantize_fp_c
;
224 if (flags
& HAS_SSE2
) vp9_quantize_fp
= vp9_quantize_fp_sse2
;
225 vp9_satd
= vp9_satd_c
;
226 if (flags
& HAS_SSE2
) vp9_satd
= vp9_satd_sse2
;
227 vp9_temporal_filter_apply
= vp9_temporal_filter_apply_c
;
228 if (flags
& HAS_SSE2
) vp9_temporal_filter_apply
= vp9_temporal_filter_apply_sse2
;
229 vp9_vector_var
= vp9_vector_var_c
;
230 if (flags
& HAS_SSE2
) vp9_vector_var
= vp9_vector_var_sse2
;