3 * Copyright (c) 2002-2004 Michael Niedermayer
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * @file motion_est_template.c
23 * Motion estimation template.
26 //let's hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
28 uint32_t attribute_unused * const score_map= c->score_map;\
29 const int attribute_unused xmin= c->xmin;\
30 const int attribute_unused ymin= c->ymin;\
31 const int attribute_unused xmax= c->xmax;\
32 const int attribute_unused ymax= c->ymax;\
33 uint8_t *mv_penalty= c->current_mv_penalty;\
34 const int pred_x= c->pred_x;\
35 const int pred_y= c->pred_y;\
37 #define CHECK_HALF_MV(dx, dy, x, y)\
39 const int hx= 2*(x)+(dx);\
40 const int hy= 2*(y)+(dy);\
41 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
42 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
43 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
/*
 * Half-pel refinement around the full-pel vector (*mx_ptr, *my_ptr).
 *
 * - hpel_put / chroma_hpel_put are taken from put_no_rnd_pixels_tab when
 *   s->no_rounding is set and from put_pixels_tab otherwise.
 * - When c->avctx->me_cmp differs from c->avctx->me_sub_cmp the centre is
 *   evaluated again through CMP_HPEL; the mv_penalty term for (2*mx, 2*my)
 *   is added to dmin.
 * - If the vector lies strictly inside (xmin, xmax) x (ymin, ymax) the eight
 *   surrounding half-pel positions are probed with CHECK_HALF_MV, which
 *   keeps the best candidate in bx/by.
 *
 * NOTE(review): the header carries a stray ')' before the parameter list and
 * a second hpel_motion_search definition follows in this file, so this
 * variant is presumably excluded from the build - confirm before relying on it.
 */
47 static int hpel_motion_search
)(MpegEncContext
* s
,
48 int *mx_ptr
, int *my_ptr
, int dmin
,
52 const int xx
= 16 * s
->mb_x
+ 8*(n
&1);
53 const int yy
= 16 * s
->mb_y
+ 8*(n
>>1);
54 const int mx
= *mx_ptr
;
55 const int my
= *my_ptr
;
56 const int penalty_factor
= c
->sub_penalty_factor
;
62 me_cmp_func cmp
, chroma_cmp
, cmp_sub
, chroma_cmp_sub
;
64 if(s
->no_rounding
/*FIXME b_type*/){
65 hpel_put
= &s
->dsp
.put_no_rnd_pixels_tab
[size
];
66 chroma_hpel_put
= &s
->dsp
.put_no_rnd_pixels_tab
[size
+1];
68 hpel_put
=& s
->dsp
.put_pixels_tab
[size
];
69 chroma_hpel_put
= &s
->dsp
.put_pixels_tab
[size
+1];
71 cmpf
= s
->dsp
.me_cmp
[size
];
72 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
73 cmp_sub
= s
->dsp
.me_sub_cmp
[size
];
74 chroma_cmp_sub
= s
->dsp
.me_sub_cmp
[size
+1];
76 if(c
->skip
){ //FIXME somehow move up (benchmark)
82 if(c
->avctx
->me_cmp
!= c
->avctx
->me_sub_cmp
){
83 CMP_HPEL(dmin
, 0, 0, mx
, my
, size
);
85 dmin
+= (mv_penalty
[2*mx
- pred_x
] + mv_penalty
[2*my
- pred_y
])*penalty_factor
;
88 if (mx
> xmin
&& mx
< xmax
&&
89 my
> ymin
&& my
< ymax
) {
93 CHECK_HALF_MV(1, 1, mx
-1, my
-1)
94 CHECK_HALF_MV(0, 1, mx
, my
-1)
95 CHECK_HALF_MV(1, 1, mx
, my
-1)
96 CHECK_HALF_MV(1, 0, mx
-1, my
)
97 CHECK_HALF_MV(1, 0, mx
, my
)
98 CHECK_HALF_MV(1, 1, mx
-1, my
)
99 CHECK_HALF_MV(0, 1, mx
, my
)
100 CHECK_HALF_MV(1, 1, mx
, my
)
/* The chosen half-pel vector has to satisfy all four window bounds at once.
 * Joined with '||' the expression is true for every bx (any value is either
 * >= xmin*2 or <= xmax*2 when xmin <= xmax), so the check could never fire. */
102 assert(bx
>= xmin
*2 && bx
<= xmax
*2 && by
>= ymin
*2 && by
<= ymax
*2);
115 static int hpel_motion_search(MpegEncContext
* s
,
116 int *mx_ptr
, int *my_ptr
, int dmin
,
117 int src_index
, int ref_index
,
120 MotionEstContext
* const c
= &s
->me
;
121 const int mx
= *mx_ptr
;
122 const int my
= *my_ptr
;
123 const int penalty_factor
= c
->sub_penalty_factor
;
124 me_cmp_func cmp_sub
, chroma_cmp_sub
;
125 int bx
=2*mx
, by
=2*my
;
128 int flags
= c
->sub_flags
;
132 cmp_sub
= s
->dsp
.me_sub_cmp
[size
];
133 chroma_cmp_sub
= s
->dsp
.me_sub_cmp
[size
+1];
135 if(c
->skip
){ //FIXME move out of hpel?
141 if(c
->avctx
->me_cmp
!= c
->avctx
->me_sub_cmp
){
142 dmin
= cmp(s
, mx
, my
, 0, 0, size
, h
, ref_index
, src_index
, cmp_sub
, chroma_cmp_sub
, flags
);
143 if(mx
|| my
|| size
>0)
144 dmin
+= (mv_penalty
[2*mx
- pred_x
] + mv_penalty
[2*my
- pred_y
])*penalty_factor
;
147 if (mx
> xmin
&& mx
< xmax
&&
148 my
> ymin
&& my
< ymax
) {
150 const int index
= (my
<<ME_MAP_SHIFT
) + mx
;
151 const int t
= score_map
[(index
-(1<<ME_MAP_SHIFT
))&(ME_MAP_SIZE
-1)]
152 + (mv_penalty
[bx
- pred_x
] + mv_penalty
[by
-2 - pred_y
])*c
->penalty_factor
;
153 const int l
= score_map
[(index
- 1 )&(ME_MAP_SIZE
-1)]
154 + (mv_penalty
[bx
-2 - pred_x
] + mv_penalty
[by
- pred_y
])*c
->penalty_factor
;
155 const int r
= score_map
[(index
+ 1 )&(ME_MAP_SIZE
-1)]
156 + (mv_penalty
[bx
+2 - pred_x
] + mv_penalty
[by
- pred_y
])*c
->penalty_factor
;
157 const int b
= score_map
[(index
+(1<<ME_MAP_SHIFT
))&(ME_MAP_SIZE
-1)]
158 + (mv_penalty
[bx
- pred_x
] + mv_penalty
[by
+2 - pred_y
])*c
->penalty_factor
;
162 int map_generation
= c
->map_generation
;
164 uint32_t *map
= c
->map
;
166 key
= ((my
-1)<<ME_MAP_MV_BITS
) + (mx
) + map_generation
;
167 assert(map
[(index
-(1<<ME_MAP_SHIFT
))&(ME_MAP_SIZE
-1)] == key
);
168 key
= ((my
+1)<<ME_MAP_MV_BITS
) + (mx
) + map_generation
;
169 assert(map
[(index
+(1<<ME_MAP_SHIFT
))&(ME_MAP_SIZE
-1)] == key
);
170 key
= ((my
)<<ME_MAP_MV_BITS
) + (mx
+1) + map_generation
;
171 assert(map
[(index
+1)&(ME_MAP_SIZE
-1)] == key
);
172 key
= ((my
)<<ME_MAP_MV_BITS
) + (mx
-1) + map_generation
;
173 assert(map
[(index
-1)&(ME_MAP_SIZE
-1)] == key
);
176 CHECK_HALF_MV(0, 1, mx
,my
-1)
178 CHECK_HALF_MV(1, 1, mx
-1, my
-1)
180 CHECK_HALF_MV(1, 1, mx
, my
-1)
182 CHECK_HALF_MV(1, 1, mx
-1, my
)
184 CHECK_HALF_MV(1, 0, mx
-1, my
)
186 CHECK_HALF_MV(1, 1, mx
, my
-1)
188 CHECK_HALF_MV(1, 1, mx
-1, my
-1)
190 CHECK_HALF_MV(1, 1, mx
, my
)
192 CHECK_HALF_MV(1, 0, mx
, my
)
197 CHECK_HALF_MV(1, 1, mx
-1, my
-1)
199 CHECK_HALF_MV(1, 1, mx
, my
)
201 CHECK_HALF_MV(1, 0, mx
-1, my
)
202 CHECK_HALF_MV(1, 1, mx
-1, my
)
205 CHECK_HALF_MV(1, 1, mx
, my
-1)
207 CHECK_HALF_MV(1, 1, mx
-1, my
)
209 CHECK_HALF_MV(1, 0, mx
, my
)
210 CHECK_HALF_MV(1, 1, mx
, my
)
212 CHECK_HALF_MV(0, 1, mx
, my
)
214 assert(bx
>= xmin
*2 && bx
<= xmax
*2 && by
>= ymin
*2 && by
<= ymax
*2);
224 static int no_sub_motion_search(MpegEncContext
* s
,
225 int *mx_ptr
, int *my_ptr
, int dmin
,
226 int src_index
, int ref_index
,
/*
 * Score the motion vector (mx, my) for the current block using the
 * macroblock-decision comparators.
 *
 * - The comparators come from s->dsp.mb_cmp[size] (luma) and
 *   s->dsp.mb_cmp[size+1] (chroma); flags and the penalty factor are the
 *   mb_* variants stored in the MotionEstContext (c->mb_flags,
 *   c->mb_penalty_factor).
 * - qpel is flags & FLAG_QPEL and mask is 1+2*qpel.  cmp() receives
 *   mx>>(qpel+1), my>>(qpel+1) as the position and mx&mask, my&mask as the
 *   sub-position, i.e. the vector is split into its high and low bits.
 * - When add_rate is non-zero and the candidate is not the (0,0) vector of a
 *   size-0 block, (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y]) *
 *   penalty_factor is added to the distortion d.
 *
 * NOTE(review): mv_penalty, pred_x and pred_y are not declared in the lines
 * shown here; presumably they are supplied by a LOAD_COMMON-style macro -
 * confirm.
 */
234 int inline ff_get_mb_score(MpegEncContext
* s
, int mx
, int my
, int src_index
,
235 int ref_index
, int size
, int h
, int add_rate
)
237 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
238 MotionEstContext
* const c
= &s
->me
;
239 const int penalty_factor
= c
->mb_penalty_factor
;
240 const int flags
= c
->mb_flags
;
241 const int qpel
= flags
& FLAG_QPEL
;
242 const int mask
= 1+2*qpel
;
243 me_cmp_func cmp_sub
, chroma_cmp_sub
;
250 cmp_sub
= s
->dsp
.mb_cmp
[size
];
251 chroma_cmp_sub
= s
->dsp
.mb_cmp
[size
+1];
254 // assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
256 d
= cmp(s
, mx
>>(qpel
+1), my
>>(qpel
+1), mx
&mask
, my
&mask
, size
, h
, ref_index
, src_index
, cmp_sub
, chroma_cmp_sub
, flags
);
257 //FIXME check cbp before adding penalty for (0,0) vector
258 if(add_rate
&& (mx
|| my
|| size
>0))
259 d
+= (mv_penalty
[mx
- pred_x
] + mv_penalty
[my
- pred_y
])*penalty_factor
;
264 #define CHECK_QUARTER_MV(dx, dy, x, y)\
266 const int hx= 4*(x)+(dx);\
267 const int hy= 4*(y)+(dy);\
268 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
269 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
270 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
273 static int qpel_motion_search(MpegEncContext
* s
,
274 int *mx_ptr
, int *my_ptr
, int dmin
,
275 int src_index
, int ref_index
,
278 MotionEstContext
* const c
= &s
->me
;
279 const int mx
= *mx_ptr
;
280 const int my
= *my_ptr
;
281 const int penalty_factor
= c
->sub_penalty_factor
;
282 const int map_generation
= c
->map_generation
;
283 const int subpel_quality
= c
->avctx
->me_subpel_quality
;
284 uint32_t *map
= c
->map
;
285 me_cmp_func cmpf
, chroma_cmpf
;
286 me_cmp_func cmp_sub
, chroma_cmp_sub
;
289 int flags
= c
->sub_flags
;
291 cmpf
= s
->dsp
.me_cmp
[size
];
292 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1]; //factorize FIXME
295 cmp_sub
= s
->dsp
.me_sub_cmp
[size
];
296 chroma_cmp_sub
= s
->dsp
.me_sub_cmp
[size
+1];
298 if(c
->skip
){ //FIXME somehow move up (benchmark)
304 if(c
->avctx
->me_cmp
!= c
->avctx
->me_sub_cmp
){
305 dmin
= cmp(s
, mx
, my
, 0, 0, size
, h
, ref_index
, src_index
, cmp_sub
, chroma_cmp_sub
, flags
);
306 if(mx
|| my
|| size
>0)
307 dmin
+= (mv_penalty
[4*mx
- pred_x
] + mv_penalty
[4*my
- pred_y
])*penalty_factor
;
310 if (mx
> xmin
&& mx
< xmax
&&
311 my
> ymin
&& my
< ymax
) {
312 int bx
=4*mx
, by
=4*my
;
315 const int index
= (my
<<ME_MAP_SHIFT
) + mx
;
316 const int t
= score_map
[(index
-(1<<ME_MAP_SHIFT
) )&(ME_MAP_SIZE
-1)];
317 const int l
= score_map
[(index
- 1 )&(ME_MAP_SIZE
-1)];
318 const int r
= score_map
[(index
+ 1 )&(ME_MAP_SIZE
-1)];
319 const int b
= score_map
[(index
+(1<<ME_MAP_SHIFT
) )&(ME_MAP_SIZE
-1)];
320 const int c
= score_map
[(index
)&(ME_MAP_SIZE
-1)];
324 memset(best
, 64, sizeof(int)*8);
326 if(s
->me
.dia_size
>=2){
327 const int tl
= score_map
[(index
-(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)];
328 const int bl
= score_map
[(index
+(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)];
329 const int tr
= score_map
[(index
-(1<<ME_MAP_SHIFT
)+1)&(ME_MAP_SIZE
-1)];
330 const int br
= score_map
[(index
+(1<<ME_MAP_SHIFT
)+1)&(ME_MAP_SIZE
-1)];
332 for(ny
= -3; ny
<= 3; ny
++){
333 for(nx
= -3; nx
<= 3; nx
++){
334 //FIXME this could overflow (unlikely though)
335 const int64_t t2
= nx
*nx
*(tr
+ tl
- 2*t
) + 4*nx
*(tr
-tl
) + 32*t
;
336 const int64_t c2
= nx
*nx
*( r
+ l
- 2*c
) + 4*nx
*( r
- l
) + 32*c
;
337 const int64_t b2
= nx
*nx
*(br
+ bl
- 2*b
) + 4*nx
*(br
-bl
) + 32*b
;
338 int score
= (ny
*ny
*(b2
+ t2
- 2*c2
) + 4*ny
*(b2
- t2
) + 32*c2
+ 512)>>10;
341 if((nx
&3)==0 && (ny
&3)==0) continue;
343 score
+= (mv_penalty
[4*mx
+ nx
- pred_x
] + mv_penalty
[4*my
+ ny
- pred_y
])*penalty_factor
;
345 // if(nx&1) score-=1024*c->penalty_factor;
346 // if(ny&1) score-=1024*c->penalty_factor;
350 memmove(&best
[i
+1], &best
[i
], sizeof(int)*(7-i
));
351 memmove(&best_pos
[i
+1][0], &best_pos
[i
][0], sizeof(int)*2*(7-i
));
353 best_pos
[i
][0]= nx
+ 4*mx
;
354 best_pos
[i
][1]= ny
+ 4*my
;
362 //FIXME this could overflow (unlikely though)
363 const int cx
= 4*(r
- l
);
364 const int cx2
= r
+ l
- 2*c
;
365 const int cy
= 4*(b
- t
);
366 const int cy2
= b
+ t
- 2*c
;
369 if(map
[(index
-(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)] == (my
<<ME_MAP_MV_BITS
) + mx
+ map_generation
&& 0){ //FIXME
370 tl
= score_map
[(index
-(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)];
372 tl
= cmp(s
, mx
-1, my
-1, 0, 0, size
, h
, ref_index
, src_index
, cmpf
, chroma_cmpf
, flags
);//FIXME wrong if chroma me is different
375 cxy
= 2*tl
+ (cx
+ cy
)/4 - (cx2
+ cy2
) - 2*c
;
377 assert(16*cx2
+ 4*cx
+ 32*c
== 32*r
);
378 assert(16*cx2
- 4*cx
+ 32*c
== 32*l
);
379 assert(16*cy2
+ 4*cy
+ 32*c
== 32*b
);
380 assert(16*cy2
- 4*cy
+ 32*c
== 32*t
);
381 assert(16*cxy
+ 16*cy2
+ 16*cx2
- 4*cy
- 4*cx
+ 32*c
== 32*tl
);
383 for(ny
= -3; ny
<= 3; ny
++){
384 for(nx
= -3; nx
<= 3; nx
++){
385 //FIXME this could overflow (unlikely though)
386 int score
= ny
*nx
*cxy
+ nx
*nx
*cx2
+ ny
*ny
*cy2
+ nx
*cx
+ ny
*cy
+ 32*c
; //FIXME factor
389 if((nx
&3)==0 && (ny
&3)==0) continue;
391 score
+= 32*(mv_penalty
[4*mx
+ nx
- pred_x
] + mv_penalty
[4*my
+ ny
- pred_y
])*penalty_factor
;
392 // if(nx&1) score-=32*c->penalty_factor;
393 // if(ny&1) score-=32*c->penalty_factor;
397 memmove(&best
[i
+1], &best
[i
], sizeof(int)*(7-i
));
398 memmove(&best_pos
[i
+1][0], &best_pos
[i
][0], sizeof(int)*2*(7-i
));
400 best_pos
[i
][0]= nx
+ 4*mx
;
401 best_pos
[i
][1]= ny
+ 4*my
;
408 for(i
=0; i
<subpel_quality
; i
++){
411 CHECK_QUARTER_MV(nx
&3, ny
&3, nx
>>2, ny
>>2)
415 const int tl
= score_map
[(index
-(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)];
416 const int bl
= score_map
[(index
+(1<<ME_MAP_SHIFT
)-1)&(ME_MAP_SIZE
-1)];
417 const int tr
= score_map
[(index
-(1<<ME_MAP_SHIFT
)+1)&(ME_MAP_SIZE
-1)];
418 const int br
= score_map
[(index
+(1<<ME_MAP_SHIFT
)+1)&(ME_MAP_SIZE
-1)];
419 // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
422 // nx= FFMAX(4*mx - bx, bx - 4*mx);
423 // ny= FFMAX(4*my - by, by - 4*my);
425 static int stats
[7][7], count
;
427 stats
[4*mx
- bx
+ 3][4*my
- by
+ 3]++;
428 if(256*256*256*64 % count
==0){
430 if((i
%7)==0) printf("\n");
431 printf("%6d ", stats
[0][i
]);
439 CHECK_QUARTER_MV(2, 2, mx
-1, my
-1)
440 CHECK_QUARTER_MV(0, 2, mx
, my
-1)
441 CHECK_QUARTER_MV(2, 2, mx
, my
-1)
442 CHECK_QUARTER_MV(2, 0, mx
, my
)
443 CHECK_QUARTER_MV(2, 2, mx
, my
)
444 CHECK_QUARTER_MV(0, 2, mx
, my
)
445 CHECK_QUARTER_MV(2, 2, mx
-1, my
)
446 CHECK_QUARTER_MV(2, 0, mx
-1, my
)
452 int ox
[8]= {0, 1, 1, 1, 0,-1,-1,-1};
453 int oy
[8]= {1, 1, 0,-1,-1,-1, 0, 1};
454 CHECK_QUARTER_MV((nx
+ ox
[i
])&3, (ny
+ oy
[i
])&3, (nx
+ ox
[i
])>>2, (ny
+ oy
[i
])>>2)
459 CHECK_QUARTER_MV(1, 3, mx
-1, my
-1)
460 CHECK_QUARTER_MV(1, 2, mx
-1, my
-1)
461 CHECK_QUARTER_MV(1, 1, mx
-1, my
-1)
462 CHECK_QUARTER_MV(2, 1, mx
-1, my
-1)
463 CHECK_QUARTER_MV(3, 1, mx
-1, my
-1)
464 CHECK_QUARTER_MV(0, 1, mx
, my
-1)
465 CHECK_QUARTER_MV(1, 1, mx
, my
-1)
466 CHECK_QUARTER_MV(2, 1, mx
, my
-1)
467 CHECK_QUARTER_MV(3, 1, mx
, my
-1)
468 CHECK_QUARTER_MV(3, 2, mx
, my
-1)
469 CHECK_QUARTER_MV(3, 3, mx
, my
-1)
470 CHECK_QUARTER_MV(3, 0, mx
, my
)
471 CHECK_QUARTER_MV(3, 1, mx
, my
)
472 CHECK_QUARTER_MV(3, 2, mx
, my
)
473 CHECK_QUARTER_MV(3, 3, mx
, my
)
474 CHECK_QUARTER_MV(2, 3, mx
, my
)
475 CHECK_QUARTER_MV(1, 3, mx
, my
)
476 CHECK_QUARTER_MV(0, 3, mx
, my
)
477 CHECK_QUARTER_MV(3, 3, mx
-1, my
)
478 CHECK_QUARTER_MV(2, 3, mx
-1, my
)
479 CHECK_QUARTER_MV(1, 3, mx
-1, my
)
480 CHECK_QUARTER_MV(1, 2, mx
-1, my
)
481 CHECK_QUARTER_MV(1, 1, mx
-1, my
)
482 CHECK_QUARTER_MV(1, 0, mx
-1, my
)
484 assert(bx
>= xmin
*4 && bx
<= xmax
*4 && by
>= ymin
*4 && by
<= ymax
*4);
497 #define CHECK_MV(x,y)\
499 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
500 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
501 assert((x) >= xmin);\
502 assert((x) <= xmax);\
503 assert((y) >= ymin);\
504 assert((y) <= ymax);\
505 /*printf("check_mv %d %d\n", x, y);*/\
506 if(map[index]!=key){\
507 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
509 score_map[index]= d;\
510 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
511 /*printf("score:%d\n", d);*/\
512 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
516 #define CHECK_CLIPED_MV(ax,ay)\
520 const int x2= FFMAX(xmin, FFMIN(x, xmax));\
521 const int y2= FFMAX(ymin, FFMIN(y, ymax));\
525 #define CHECK_MV_DIR(x,y,new_dir)\
527 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
528 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
529 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
530 if(map[index]!=key){\
531 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
533 score_map[index]= d;\
534 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
535 /*printf("score:%d\n", d);*/\
/*
 * Debug aid: print a diagnostic when the vector (x, y) lies outside the
 * search window.  The limits xmin/xmax/ymin/ymax are read from the expanding
 * scope and shifted left by S before the comparison; v is stringified and
 * appended to the message so call sites can be told apart.  `s` (providing
 * mb_x / mb_y) must be visible where the macro is expanded.
 *
 * The four tests live in a single brace block so that `if (cond) check(...)`
 * guards all of them rather than only the first, and the definition does not
 * end in a line continuation that would pull the next source line into the
 * macro body.
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}
/*
 * Declares the extra locals used by the full-pel search macros:
 *   map   - c->map, the table CHECK_MV compares against its key
 *   qpel  - flags & FLAG_QPEL
 *   shift - 1+qpel, the left shift CHECK_MV applies to a full-pel
 *           coordinate before indexing mv_penalty
 * `c` and `flags` must be in scope at the point of expansion.
 *
 * The last declaration deliberately has no trailing backslash, so the macro
 * ends here regardless of what the following source line contains.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;
556 static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
557 int src_index
, int ref_index
, int const penalty_factor
,
558 int size
, int h
, int flags
)
560 MotionEstContext
* const c
= &s
->me
;
561 me_cmp_func cmpf
, chroma_cmpf
;
565 int map_generation
= c
->map_generation
;
567 cmpf
= s
->dsp
.me_cmp
[size
];
568 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
570 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
571 const int key
= (best
[1]<<ME_MAP_MV_BITS
) + best
[0] + map_generation
;
572 const int index
= ((best
[1]<<ME_MAP_SHIFT
) + best
[0])&(ME_MAP_SIZE
-1);
573 if(map
[index
]!=key
){ //this will be executed only very rarey
574 score_map
[index
]= cmp(s
, best
[0], best
[1], 0, 0, size
, h
, ref_index
, src_index
, cmpf
, chroma_cmpf
, flags
);
581 const int dir
= next_dir
;
582 const int x
= best
[0];
583 const int y
= best
[1];
587 if(dir
!=2 && x
>xmin
) CHECK_MV_DIR(x
-1, y
, 0)
588 if(dir
!=3 && y
>ymin
) CHECK_MV_DIR(x
, y
-1, 1)
589 if(dir
!=0 && x
<xmax
) CHECK_MV_DIR(x
+1, y
, 2)
590 if(dir
!=1 && y
<ymax
) CHECK_MV_DIR(x
, y
+1, 3)
598 static int funny_diamond_search(MpegEncContext
* s
, int *best
, int dmin
,
599 int src_index
, int ref_index
, int const penalty_factor
,
600 int size
, int h
, int flags
)
602 MotionEstContext
* const c
= &s
->me
;
603 me_cmp_func cmpf
, chroma_cmpf
;
607 int map_generation
= c
->map_generation
;
609 cmpf
= s
->dsp
.me_cmp
[size
];
610 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
612 for(dia_size
=1; dia_size
<=4; dia_size
++){
614 const int x
= best
[0];
615 const int y
= best
[1];
617 if(dia_size
&(dia_size
-1)) continue;
619 if( x
+ dia_size
> xmax
620 || x
- dia_size
< xmin
621 || y
+ dia_size
> ymax
622 || y
- dia_size
< ymin
)
625 for(dir
= 0; dir
<dia_size
; dir
+=2){
628 CHECK_MV(x
+ dir
, y
+ dia_size
- dir
);
629 CHECK_MV(x
+ dia_size
- dir
, y
- dir
);
630 CHECK_MV(x
- dir
, y
- dia_size
+ dir
);
631 CHECK_MV(x
- dia_size
+ dir
, y
+ dir
);
634 if(x
!=best
[0] || y
!=best
[1])
639 static int stats
[8*8];
643 dx
^=dy
; dy
^=dx
; dx
^=dy
;
646 if(256*256*256*64 % (stats
[0]+1)==0){
648 if((i
&7)==0) printf("\n");
649 printf("%8d ", stats
[i
]);
659 #define SAB_CHECK_MV(ax,ay)\
661 const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
662 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
663 /*printf("sab check %d %d\n", ax, ay);*/\
664 if(map[index]!=key){\
665 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
667 score_map[index]= d;\
668 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
669 /*printf("score: %d\n", d);*/\
670 if(d < minima[minima_count-1].height){\
673 while(d >= minima[j].height) j++;\
675 memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
677 minima[j].checked= 0;\
678 minima[j].height= d;\
688 #define MAX_SAB_SIZE ME_MAP_SIZE
/*
 * Diamond search that keeps a sorted list of the best candidates seen so far.
 *
 * 1. Every slot of map[] is scanned (ME_MAP_SIZE entries).  A bias of half
 *    the coordinate range is added to the key; entries whose bits above
 *    2*ME_MAP_MV_BITS do not equal map_generation are skipped.  For the
 *    others the x/y coordinates are decoded from the key, the score is read
 *    from score_map[], and the mv_penalty rate term is added unless the
 *    vector is (0,0).
 * 2. The collected entries are sorted with qsort()/minima_cmp and the list
 *    is padded up to minima_count = ABS(c->dia_size) with dummy entries of
 *    height 256*256*256*64 at (0,0).
 * 3. Each list entry that is not yet marked checked and does not touch the
 *    window border has its neighbours probed through SAB_CHECK_MV, then is
 *    marked checked.
 * 4. best[] and dmin are taken from minima[0]; if that vector lies strictly
 *    inside the window its four direct neighbours are run through CHECK_MV
 *    so that later sub-pel refinement finds them in the map.
 */
689 static int sab_diamond_search(MpegEncContext
* s
, int *best
, int dmin
,
690 int src_index
, int ref_index
, int const penalty_factor
,
691 int size
, int h
, int flags
)
693 MotionEstContext
* const c
= &s
->me
;
694 me_cmp_func cmpf
, chroma_cmpf
;
695 Minima minima
[MAX_SAB_SIZE
];
696 const int minima_count
= ABS(c
->dia_size
);
700 int map_generation
= c
->map_generation
;
702 cmpf
= s
->dsp
.me_cmp
[size
];
703 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
705 for(j
=i
=0; i
<ME_MAP_SIZE
; i
++){
706 uint32_t key
= map
[i
];
708 key
+= (1<<(ME_MAP_MV_BITS
-1)) + (1<<(2*ME_MAP_MV_BITS
-1));
/* -(1<<n) yields the same high-bit mask as (-1)<<n, but without
 * left-shifting a negative value, which is undefined behaviour in C. */
710 if((key
&(-(1<<(2*ME_MAP_MV_BITS
)))) != map_generation
) continue;
712 assert(j
<MAX_SAB_SIZE
); //max j = number of predictors
714 minima
[j
].height
= score_map
[i
];
715 minima
[j
].x
= key
& ((1<<ME_MAP_MV_BITS
)-1); key
>>=ME_MAP_MV_BITS
;
716 minima
[j
].y
= key
& ((1<<ME_MAP_MV_BITS
)-1);
717 minima
[j
].x
-= (1<<(ME_MAP_MV_BITS
-1));
718 minima
[j
].y
-= (1<<(ME_MAP_MV_BITS
-1));
720 if(minima
[j
].x
|| minima
[j
].y
)
721 minima
[j
].height
+= (mv_penalty
[((minima
[j
].x
)<<shift
)-pred_x
] + mv_penalty
[((minima
[j
].y
)<<shift
)-pred_y
])*penalty_factor
;
726 qsort(minima
, j
, sizeof(Minima
), minima_cmp
);
728 for(; j
<minima_count
; j
++){
729 minima
[j
].height
=256*256*256*64;
731 minima
[j
].x
= minima
[j
].y
=0;
734 for(i
=0; i
<minima_count
; i
++){
735 const int x
= minima
[i
].x
;
736 const int y
= minima
[i
].y
;
739 if(minima
[i
].checked
) continue;
741 if( x
>= xmax
|| x
<= xmin
742 || y
>= ymax
|| y
<= ymin
)
747 SAB_CHECK_MV(x
, y
-1)
748 SAB_CHECK_MV(x
, y
+1)
750 minima
[i
].checked
= 1;
753 best
[0]= minima
[0].x
;
754 best
[1]= minima
[0].y
;
755 dmin
= minima
[0].height
;
757 if( best
[0] < xmax
&& best
[0] > xmin
758 && best
[1] < ymax
&& best
[1] > ymin
){
760 //ensure that the reference samples for hpel refinement are in the map
761 CHECK_MV(best
[0]-1, best
[1])
762 CHECK_MV(best
[0]+1, best
[1])
763 CHECK_MV(best
[0], best
[1]-1)
764 CHECK_MV(best
[0], best
[1]+1)
769 static int var_diamond_search(MpegEncContext
* s
, int *best
, int dmin
,
770 int src_index
, int ref_index
, int const penalty_factor
,
771 int size
, int h
, int flags
)
773 MotionEstContext
* const c
= &s
->me
;
774 me_cmp_func cmpf
, chroma_cmpf
;
778 int map_generation
= c
->map_generation
;
780 cmpf
= s
->dsp
.me_cmp
[size
];
781 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
783 for(dia_size
=1; dia_size
<=c
->dia_size
; dia_size
++){
785 const int x
= best
[0];
786 const int y
= best
[1];
788 start
= FFMAX(0, y
+ dia_size
- ymax
);
789 end
= FFMIN(dia_size
, xmax
- x
+ 1);
790 for(dir
= start
; dir
<end
; dir
++){
793 //check(x + dir,y + dia_size - dir,0, a0)
794 CHECK_MV(x
+ dir
, y
+ dia_size
- dir
);
797 start
= FFMAX(0, x
+ dia_size
- xmax
);
798 end
= FFMIN(dia_size
, y
- ymin
+ 1);
799 for(dir
= start
; dir
<end
; dir
++){
802 //check(x + dia_size - dir, y - dir,0, a1)
803 CHECK_MV(x
+ dia_size
- dir
, y
- dir
);
806 start
= FFMAX(0, -y
+ dia_size
+ ymin
);
807 end
= FFMIN(dia_size
, x
- xmin
+ 1);
808 for(dir
= start
; dir
<end
; dir
++){
811 //check(x - dir,y - dia_size + dir,0, a2)
812 CHECK_MV(x
- dir
, y
- dia_size
+ dir
);
815 start
= FFMAX(0, -x
+ dia_size
+ xmin
);
816 end
= FFMIN(dia_size
, ymax
- y
+ 1);
817 for(dir
= start
; dir
<end
; dir
++){
820 //check(x - dia_size + dir, y + dir,0, a3)
821 CHECK_MV(x
- dia_size
+ dir
, y
+ dir
);
824 if(x
!=best
[0] || y
!=best
[1])
829 static int stats
[8*8];
833 if(256*256*256*64 % (stats
[0]+1)==0){
835 if((i
&7)==0) printf("\n");
836 printf("%6d ", stats
[i
]);
/*
 * Dispatcher for the full-pel diamond search variants.
 *
 * Every argument is forwarded unchanged to exactly one of
 * funny_diamond_search, sab_diamond_search, small_diamond_search or
 * var_diamond_search, and that callee's result is returned.  The choice is
 * driven by c->dia_size: funny_diamond_search is the first alternative of
 * the chain, values below -1 select sab_diamond_search, remaining values
 * below 2 select small_diamond_search, and var_diamond_search is the last
 * alternative.
 */
846 static always_inline
int diamond_search(MpegEncContext
* s
, int *best
, int dmin
,
847 int src_index
, int ref_index
, int const penalty_factor
,
848 int size
, int h
, int flags
){
849 MotionEstContext
* const c
= &s
->me
;
851 return funny_diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
852 else if(c
->dia_size
<-1)
853 return sab_diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
854 else if(c
->dia_size
<2)
855 return small_diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
857 return var_diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
860 static always_inline
int epzs_motion_search_internal(MpegEncContext
* s
, int *mx_ptr
, int *my_ptr
,
861 int P
[10][2], int src_index
, int ref_index
, int16_t (*last_mv
)[2],
862 int ref_mv_scale
, int flags
, int size
, int h
)
864 MotionEstContext
* const c
= &s
->me
;
869 const int ref_mv_stride
= s
->mb_stride
; //pass as arg FIXME
870 const int ref_mv_xy
= s
->mb_x
+ s
->mb_y
*ref_mv_stride
; //add to last_mv beforepassing FIXME
871 me_cmp_func cmpf
, chroma_cmpf
;
877 penalty_factor
= c
->pre_penalty_factor
;
878 cmpf
= s
->dsp
.me_pre_cmp
[size
];
879 chroma_cmpf
= s
->dsp
.me_pre_cmp
[size
+1];
881 penalty_factor
= c
->penalty_factor
;
882 cmpf
= s
->dsp
.me_cmp
[size
];
883 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
886 map_generation
= update_map_generation(c
);
889 dmin
= cmp(s
, 0, 0, 0, 0, size
, h
, ref_index
, src_index
, cmpf
, chroma_cmpf
, flags
);
890 map
[0]= map_generation
;
894 if (s
->first_slice_line
) {
895 CHECK_MV(P_LEFT
[0]>>shift
, P_LEFT
[1]>>shift
)
896 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
897 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
899 if(dmin
<h
*h
&& ( P_LEFT
[0] |P_LEFT
[1]
901 |P_TOPRIGHT
[0]|P_TOPRIGHT
[1])==0){
907 CHECK_MV(P_MEDIAN
[0]>>shift
, P_MEDIAN
[1]>>shift
)
909 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
910 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
911 CHECK_MV(P_LEFT
[0] >>shift
, P_LEFT
[1] >>shift
)
912 CHECK_MV(P_TOP
[0] >>shift
, P_TOP
[1] >>shift
)
913 CHECK_MV(P_TOPRIGHT
[0]>>shift
, P_TOPRIGHT
[1]>>shift
)
918 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
-1][0]*ref_mv_scale
+ (1<<15))>>16,
919 (last_mv
[ref_mv_xy
-1][1]*ref_mv_scale
+ (1<<15))>>16)
920 if(!s
->first_slice_line
)
921 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
-ref_mv_stride
][0]*ref_mv_scale
+ (1<<15))>>16,
922 (last_mv
[ref_mv_xy
-ref_mv_stride
][1]*ref_mv_scale
+ (1<<15))>>16)
924 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+1][0]*ref_mv_scale
+ (1<<15))>>16,
925 (last_mv
[ref_mv_xy
+1][1]*ref_mv_scale
+ (1<<15))>>16)
926 if(s
->mb_y
+1<s
->end_mb_y
) //FIXME replace at least with last_slice_line
927 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+ref_mv_stride
][0]*ref_mv_scale
+ (1<<15))>>16,
928 (last_mv
[ref_mv_xy
+ref_mv_stride
][1]*ref_mv_scale
+ (1<<15))>>16)
932 if(c
->avctx
->last_predictor_count
){
933 const int count
= c
->avctx
->last_predictor_count
;
934 const int xstart
= FFMAX(0, s
->mb_x
- count
);
935 const int ystart
= FFMAX(0, s
->mb_y
- count
);
936 const int xend
= FFMIN(s
->mb_width
, s
->mb_x
+ count
+ 1);
937 const int yend
= FFMIN(s
->mb_height
, s
->mb_y
+ count
+ 1);
940 for(mb_y
=ystart
; mb_y
<yend
; mb_y
++){
942 for(mb_x
=xstart
; mb_x
<xend
; mb_x
++){
943 const int xy
= mb_x
+ 1 + (mb_y
+ 1)*ref_mv_stride
;
944 int mx
= (last_mv
[xy
][0]*ref_mv_scale
+ (1<<15))>>16;
945 int my
= (last_mv
[xy
][1]*ref_mv_scale
+ (1<<15))>>16;
947 if(mx
>xmax
|| mx
<xmin
|| my
>ymax
|| my
<ymin
) continue;
953 //check(best[0],best[1],0, b0)
954 dmin
= diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
956 //check(best[0],best[1],0, b1)
960 // printf("%d %d %d \n", best[0], best[1], dmin);
964 //this function is dedicated to the braindamaged gcc
965 inline int ff_epzs_motion_search(MpegEncContext
* s
, int *mx_ptr
, int *my_ptr
,
966 int P
[10][2], int src_index
, int ref_index
, int16_t (*last_mv
)[2],
967 int ref_mv_scale
, int size
, int h
)
969 MotionEstContext
* const c
= &s
->me
;
970 //FIXME convert other functions in the same way if faster
971 if(c
->flags
==0 && h
==16 && size
==0){
972 return epzs_motion_search_internal(s
, mx_ptr
, my_ptr
, P
, src_index
, ref_index
, last_mv
, ref_mv_scale
, 0, 0, 16);
974 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
976 return epzs_motion_search_internal(s
, mx_ptr
, my_ptr
, P
, src_index
, ref_index
, last_mv
, ref_mv_scale
, c
->flags
, size
, h
);
980 static int epzs_motion_search4(MpegEncContext
* s
,
981 int *mx_ptr
, int *my_ptr
, int P
[10][2],
982 int src_index
, int ref_index
, int16_t (*last_mv
)[2],
985 MotionEstContext
* const c
= &s
->me
;
989 const int penalty_factor
= c
->penalty_factor
;
992 const int ref_mv_stride
= s
->mb_stride
;
993 const int ref_mv_xy
= s
->mb_x
+ s
->mb_y
*ref_mv_stride
;
994 me_cmp_func cmpf
, chroma_cmpf
;
999 cmpf
= s
->dsp
.me_cmp
[size
];
1000 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
1002 map_generation
= update_map_generation(c
);
1005 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1007 if (s
->first_slice_line
) {
1008 CHECK_MV(P_LEFT
[0]>>shift
, P_LEFT
[1]>>shift
)
1009 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
1010 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
1011 CHECK_MV(P_MV1
[0]>>shift
, P_MV1
[1]>>shift
)
1013 CHECK_MV(P_MV1
[0]>>shift
, P_MV1
[1]>>shift
)
1014 //FIXME try some early stop
1016 CHECK_MV(P_MEDIAN
[0]>>shift
, P_MEDIAN
[1]>>shift
)
1017 CHECK_MV(P_LEFT
[0]>>shift
, P_LEFT
[1]>>shift
)
1018 CHECK_MV(P_TOP
[0]>>shift
, P_TOP
[1]>>shift
)
1019 CHECK_MV(P_TOPRIGHT
[0]>>shift
, P_TOPRIGHT
[1]>>shift
)
1020 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
1021 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
1025 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+1][0]*ref_mv_scale
+ (1<<15))>>16,
1026 (last_mv
[ref_mv_xy
+1][1]*ref_mv_scale
+ (1<<15))>>16)
1027 if(s
->mb_y
+1<s
->end_mb_y
) //FIXME replace at least with last_slice_line
1028 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+ref_mv_stride
][0]*ref_mv_scale
+ (1<<15))>>16,
1029 (last_mv
[ref_mv_xy
+ref_mv_stride
][1]*ref_mv_scale
+ (1<<15))>>16)
1032 dmin
= diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
1037 // printf("%d %d %d \n", best[0], best[1], dmin);
1041 //try to merge with above FIXME (needs PSNR test)
1042 static int epzs_motion_search2(MpegEncContext
* s
,
1043 int *mx_ptr
, int *my_ptr
, int P
[10][2],
1044 int src_index
, int ref_index
, int16_t (*last_mv
)[2],
1047 MotionEstContext
* const c
= &s
->me
;
1051 const int penalty_factor
= c
->penalty_factor
;
1052 const int size
=0; //FIXME pass as arg
1054 const int ref_mv_stride
= s
->mb_stride
;
1055 const int ref_mv_xy
= s
->mb_x
+ s
->mb_y
*ref_mv_stride
;
1056 me_cmp_func cmpf
, chroma_cmpf
;
1058 int flags
= c
->flags
;
1061 cmpf
= s
->dsp
.me_cmp
[size
];
1062 chroma_cmpf
= s
->dsp
.me_cmp
[size
+1];
1064 map_generation
= update_map_generation(c
);
1067 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1069 if (s
->first_slice_line
) {
1070 CHECK_MV(P_LEFT
[0]>>shift
, P_LEFT
[1]>>shift
)
1071 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
1072 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
1073 CHECK_MV(P_MV1
[0]>>shift
, P_MV1
[1]>>shift
)
1075 CHECK_MV(P_MV1
[0]>>shift
, P_MV1
[1]>>shift
)
1076 //FIXME try some early stop
1078 CHECK_MV(P_MEDIAN
[0]>>shift
, P_MEDIAN
[1]>>shift
)
1079 CHECK_MV(P_LEFT
[0]>>shift
, P_LEFT
[1]>>shift
)
1080 CHECK_MV(P_TOP
[0]>>shift
, P_TOP
[1]>>shift
)
1081 CHECK_MV(P_TOPRIGHT
[0]>>shift
, P_TOPRIGHT
[1]>>shift
)
1082 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
][0]*ref_mv_scale
+ (1<<15))>>16,
1083 (last_mv
[ref_mv_xy
][1]*ref_mv_scale
+ (1<<15))>>16)
1087 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+1][0]*ref_mv_scale
+ (1<<15))>>16,
1088 (last_mv
[ref_mv_xy
+1][1]*ref_mv_scale
+ (1<<15))>>16)
1089 if(s
->mb_y
+1<s
->end_mb_y
) //FIXME replace at least with last_slice_line
1090 CHECK_CLIPED_MV((last_mv
[ref_mv_xy
+ref_mv_stride
][0]*ref_mv_scale
+ (1<<15))>>16,
1091 (last_mv
[ref_mv_xy
+ref_mv_stride
][1]*ref_mv_scale
+ (1<<15))>>16)
1094 dmin
= diamond_search(s
, best
, dmin
, src_index
, ref_index
, penalty_factor
, size
, h
, flags
);
1099 // printf("%d %d %d \n", best[0], best[1], dmin);