1 /* ***** BEGIN LICENSE BLOCK *****
5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
7 * The contents of this file are subject to the Mozilla Public License
8 * Version 1.1 (the "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 * http://www.mozilla.org/MPL/
12 * Software distributed under the License is distributed on an "AS IS" basis,
13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
14 * the specific language governing rights and limitations under the License.
16 * The Original Code is BBC Research and Development code.
18 * The Initial Developer of the Original Code is the British Broadcasting
20 * Portions created by the Initial Developer are Copyright (C) 2004.
21 * All Rights Reserved.
23 * Contributor(s): Anuradha Suraparaju (Original Author)
25 * Alternatively, the contents of this file may be used under the terms of
26 * the GNU General Public License Version 2 (the "GPL"), or the GNU Lesser
27 * Public License Version 2.1 (the "LGPL"), in which case the provisions of
28 * the GPL or the LGPL are applicable instead of those above. If you wish to
29 * allow use of your version of this file only under the terms of the either
30 * the GPL or LGPL and not to allow others to use your version of this file
31 * under the MPL, indicate your decision by deleting the provisions above
32 * and replace them with the notice and other provisions required by the GPL
33 * or LGPL. If you do not delete the provisions above, a recipient may use
34 * your version of this file under the terms of any one of the MPL, the GPL
36 * ***** END LICENSE BLOCK ***** */
40 #include <libdirac_common/mot_comp.h>
41 #include <libdirac_common/mot_comp_mmx.h>
42 #include <libdirac_common/motion.h>
43 #include <libdirac_common/dirac_assertions.h>
44 using namespace dirac
;
// Gathers four consecutive samples, starting at column x, from each of two
// rows: act_cols1 receives the samples of row1, act_cols2 those of row2.
//   x          first column to fetch (may be negative or run past xmax)
//   xmax       column limit used for the in-range test and passed to BChk
//   act_cols1  output, four values taken from row1
//   act_cols2  output, four values taken from row2
//   row1,row2  source rows
// NOTE(review): BChk is declared outside this file; it presumably clamps an
// index into [0, xmax) - confirm against its definition.
// NOTE(review): this listing shows no opening brace, no `else` and no closing
// braces for this function (the embedded line numbers skip 49 and 55-57);
// the eight BChk assignments below are presumably the else branch - confirm.
46 inline void check_active_columns(
47 int x
, int xmax
, ValueType act_cols1
[4],
48 ValueType act_cols2
[4], ValueType
*row1
, ValueType
*row2
)
50 // check if we need any clipping
51 if (x
>= 0 && (x
+3) < xmax
) {
52 // fast path: columns x..x+3 are all in range, so copy them without clipping
53 memcpy(act_cols1
, &row1
[x
], 4 * sizeof(ValueType
));
54 memcpy(act_cols2
, &row2
[x
], 4 * sizeof(ValueType
));
// Clipped path: every column index goes through BChk before it is used.
58 act_cols1
[0] = row1
[BChk(x
,xmax
)];
59 act_cols2
[0] = row2
[BChk(x
,xmax
)];
60 act_cols1
[1] = row1
[BChk(x
+1,xmax
)];
61 act_cols2
[1] = row2
[BChk(x
+1,xmax
)];
62 act_cols1
[2] = row1
[BChk(x
+2,xmax
)];
63 act_cols2
[2] = row2
[BChk(x
+2,xmax
)];
64 act_cols1
[3] = row1
[BChk(x
+3,xmax
)];
65 act_cols2
[3] = row2
[BChk(x
+3,xmax
)];
// Builds the prediction for one block from the upconverted reference picture.
// The motion vector is split into a part rounded down onto the reference
// grid (mv >> 1) and a 0/1 remainder per axis (mv & 1); the remainder selects
// which neighbouring reference samples are averaged.
//   block_data     output block, written in raster order through block_curr
//   pos            block position; negative coordinates are clamped to 0
//   orig_pic_size  true picture size, used for the boundary tests
//   refup_data     upconverted reference picture the samples are read from
// NOTE(review): `mv` is used below but its declaration is not visible in this
// listing (the embedded line numbers skip from 73 to 76) - confirm the
// signature against the header. Declarations of m1/m2/m3, x, c, l, uX and uY
// are likewise not visible.
69 void MotionCompensator_QuarterPixel::BlockPixelPred(
70 TwoDArray
<ValueType
> &block_data
,
71 const ImageCoords
& pos
,
72 const ImageCoords
& orig_pic_size
,
73 const PicArray
&refup_data
,
76 // Set up the start point in the reference image by rounding the motion vector
77 // to 1/2 pel accuracy.NB: bit shift rounds negative values DOWN, as required
78 const MVector
roundvec( mv
.x
>>1 , mv
.y
>>1 );
80 //Get the remainder after rounding. NB rmdr values always 0 or 1
81 const MVector
rmdr( mv
.x
& 1 , mv
.y
& 1 );
83 //Where to start in the upconverted image
84 const ImageCoords
start_pos( std::max(pos
.x
,0) , std::max(pos
.y
,0) );
85 // check that we are doing MC within true pic boundaries
// NOTE(review): the statement guarded by this test is not visible in the
// listing (embedded line 87 is absent); presumably an early return - confirm.
86 if (start_pos
.x
>= orig_pic_size
.x
|| start_pos
.y
>= orig_pic_size
.y
)
// Reference coordinates are twice the picture coordinates, offset by the
// rounded motion vector.
88 const ImageCoords
ref_start( ( start_pos
.x
<<1 ) + roundvec
.x
,( start_pos
.y
<<1 ) + roundvec
.y
);
90 //An additional stage to make sure the block to be copied does not fall outside
91 //the reference image.
92 const int refXlen
= refup_data
.LengthX();
93 const int trueRefXlen
= (orig_pic_size
.x
<< 1) - 1;
94 const int trueRefYlen
= (orig_pic_size
.y
<< 1) - 1;
96 ValueType
*block_curr
= &block_data
[0][0];
98 bool do_bounds_checking
= false;
99 //Check if there are going to be any problems copying the block from
100 //the upconverted reference image.
// The slow path is selected if ref_start is negative on either axis, or if
// ref_start plus twice the block length reaches the true reference length.
101 if( ref_start
.x
< 0 )
102 do_bounds_checking
= true;
103 else if( ref_start
.x
+ (block_data
.LengthX()<<1 ) >= trueRefXlen
)
104 do_bounds_checking
= true;
105 if( ref_start
.y
< 0 )
106 do_bounds_checking
= true;
107 else if( ref_start
.y
+ (block_data
.LengthY()<<1 ) >= trueRefYlen
)
108 do_bounds_checking
= true;
// Fast path: no clipping needed, so walk the reference with raw pointers.
// Each MMX iteration produces 4 outputs from 8 consecutive reference samples
// (block_curr += 4, refup_curr += 8).
110 if( !do_bounds_checking
)
// stopX is the block width rounded down to a multiple of 4; the remaining
// columns are handled by the scalar "mopup" loops.
112 int stopX
= (block_data
.LengthX()>>2)<<2;
113 ValueType
*refup_curr
= &refup_data
[ref_start
.y
][ref_start
.x
];
114 const int refup_next( ( refXlen
- block_data
.LengthX() )*2 ); //go down 2 rows and back to beginning of block line
// Case 1: no remainder on either axis - keep every second reference sample.
115 if( rmdr
.x
== 0 && rmdr
.y
== 0 )
118 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
121 for( x
=0; x
< stopX
; x
+=4, block_curr
+=4, refup_curr
+=8 )
// The two unpacks interleave samples 0-3 with samples 4-7 (m1 = r0 r4 r1 r5,
// m2 = r2 r6 r3 r7); the final unpacklo leaves r0 r2 r4 r6.
123 m1
= _mm_unpacklo_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
124 m2
= _mm_unpackhi_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
125 // *block_curr = refup_curr[0]
126 *(__m64
*)block_curr
= _mm_unpacklo_pi16 (m1
, m2
);
128 // Mopup the last value
// NOTE(review): the pointer increments belonging to this scalar loop are not
// visible in the listing - confirm against the original file.
129 for ( x
=stopX
; x
< block_data
.LengthX(); ++x
)
131 *block_curr
= *refup_curr
;
// Case 2: reached when the test above failed and rmdr.y == 0, i.e. the
// remainder is in x only - average each kept sample with its right-hand
// neighbour.
138 else if( rmdr
.y
== 0 )
140 __m64 round
= _mm_set_pi16 (1, 1, 1, 1);
143 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
146 for( x
=0; x
< stopX
; x
+=4, block_curr
+=4, refup_curr
+=8 )
// After the shuffles m1 = r0 r2 r4 r6 (even samples) and m2 = r1 r3 r5 r7
// (odd samples).
148 m1
= _mm_unpacklo_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
149 m3
= _mm_unpackhi_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
150 m2
= _mm_unpackhi_pi16 (m1
, m3
);
151 m1
= _mm_unpacklo_pi16 (m1
, m3
);
153 // (refup_curr[0] + refup_curr[1] + 1)>>1
154 m1
= _mm_add_pi16 (m1
, m2
);
155 m1
= _mm_add_pi16 (m1
, round
);
156 *(__m64
*)block_curr
= _mm_srai_pi16 (m1
, 1);
159 // Mopup the last value
// NOTE(review): the rest of this scalar expression is not visible in the
// listing.
160 for ( x
=stopX
; x
< block_data
.LengthX(); ++x
)
162 *block_curr
= (( *refup_curr
+
// Case 3: remainder in y only - average each kept sample with the sample one
// reference row below (offset refXlen).
171 else if( rmdr
.x
== 0 )
173 __m64 round
= _mm_set_pi16 (1, 1, 1, 1);
175 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
178 for( x
= 0; x
< stopX
; x
+=4, block_curr
+=4, refup_curr
+=8 )
180 m1
= _mm_unpacklo_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
181 m2
= _mm_unpackhi_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
182 // m1 now contains r00 r02 r04 r06
183 m1
= _mm_unpacklo_pi16 (m1
, m2
);
185 m3
= _mm_unpacklo_pi16 (*(__m64
*)(refup_curr
+refXlen
), *(__m64
*)(refup_curr
+refXlen
+4));
186 m2
= _mm_unpackhi_pi16 (*(__m64
*)(refup_curr
+refXlen
), *(__m64
*)(refup_curr
+refXlen
+4));
187 // m2 now contains r10 r12 r14 r16
188 m2
= _mm_unpacklo_pi16 (m3
, m2
);
190 // (refup_curr[0] + (refup_curr+refXlen)[0] + 1)>>1
191 m1
= _mm_add_pi16 (m1
, m2
);
192 m1
= _mm_add_pi16 (m1
, round
);
193 *(__m64
*)block_curr
= _mm_srai_pi16 (m1
, 1);
// Scalar loop for the columns beyond stopX; the rest of its expression is not
// visible in the listing.
195 for ( x
=stopX
; x
< block_data
.LengthX(); ++x
)
197 *block_curr
= (( *refup_curr
+ *(refup_curr
+refXlen
) +
// Case 4: the 2x2 neighbourhood is summed, 2 is added for rounding and the
// result is shifted right by 2.
// NOTE(review): the `else` introducing this case is not visible in the
// listing; presumably it is the remaining case (remainder on both axes).
208 __m64 round
= _mm_set_pi16 (2, 2, 2, 2);
210 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
213 for( x
= 0; x
< stopX
; x
+=4, block_curr
+=4, refup_curr
+=8 )
// First add the two rows column by column (m1 = columns 0-3, m2 = columns
// 4-7), then shuffle the column sums into even (m1) and odd (m2) columns.
215 m1
= _mm_add_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+refXlen
));
216 m2
= _mm_add_pi16 (*(__m64
*)(refup_curr
+4), *(__m64
*)(refup_curr
+refXlen
+4));
217 m3
= _mm_unpacklo_pi16 (m1
, m2
);
218 m1
= _mm_unpackhi_pi16 (m1
, m2
);
220 m2
= _mm_unpackhi_pi16 (m3
, m1
);
221 m1
= _mm_unpacklo_pi16 (m3
, m1
);
223 m1
= _mm_add_pi16 (m1
, m2
);
224 m1
= _mm_add_pi16 (m1
, round
);
225 *(__m64
*)block_curr
= _mm_srai_pi16 (m1
, 2);
// Scalar loop for the columns beyond stopX; only part of its expression is
// visible in the listing.
227 for ( x
=stopX
; x
< block_data
.LengthX(); ++x
)
229 *block_curr
= (( *refup_curr
+
231 *(refup_curr
+refXlen
) +
232 *(refup_curr
+refXlen
+1) +
244 // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
246 //weights for doing linear interpolation, calculated from the remainder values
// With rmdr values of 0 or 1 the four weights always sum to 4.
247 const ValueType linear_wts
[4] = { (2 - rmdr
.x
) * (2 - rmdr
.y
), //tl
248 rmdr
.x
* (2 - rmdr
.y
), //tr
249 (2 - rmdr
.x
) * rmdr
.y
, //bl
250 rmdr
.x
* rmdr
.y
}; //br
252 ValueType act_cols1
[4], act_cols2
[4];
// Walk the block one output sample at a time; the reference position
// (uX, uY) advances by 2 per output sample.
254 for(c
= 0, uY
= ref_start
.y
; c
< block_data
.LengthY(); ++c
, uY
+= 2)
256 for(l
= 0, uX
=ref_start
.x
; l
< block_data
.LengthX(); ++l
, ++block_curr
, uX
+= 2)
// Fetch columns uX..uX+3 from reference rows BChk(uY) and BChk(uY+1); only
// entries [0] and [1] are used in the visible part of the expression below.
258 check_active_columns(uX
, trueRefXlen
, act_cols1
, act_cols2
, refup_data
[BChk(uY
, trueRefYlen
)], refup_data
[BChk(uY
+1, trueRefYlen
)]);
// Weighted sum of the 2x2 neighbourhood (tl, tr, bl, br).
// NOTE(review): the tail of this expression (rounding/shift) is not visible.
260 *block_curr
= (( linear_wts
[0] * act_cols1
[0] +
261 linear_wts
[1] * act_cols1
[1] +
262 linear_wts
[2] * act_cols2
[0] +
263 linear_wts
[3] * act_cols2
[1] +
// Builds the prediction for one block by copying every second sample of the
// upconverted reference picture, starting at twice the (clamped) block
// position plus the motion vector. No interpolation is done here.
//   block_data     output block, written in raster order through block_curr
//   pos            block position; negative coordinates are clamped to 0
//   orig_pic_size  true picture size, used for the boundary tests
//   refup_data     upconverted reference picture the samples are read from
// NOTE(review): `mv` is used below but its declaration is not visible in this
// listing (the embedded line numbers skip from 275 to 278) - confirm the
// signature against the header. Declarations of m1, m2 and x for the MMX loop
// are likewise not visible.
271 void MotionCompensator_HalfPixel::BlockPixelPred(
272 TwoDArray
<ValueType
> &block_data
,
273 const ImageCoords
& pos
,
274 const ImageCoords
& orig_pic_size
,
275 const PicArray
&refup_data
,
278 //Where to start in the upconverted image
279 const ImageCoords
start_pos( std::max(pos
.x
,0) , std::max(pos
.y
,0) );
280 const ImageCoords
ref_start( ( start_pos
.x
<<1 ) + mv
.x
,( start_pos
.y
<<1 ) + mv
.y
);
282 //An additional stage to make sure the block to be copied does not fall outside
283 //the reference image.
284 const int refXlen
= refup_data
.LengthX();
285 //const int refYlen = refup_data.LengthY();
286 const int trueRefXlen
= (orig_pic_size
.x
<< 1) - 1;
287 const int trueRefYlen
= (orig_pic_size
.y
<< 1) - 1;
289 bool do_bounds_checking
= false;
291 //Check if there are going to be any problems copying the block from
292 //the upconverted reference image.
// The slow path is selected if ref_start is negative on either axis, or if
// the last sample read (ref_start + 2*(length-1)) reaches the true reference
// length.
294 if( ref_start
.x
< 0 )
295 do_bounds_checking
= true;
296 else if( ref_start
.x
+ ((block_data
.LengthX() - 1 )<<1 ) >= trueRefXlen
)
297 do_bounds_checking
= true;
298 if( ref_start
.y
< 0 )
299 do_bounds_checking
= true;
300 else if( ref_start
.y
+ ((block_data
.LengthY() - 1 )<<1 ) >= trueRefYlen
)
301 do_bounds_checking
= true;
303 ValueType
*block_curr
= &block_data
[0][0];
// Fast path: no clipping needed, so walk the reference with raw pointers.
305 if( !do_bounds_checking
)
307 ValueType
*refup_curr
= &refup_data
[ref_start
.y
][ref_start
.x
];
308 const int refup_next( (refXlen
- block_data
.LengthX())*2 );// go down 2 rows and back up
// stopX is the block width rounded down to a multiple of 4.
310 int stopX
= (block_data
.LengthX()>>2)<<2;
// MMX version: each iteration turns 8 consecutive reference samples into 4
// outputs by keeping samples 0, 2, 4 and 6.
314 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
317 for( x
=0; x
< stopX
; x
+=4, block_curr
+=4, refup_curr
+=8 )
// m1 = r0 r4 r1 r5, m2 = r2 r6 r3 r7; unpacklo(m1, m2) = r0 r2 r4 r6.
319 m1
= _mm_unpacklo_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
320 m2
= _mm_unpackhi_pi16 (*(__m64
*)refup_curr
, *(__m64
*)(refup_curr
+4));
321 *(__m64
*)block_curr
= _mm_unpacklo_pi16 (m1
, m2
);
323 // Mopup the last value
// NOTE(review): the pointer increments belonging to this scalar loop are not
// visible in the listing - confirm against the original file.
324 for ( x
=stopX
; x
< block_data
.LengthX(); ++x
)
326 *block_curr
= *refup_curr
;
// Scalar version of the same copy: one output per iteration, reference
// pointer advancing by 2.
// NOTE(review): the listing does not show what selects between this loop and
// the MMX loop above (possibly a preprocessor conditional) - confirm.
335 for( int y
=0; y
< block_data
.LengthY(); ++y
, refup_curr
+=refup_next
)
337 for( int x
=0; x
< block_data
.LengthX(); ++x
, ++block_curr
, refup_curr
+=2 )
339 *block_curr
= refup_curr
[0];
346 // We're doing bounds checking because we'll fall off the edge of the reference otherwise.
// ry/rx are the unclipped reference coordinates, advancing by 2 per output
// sample; by/bx are the same coordinates after passing through BChk.
347 for( int y
=0, ry
=ref_start
.y
, by
=BChk(ry
,trueRefYlen
);
348 y
<block_data
.LengthY();
349 ++y
, ry
+=2,by
=BChk(ry
,trueRefYlen
))
351 for( int x
=0 , rx
=ref_start
.x
, bx
=BChk(rx
,trueRefXlen
);
352 x
<block_data
.LengthX() ;
353 ++x
, ++block_curr
, rx
+=2 , bx
=BChk(rx
,trueRefXlen
))
355 *block_curr
= refup_data
[by
][bx
];
// Multiplies every value of val_block, in place, by the matching entry of
// wt_array.
//   val_block  block of values to be weighted (modified in place)
//   pos        block position; where a coordinate is negative the weights
//              are read starting at the corresponding positive offset
//   wt_array   weights, read row by row starting at wt_start
361 void MotionCompensator::AdjustBlockBySpatialWeights (
362 TwoDArray
<ValueType
>& val_block
,
363 const ImageCoords
&pos
,
364 const TwoDArray
<ValueType
> &wt_array
)
// wt_start is (0,0) when pos is non-negative, otherwise (-pos.x, -pos.y) for
// the negative coordinates.
366 ImageCoords
start_pos (std::max(0, pos
.x
), std::max(0, pos
.y
));
367 ImageCoords
wt_start (start_pos
.x
- pos
.x
, start_pos
.y
- pos
.y
);
369 ValueType
*val_curr
= &val_block
[0][0];
370 ValueType
*wt_curr
= &wt_array
[wt_start
.y
][wt_start
.x
];
372 // go down a row and back to beginning of weights line
373 const int wt_next
= wt_array
.LengthX() - val_block
.LengthX();
// stopX is the block width rounded down to a multiple of 4.
375 const int stopX
= (val_block
.LengthX()>>2)<<2;
377 for ( int j
= 0; j
< val_block
.LengthY(); ++j
, wt_curr
+= wt_next
)
// MMX loop: four 16-bit multiplies per iteration.
379 for ( int i
= 0; i
< stopX
; i
+=4, val_curr
+=4, wt_curr
+=4)
382 * NOTE: Using only the low 16 bits of the result of multiplication
383 * by weights because the result is supposed to fit in 16 bit
384 * words. For some weights could result in overflow and errors
386 __m64
*out
= (__m64
*)val_curr
;
387 *out
= _mm_mullo_pi16 (*(__m64
*)val_curr
, *(__m64
*)wt_curr
);
// Scalar loop for the columns beyond stopX.
389 for (int i
= stopX
; i
< val_block
.LengthX(); ++i
, ++val_curr
, ++wt_curr
)
391 *val_curr
= *val_curr
* *wt_curr
;
// For rows start_y .. end_y-1, adds (comp_data + round_val) >> weight_bits to
// pic_data_out across the true picture width, then overwrites the columns
// beyond the true picture width with the last true value of the row.
//   start_y, end_y  row range in pic_data_out (end_y exclusive)
//   orig_pic_size   true picture size; only .x is used in the visible code
//   comp_data       values to add, read starting at row 0
//   pic_data_out    picture data updated in place
// NOTE(review): `weight_bits` is used below but its declaration is not
// visible in this listing (embedded line 400 is absent) - confirm the
// signature. The statements advancing pic_row and out_row inside the loops
// are likewise not visible.
399 void CompensateComponentAddAndShift_mmx (int start_y
, int end_y
,
401 const ImageCoords
& orig_pic_size
,
402 TwoDArray
<ValueType
> &comp_data
,
403 PicArray
&pic_data_out
)
// NOTE(review): the statement guarded by this test is not visible in the
// listing; presumably an early return for an empty row range - confirm.
405 if (start_y
>= end_y
)
// round_val is half of 1 << weight_bits, added before the right shift.
407 const int round_val
= 1<<(weight_bits
-1);
// stopX: end of the part of the true picture width that is a multiple of 4.
408 int stopX
= pic_data_out
.FirstX() + ((orig_pic_size
.x
>>2)<<2);
409 int x_end_truepic_data
= pic_data_out
.FirstX() + orig_pic_size
.x
;
410 int x_end_data
= pic_data_out
.FirstX() + pic_data_out
.LengthX();
411 __m64 mround_val
= _mm_set_pi16 (round_val
, round_val
, round_val
, round_val
);
412 ValueType
*pic_row
= &comp_data
[0][comp_data
.FirstX()];
413 ValueType
*out_row
= &pic_data_out
[start_y
][pic_data_out
.FirstX()];
414 for ( int i
= start_y
; i
< end_y
; i
++)
// MMX loop: four values per iteration.
416 for ( int j
= pic_data_out
.FirstX(); j
< stopX
; j
+=4)
418 __m64 in1
= _mm_add_pi16 (*(__m64
*)pic_row
, mround_val
);
419 in1
= _mm_srai_pi16 (in1
, weight_bits
);
420 __m64
*out
= (__m64
*)out_row
;
421 *out
= _mm_add_pi16 (in1
, *out
);
// Scalar loop for the remaining columns of the true picture width.
425 for ( int j
=stopX
; j
< x_end_truepic_data
; j
++)
427 *out_row
+= static_cast<ValueType
>( (*pic_row
+ round_val
) >> weight_bits
);
431 // Now pad past the true picture with the last true pic val in
// The value just before out_row is taken as the last true value, which
// assumes out_row has been advanced past the true data by this point.
433 ValueType last_true_val
= *(out_row
- 1);
434 for ( int j
= x_end_truepic_data
; j
< x_end_data
; ++j
)
436 *out_row
= last_true_val
;
// Adds block_data, element by element, into comp_strip starting at
// comp_strip[start_pos.y][start_pos.x].
//   start_pos   top-left position in comp_strip where the block is added
//   comp_strip  accumulation buffer, updated in place
//   block_data  block of values to add
// NOTE(review): the closing lines of this function are not visible in the
// listing.
444 void AddMCBlock_mmx (const ImageCoords
& start_pos
,
445 TwoDArray
<ValueType
> &comp_strip
,
446 TwoDArray
<ValueType
>& block_data
)
// stopX is the block width rounded down to a multiple of 4.
448 const int stopX
= (block_data
.LengthX()>>2)<<2;
// comp_next moves comp_curr from the end of one block row to the start of
// the next row in comp_strip.
450 const int comp_next
= comp_strip
.LengthX()-block_data
.LengthX();
451 ValueType
*comp_curr
= &comp_strip
[start_pos
.y
][start_pos
.x
];
452 ValueType
*block_curr
= &block_data
[0][0];
454 for (int j
= 0; j
< block_data
.LengthY(); ++j
, comp_curr
+= comp_next
)
// MMX loop: four 16-bit additions per iteration.
456 for (int i
= 0; i
< stopX
; i
+=4, comp_curr
+=4, block_curr
+=4)
458 __m64
*out
= (__m64
*)comp_curr
;
459 // mc_tmp[y][x] += val
460 *out
= _mm_add_pi16 (*(__m64
*)comp_curr
, *(__m64
*)block_curr
);
// Scalar loop for the columns beyond stopX.
462 for (int i
= stopX
; i
< block_data
.LengthX(); ++i
, ++comp_curr
, ++block_curr
)
464 *comp_curr
+= *block_curr
;