mpeg2enc/mblock_sub44_sads_x86_h.c

   1 /*
   2  *
   3  * mblock_sub44_sads_x86_h.c
   4  * Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk>
   5  *
   6  * Fast block sum-absolute difference computation for a rectangular area 4*x
   7  * by y where y > h against a 4 by h block.
   8  *
   9  * Used for 4*4 sub-sampled motion compensation calculations.
  10  *
  11  *
  12  * This file is part of mpeg2enc, a free MPEG-2 video stream encoder
  13  * based on the original MSSG reference design
  14  *
  15  * mpeg2enc is free software; you can redistribute new parts
  16  * and/or modify under the terms of the GNU General Public License
  17  * as published by
  18  * the Free Software Foundation; either version 2 of the License, or
  19  * (at your option) any later version.
  20  *
  21  * mpeg2enc is distributed in the hope that it will be useful,
  22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  24  * GNU General Public License for more details.
  25  *
  26  * See the files for those sections (c) MSSG
  27  *
  28  * You should have received a copy of the GNU General Public License
  29  * along with this program; if not, write to the Free Software
  30  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  31  */
  32
  33 /*
  34  *
   35  * Generates a vector of sad's for 4*4 sub-sampled pel (qpel) data (with
  36  * co-ordinates and top-left qpel address) from specified rectangle
  37  * against a specified 16*h pel (4*4 qpel) reference block.  The
   38  * generated vector contains results only for those sad's that do
   39  * not exceed four times the running best sad (nor the caller's
   40  * threshold) and are aligned on 8-pel boundaries
  41  *
  42  * Invariant: blk points to top-left sub-sampled pel for macroblock
   43  * at (ilow,jlow)
   44  * i{low,high} and j{low,high} must be multiples of 4.
  45  *
  46  * sad = Sum Absolute Differences
  47  *
   48  * NOTES: for best efficiency i{low,high} should be multiples of 16.
  49  *
  50  * */
  51
   52 int SIMD_SUFFIX(mblock_sub44_dists)( uint8_t *blk,  uint8_t *ref,
   53                                                                          int ilow,int jlow,
   54                                                                          int ihigh, int jhigh,
   55                                                                          int h, int rowstride,
   56                                                                          int threshold,
   57                                                                          mc_result_s *resvec)
   58 {
              /*
               * Walks candidate positions y = jlow..jhigh and x = ilow..ihigh
               * (both inclusive, in steps of 4), obtains a SAD for each one from
               * SIMD_SUFFIX(qblock_sad), and appends every candidate whose SAD
               * does not exceed the running threshold to resvec.
               *
               * blk        address of the candidate at (ilow,jlow)
               * ref        reference block handed to qblock_sad
               * h          row count handed to load_blk and qblock_sad
               * rowstride  byte offset added to the row pointer for each step
               *            of y; also forwarded to load_blk and qblock_sad
               * threshold  initial acceptance limit; only ever lowered here
               * resvec     output array; no capacity is passed in, so the
               *            caller must provide room for up to
               *            ((jhigh-jlow)/4+1) * ((ihigh-ilow)/4+1) entries
               *            (when both ranges are non-empty)
               *
               * Returns the number of entries written to resvec.
               *
               * NOTE(review): curblk is never passed to qblock_sad, only to
               * load_blk, so the candidate data presumably travels in state
               * set up by load_blk and advanced by shift_blk (MMX registers,
               * judging by the emms() at the end) -- confirm against their
               * definitions.  Keep the statement order of the inner loop.
               */
   59         int32_t x,y;
   60         uint8_t *currowblk = blk;
   61         uint8_t *curblk;
   62         mc_result_s *cres = resvec;
              /* Each step of y (y += 4) advances the row pointer by exactly
                 rowstride bytes. */
   63         int      gridrowstride = (rowstride);
   64
   65         for( y=jlow; y <= jhigh ; y+=4)
   66         {
   67                 curblk = currowblk;
   68                 for( x = ilow; x <= ihigh; x += 4)
   69                 {
   70                         int weight;
                              /* x advances by 4, so this holds for the first
                                 column and for every fourth column after it
                                 (every 16 units of x): reload from curblk. */
   71                         if( (x & 15) == (ilow & 15) )
   72                         {
   73                                 load_blk( curblk, rowstride, h );
   74                         }
   75                         weight = SIMD_SUFFIX(qblock_sad)(ref, h, rowstride);
   76                         if( weight <= threshold )
   77                         {
                                      /* intmin is presumably the smaller of its
                                         two arguments (TODO confirm): the limit
                                         never rises and is capped at four times
                                         any accepted SAD. */
   78                                 threshold = intmin(weight<<2,threshold);
   79                                 /* Rough and-ready absolute distance penalty */
   80                                 /* NOTE: This penalty is *vital* to correct operation
   81                                    as otherwise the sub-mean filtering won't work on very
   82                                    uniform images.
   83                                  */
                                      /* The uint16_t cast binds to weight alone;
                                         the distance penalty is added afterwards. */
   84                                 cres->weight = (uint16_t)weight+((intabs(x)+intabs(y))>>3);
                                      /* x and y pass through a uint8_t cast before
                                         being stored (field types are declared
                                         elsewhere). */
   85                                 cres->x = (uint8_t)x;
   86                                 cres->y = (uint8_t)y;
   87                                 ++cres;
   88                         }
                              /* The next candidate starts one byte further on;
                                 shift_blk(8) presumably moves the loaded data
                                 by 8 bits to match -- TODO confirm. */
   89                         curblk += 1;
   90                         shift_blk(8);
   91                 }
   92                 currowblk += gridrowstride;
   93         }
              /* Presumably wraps the x86 EMMS instruction, clearing MMX state
                 before returning -- confirm against its definition. */
   94         emms();
              /* Number of entries appended to resvec. */
   95         return cres - resvec;
   96 }
  97
  98 #undef concat