/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_scale/yv12config.h"
#include "math.h"
#include "onyx_int.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x)  (x)
#else
#define IF_RTCD(x)  NULL
#endif
// Google version of SSIM
#define KERNEL 3
#define KERNEL_SIZE  (2 * KERNEL + 1)

typedef unsigned char uint8;
typedef unsigned int uint32;

static const int K[KERNEL_SIZE] =
{
    1, 4, 11, 16, 11, 4, 1    // 16 * exp(-0.3 * i * i)
};
static const double ki_w = 1. / 2304.;  // 1 / sum(i:0..6, j..6) K[i]*K[j]
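// The 1-D kernel weights sum to 1 + 4 + 11 + 16 + 11 + 4 + 1 = 48, so the full
// 2-D kernel weight is 48 * 48 = 2304, hence ki_w = 1 / 2304.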
double get_ssimg(const uint8 *org, const uint8 *rec,
                 int xo, int yo, int W, int H,
                 const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    int x, y;
    const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
    const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
    const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
    const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a value of 255, squared. That gives a max accumulator of 0x8ee0900,
    // which fits into a 32 bit integer.
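    // (48 * 48) * 255 * 255 = 2304 * 65025 = 149817600 = 0x8ee0900.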
    uint32 w = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += ymin * stride1;
    rec += ymin * stride2;

    for (y = ymin; y <= ymax; ++y, org += stride1, rec += stride2)
    {
        const int Wy = K[KERNEL + y - yo];

        for (x = xmin; x <= xmax; ++x)
        {
            const int Wxy = Wy * K[KERNEL + x - xo];
            // TODO(skal): inlined assembly
            w   += Wxy;
            xm  += Wxy * org[x];
            ym  += Wxy * rec[x];
            xxm += Wxy * org[x] * org[x];
            xym += Wxy * org[x] * rec[x];
            yym += Wxy * rec[x] * rec[x];
        }
    }
    {
        const double iw = 1. / w;
        const double iwx = xm * iw;
        const double iwy = ym * iw;
        double sxx = xxm * iw - iwx * iwx;
        double syy = yym * iw - iwy * iwy;

        // small errors are possible, due to rounding. Clamp to zero.
        if (sxx < 0.) sxx = 0.;

        if (syy < 0.) syy = 0.;
        {
            const double sxsy = sqrt(sxx * syy);
            const double sxy = xym * iw - iwx * iwy;
            static const double C11 = (0.01 * 0.01) * (255 * 255);
            static const double C22 = (0.03 * 0.03) * (255 * 255);
            static const double C33 = (0.015 * 0.015) * (255 * 255);
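            // l, c and s are the usual SSIM luminance, contrast and structure
            // comparison terms; C11, C22 and C33 are the small stabilizing
            // constants that keep the ratios finite when the means or
            // variances approach zero. The window SSIM is the product l * c * s.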
            const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
            const double c = (2. * sxsy + C22) / (sxx + syy + C22);
            const double s = (sxy + C33) / (sxsy + C33);
            return l * c * s;
        }
    }
}
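// Same as get_ssimg(), but only valid for pixels at least KERNEL away from the
// image border, where the full 7x7 kernel always applies; the weight sum is
// then the constant 2304, so ki_w replaces the per-pixel accumulation of w.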
double get_ssimfull_kernelg(const uint8 *org, const uint8 *rec,
                            int xo, int yo, int W, int H,
                            const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a value of 255, squared. That gives a max accumulator of 0x8ee0900,
    // which fits into a 32 bit integer.
    int y_, x_;
    uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += (yo - KERNEL) * stride1;
    org += (xo - KERNEL);
    rec += (yo - KERNEL) * stride2;
    rec += (xo - KERNEL);
    for (y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride1, rec += stride2)
    {
        const int Wy = K[y_];

        for (x_ = 0; x_ < KERNEL_SIZE; ++x_)
        {
            const int Wxy = Wy * K[x_];
            // TODO(skal): inlined assembly
            const int org_x = org[x_];
            const int rec_x = rec[x_];
            xm  += Wxy * org_x;
            ym  += Wxy * rec_x;
            xxm += Wxy * org_x * org_x;
            xym += Wxy * org_x * rec_x;
            yym += Wxy * rec_x * rec_x;
        }
    }
    {
        const double iw = ki_w;
        const double iwx = xm * iw;
        const double iwy = ym * iw;
        double sxx = xxm * iw - iwx * iwx;
        double syy = yym * iw - iwy * iwy;

        // small errors are possible, due to rounding. Clamp to zero.
        if (sxx < 0.) sxx = 0.;

        if (syy < 0.) syy = 0.;
        {
            const double sxsy = sqrt(sxx * syy);
            const double sxy = xym * iw - iwx * iwy;
            static const double C11 = (0.01 * 0.01) * (255 * 255);
            static const double C22 = (0.03 * 0.03) * (255 * 255);
            static const double C33 = (0.015 * 0.015) * (255 * 255);
            const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
            const double c = (2. * sxsy + C22) / (sxx + syy + C22);
            const double s = (sxy + C33) / (sxsy + C33);
            return l * c * s;
        }
    }
}
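// Accumulates per-pixel SSIM over a whole plane: the KERNEL-wide border rows
// and columns use the clipping get_ssimg(), the interior uses the faster
// get_ssimfull_kernelg(). The caller normalizes the returned sum.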
double calc_ssimg(const uint8 *org, const uint8 *rec,
                  const int image_width, const int image_height,
                  const int stride1, const int stride2)
{
    int i, j;
    double SSIM = 0.;
    for (j = 0; j < KERNEL; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    for (j = KERNEL; j < image_height - KERNEL; ++j)
    {
        for (i = 0; i < KERNEL; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }

        for (i = KERNEL; i < image_width - KERNEL; ++i)
        {
            SSIM += get_ssimfull_kernelg(org, rec, i, j,
                                         image_width, image_height, stride1, stride2);
        }

        for (i = image_width - KERNEL; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }
    for (j = image_height - KERNEL; j < image_height; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    return SSIM;
}
double vp8_calc_ssimg
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    double *ssim_y,
    double *ssim_u,
    double *ssim_v
)
{
    double ssim_all = 0;
    int ysize  = source->y_width * source->y_height;
    int uvsize = ysize / 4;
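    // YV12 is 4:2:0: each chroma plane holds a quarter as many samples as luma.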
    *ssim_y = calc_ssimg(source->y_buffer, dest->y_buffer,
                         source->y_width, source->y_height,
                         source->y_stride, dest->y_stride);

    *ssim_u = calc_ssimg(source->u_buffer, dest->u_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);

    *ssim_v = calc_ssimg(source->v_buffer, dest->v_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);
    ssim_all = (*ssim_y + *ssim_u + *ssim_v) / (ysize + uvsize + uvsize);
    *ssim_y /= ysize;
    *ssim_u /= uvsize;
    *ssim_v /= uvsize;
    return ssim_all;
}
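// The block-based SSIM below works on raw integer sums gathered per 16x16 or
// 8x8 window; the sum gathering is dispatched through the runtime-detected
// variance vtable (rtcd).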
void ssim_parms_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int i, j;

    for (i = 0; i < 16; i++, s += sp, r += rp)
    {
        for (j = 0; j < 16; j++)
        {
            *sum_s += s[j];
            *sum_r += r[j];
            *sum_sq_s += s[j] * s[j];
            *sum_sq_r += r[j] * r[j];
            *sum_sxr += s[j] * r[j];
        }
    }
}
void ssim_parms_8x8_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int i, j;

    for (i = 0; i < 8; i++, s += sp, r += rp)
    {
        for (j = 0; j < 8; j++)
        {
            *sum_s += s[j];
            *sum_r += r[j];
            *sum_sq_s += s[j] * s[j];
            *sum_sq_r += r[j] * r[j];
            *sum_sxr += s[j] * r[j];
        }
    }
}
const static long long c1 =  426148; // 256^2 * (.01*255)^2
const static long long c2 = 3835331; // 256^2 * (.03*255)^2
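// c1 = 65536 * (0.01 * 255)^2 = 65536 * 6.5025  ~= 426148
// c2 = 65536 * (0.03 * 255)^2 = 65536 * 58.5225 ~= 3835331
// These are the usual SSIM stabilizers (K1*L)^2 and (K2*L)^2, pre-scaled by
// 256^2 to match the unnormalized sums used in similarity().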
static double similarity
(
    unsigned long sum_s,
    unsigned long sum_r,
    unsigned long sum_sq_s,
    unsigned long sum_sq_r,
    unsigned long sum_sxr,
    int count
)
{
    long long ssim_n = (2 * sum_s * sum_r + c1) *
                       (2 * count * sum_sxr - 2 * sum_s * sum_r + c2);

    long long ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
                       (count * sum_sq_s - sum_s * sum_s +
                        count * sum_sq_r - sum_r * sum_r + c2);

    return ssim_n * 1.0 / ssim_d;
}
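// similarity() evaluates SSIM on unnormalized sums: with n = count,
// n*sum_sq_s - sum_s^2 = n^2 * var(s) and n*sum_sxr - sum_s*sum_r = n^2 * cov(s, r),
// so numerator and denominator are the standard SSIM factors scaled by n^2,
// matching the 256^2 scale built into c1 and c2.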
static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp,
                         const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp,
                       const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
// TODO: (jbb) tried to scale this function such that we may be able to use it
// for a distortion metric in mode selection code (provided we do a reconstruction).
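// dssim() maps SSIM onto a distortion-like scale: ssim3 = 256 * (1 - ssim),
// and the returned value 256 * ssim3^2 grows quadratically as similarity drops.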
long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
           const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    long long ssim_n;
    long long ssim_d;
    double ssim3;

    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    ssim_n = (2 * sum_s * sum_r + c1) * (2 * 256 * sum_sxr - 2 * sum_s * sum_r + c2);

    ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
             (256 * sum_sq_s - sum_s * sum_s + 256 * sum_sq_r - sum_r * sum_r + c2);

    ssim3 = 256 * (ssim_d - ssim_n) / ssim_d;
    return (long)(256 * ssim3 * ssim3);
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
double vp8_ssim2
(
    unsigned char *img1,
    unsigned char *img2,
    int stride_img1,
    int stride_img2,
    int width,
    int height,
    const vp8_variance_rtcd_vtable_t *rtcd
)
{
    int i, j;
    double ssim_total = 0;

    // we can sample points as frequently as we like; start with 1 per 8x8
    for (i = 0; i < height; i += 8, img1 += stride_img1 * 8, img2 += stride_img2 * 8)
    {
        for (j = 0; j < width; j += 8)
        {
            // offset by j to step through the row one 8x8 block at a time
            ssim_total += ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd);
        }
    }
    ssim_total /= (width / 8 * height / 8);
    return ssim_total;
}
double vp8_calc_ssim
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    int lumamask,
    double *weight,
    const vp8_variance_rtcd_vtable_t *rtcd
)
{
    double a, b, c;
    double ssimv;
    a = vp8_ssim2(source->y_buffer, dest->y_buffer,
                  source->y_stride, dest->y_stride, source->y_width,
                  source->y_height, rtcd);

    b = vp8_ssim2(source->u_buffer, dest->u_buffer,
                  source->uv_stride, dest->uv_stride, source->uv_width,
                  source->uv_height, rtcd);

    c = vp8_ssim2(source->v_buffer, dest->v_buffer,
                  source->uv_stride, dest->uv_stride, source->uv_width,
                  source->uv_height, rtcd);
    ssimv = a * .8 + .1 * (b + c);

    *weight = 1;

    return ssimv;
}