/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_scale/yv12config.h"
#include <math.h>   // sqrt() is used by the Google SSIM implementation below

#define C1 (float)(64 * 64 * 0.01 * 255 * 0.01 * 255)
#define C2 (float)(64 * 64 * 0.03 * 255 * 0.03 * 255)
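/* C1 and C2 are the usual SSIM stabilizing constants (K1*L)^2 and (K2*L)^2 with
 * K1 = 0.01, K2 = 0.03 and L = 255, scaled by 64 * 64 because vp8_similarity()
 * works on sums over 8x8 blocks rather than on per-pixel means */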

static double plane_summed_weights = 0;
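
/* scratch buffers for vp8_ssim(): img12_sum_block caches the 8x8 luma block sums
 * (2x subsampled) so the chroma passes can derive their masking weights from the
 * co-located luma activity; the img*_sum / *_sq_sum / *_mul_sum buffers hold
 * running column sums of pixel values, squares and cross products for a sliding
 * band of rows */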
static short img12_sum_block[8 * 4096 * 4096 * 2];

static short img1_sum[8 * 4096 * 2];
static short img2_sum[8 * 4096 * 2];
static int   img1_sq_sum[8 * 4096 * 2];
static int   img2_sq_sum[8 * 4096 * 2];
static int   img12_mul_sum[8 * 4096 * 2];

double vp8_similarity
(
    int mu_x,
    int mu_y,
    int pre_mu_x2,
    int pre_mu_y2,
    int pre_mu_xy2
)
{
    int mu_x2, mu_y2, mu_xy, theta_x2, theta_y2, theta_xy;

    mu_x2 = mu_x * mu_x;
    mu_y2 = mu_y * mu_y;
    mu_xy = mu_x * mu_y;

    theta_x2 = 64 * pre_mu_x2 - mu_x2;
    theta_y2 = 64 * pre_mu_y2 - mu_y2;
    theta_xy = 64 * pre_mu_xy2 - mu_xy;
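
    //standard SSIM for one 8x8 window, written in terms of pixel sums: mu_* are
    //products of the 64-pixel sums, theta_* are 64*64 times the (co)variances, and
    //C1/C2 carry the matching 64*64 scale, so the ratio equals the usual SSIM score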
    return (2 * mu_xy + C1) * (2 * theta_xy + C2) /
           ((mu_x2 + mu_y2 + C1) * (theta_x2 + theta_y2 + C2));
}

double vp8_ssim
(
    const unsigned char *img1,
    const unsigned char *img2,
    int stride_img1,
    int stride_img2,
    int width,
    int height
)
{
    int x, y, x2, y2, img1_block, img2_block, img1_sq_block, img2_sq_block,
        img12_mul_block, temp;

    double plane_quality, weight, mean;

    short *img1_sum_ptr1, *img1_sum_ptr2;
    short *img2_sum_ptr1, *img2_sum_ptr2;
    int *img1_sq_sum_ptr1, *img1_sq_sum_ptr2;
    int *img2_sq_sum_ptr1, *img2_sq_sum_ptr2;
    int *img12_mul_sum_ptr1, *img12_mul_sum_ptr2;

    plane_quality = 0;

    if (lumimask)
        plane_summed_weights = 0.0f;
    else
        plane_summed_weights = (height - 7) * (width - 7);

    //some prologue for the main loop
    img1_sum_ptr1      = img1_sum + temp;
    img2_sum_ptr1      = img2_sum + temp;
    img1_sq_sum_ptr1   = img1_sq_sum + temp;
    img2_sq_sum_ptr1   = img2_sq_sum + temp;
    img12_mul_sum_ptr1 = img12_mul_sum + temp;

    for (x = 0; x < width; x++)
    {
        img1_sum[x]      = img1[x];
        img2_sum[x]      = img2[x];
        img1_sq_sum[x]   = img1[x] * img1[x];
        img2_sq_sum[x]   = img2[x] * img2[x];
        img12_mul_sum[x] = img1[x] * img2[x];

        img1_sum_ptr1[x]      = 0;
        img2_sum_ptr1[x]      = 0;
        img1_sq_sum_ptr1[x]   = 0;
        img2_sq_sum_ptr1[x]   = 0;
        img12_mul_sum_ptr1[x] = 0;
    }

    //the main loop
    for (y = 1; y < height; y++)
    {
        temp = (y - 1) % 9 * width;

        img1_sum_ptr1      = img1_sum + temp;
        img2_sum_ptr1      = img2_sum + temp;
        img1_sq_sum_ptr1   = img1_sq_sum + temp;
        img2_sq_sum_ptr1   = img2_sq_sum + temp;
        img12_mul_sum_ptr1 = img12_mul_sum + temp;

        temp = y % 9 * width;

        img1_sum_ptr2      = img1_sum + temp;
        img2_sum_ptr2      = img2_sum + temp;
        img1_sq_sum_ptr2   = img1_sq_sum + temp;
        img2_sq_sum_ptr2   = img2_sq_sum + temp;
        img12_mul_sum_ptr2 = img12_mul_sum + temp;

        for (x = 0; x < width; x++)
        {
            img1_sum_ptr2[x]      = img1_sum_ptr1[x] + img1[x];
            img2_sum_ptr2[x]      = img2_sum_ptr1[x] + img2[x];
            img1_sq_sum_ptr2[x]   = img1_sq_sum_ptr1[x] + img1[x] * img1[x];
            img2_sq_sum_ptr2[x]   = img2_sq_sum_ptr1[x] + img2[x] * img2[x];
            img12_mul_sum_ptr2[x] = img12_mul_sum_ptr1[x] + img1[x] * img2[x];
        }

        //calculate the sums over the last 8 lines by subtracting the running sums
        //from 8 lines back from the present running sums
        temp = (y + 1) % 9 * width;

        img1_sum_ptr1      = img1_sum + temp;
        img2_sum_ptr1      = img2_sum + temp;
        img1_sq_sum_ptr1   = img1_sq_sum + temp;
        img2_sq_sum_ptr1   = img2_sq_sum + temp;
        img12_mul_sum_ptr1 = img12_mul_sum + temp;

        for (x = 0; x < width; x++)
        {
            img1_sum_ptr1[x]      = img1_sum_ptr2[x] - img1_sum_ptr1[x];
            img2_sum_ptr1[x]      = img2_sum_ptr2[x] - img2_sum_ptr1[x];
            img1_sq_sum_ptr1[x]   = img1_sq_sum_ptr2[x] - img1_sq_sum_ptr1[x];
            img2_sq_sum_ptr1[x]   = img2_sq_sum_ptr2[x] - img2_sq_sum_ptr1[x];
            img12_mul_sum_ptr1[x] = img12_mul_sum_ptr2[x] - img12_mul_sum_ptr1[x];
        }

        //here we calculate the sum over the 8x8 block of pixels
        //this is done by sliding a window across the column sums for the last 8 lines,
        //each time adding the new column sum and subtracting the one which fell out of the window
        img1_block      = 0;
        img2_block      = 0;
        img1_sq_block   = 0;
        img2_sq_block   = 0;
        img12_mul_block = 0;

        //prologue, and calculation of the similarity measure from the first 8 column sums
        for (x = 0; x < 8; x++)
        {
            img1_block      += img1_sum_ptr1[x];
            img2_block      += img2_sum_ptr1[x];
            img1_sq_block   += img1_sq_sum_ptr1[x];
            img2_sq_block   += img2_sq_sum_ptr1[x];
            img12_mul_block += img12_mul_sum_ptr1[x];
        }
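
        //img1_block .. img12_mul_block now hold the sums for the first 8x8 window of this row band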

        //on the luma plane: the masking mean over the two 8x8 blocks (128 samples),
        //and the combined block sum is cached (2x subsampled) for the chroma passes
        mean = (img2_block + img1_block) / 128.0f;

        if (!(y2 % 2 || x2 % 2))
            *(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;

        //on the chroma planes: the masking mean is rebuilt from four cached luma block sums
        mean = *(img12_sum_block + y2 * width_uv + x2);
        mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
        mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
        mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);
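
        //luminance masking: windows with a mean below 40 get weight 0, means between
        //40 and 50 ramp linearly up to 1, brighter windows get full weight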
        weight = mean < 40 ? 0.0f :
                 (mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
        plane_summed_weights += weight;

        //with luminance masking each window's similarity is weighted
        plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block,
                                                 img2_sq_block, img12_mul_block);

        //without masking every window counts equally
        plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block,
                                        img2_sq_block, img12_mul_block);

        //slide the 8-column window across the rest of the row band
        for (x = 8; x < width; x++)
        {
            img1_block      = img1_block + img1_sum_ptr1[x] - img1_sum_ptr1[x - 8];
            img2_block      = img2_block + img2_sum_ptr1[x] - img2_sum_ptr1[x - 8];
            img1_sq_block   = img1_sq_block + img1_sq_sum_ptr1[x] - img1_sq_sum_ptr1[x - 8];
            img2_sq_block   = img2_sq_block + img2_sq_sum_ptr1[x] - img2_sq_sum_ptr1[x - 8];
            img12_mul_block = img12_mul_block + img12_mul_sum_ptr1[x] - img12_mul_sum_ptr1[x - 8];

            //same per-window masking and similarity accumulation as for the first window
            mean = (img2_block + img1_block) / 128.0f;

            if (!(y2 % 2 || x2 % 2))
                *(img12_sum_block + y2 / 2 * width_uv + x2 / 2) = img2_block + img1_block;

            mean = *(img12_sum_block + y2 * width_uv + x2);
            mean += *(img12_sum_block + y2 * width_uv + x2 + 4);
            mean += *(img12_sum_block + (y2 + 4) * width_uv + x2);
            mean += *(img12_sum_block + (y2 + 4) * width_uv + x2 + 4);

            weight = mean < 40 ? 0.0f :
                     (mean < 50 ? (mean - 40.0f) / 10.0f : 1.0f);
            plane_summed_weights += weight;

            plane_quality += weight * vp8_similarity(img1_block, img2_block, img1_sq_block,
                                                     img2_sq_block, img12_mul_block);

            plane_quality += vp8_similarity(img1_block, img2_block, img1_sq_block,
                                            img2_sq_block, img12_mul_block);
        }
    }

    if (plane_summed_weights == 0)
        return 1.0f;

    return plane_quality / plane_summed_weights;
}

double vp8_calc_ssim
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    int lumamask,
    double *weight
)
{
    double a, b, c;
    double ssimv;
    double frame_weight;

    width_y   = source->y_width;
    height_y  = source->y_height;
    height_uv = source->uv_height;
    width_uv  = source->uv_width;
    stride_uv = dest->uv_stride;
    stride    = dest->y_stride;

    a = vp8_ssim(source->y_buffer, dest->y_buffer,
                 source->y_stride, dest->y_stride, source->y_width, source->y_height);

    frame_weight = plane_summed_weights / ((width_y - 7) * (height_y - 7));

    if (frame_weight == 0)
        a = b = c = frame_weight = 1;
    else
    {
        b = vp8_ssim(source->u_buffer, dest->u_buffer,
                     source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);

        c = vp8_ssim(source->v_buffer, dest->v_buffer,
                     source->uv_stride, dest->uv_stride, source->uv_width, source->uv_height);
    }
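
    //combine the per-plane scores: luma carries 0.8 of the weight, each chroma plane 0.1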
    ssimv = a * .8 + .1 * (b + c);

    *weight = frame_weight;

    return ssimv;
}

// Google version of SSIM
#define KERNEL 3
#define KERNEL_SIZE (2 * KERNEL + 1)

typedef unsigned char uint8;
typedef unsigned int uint32;

static const int K[KERNEL_SIZE] =
{
    1, 4, 11, 16, 11, 4, 1    // 16 * exp(-0.3 * i * i)
};
static const double ki_w = 1. / 2304.;  // 1 / sum(i:0..6, j:0..6) K[i]*K[j]
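// the kernel weights sum to 1 + 4 + 11 + 16 + 11 + 4 + 1 = 48, so a full 7x7
// window accumulates a total weight of 48 * 48 = 2304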

double get_ssimg(const uint8 *org, const uint8 *rec,
                 int xo, int yo, int W, int H,
                 const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    int x, y;

    const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
    const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
    const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
    const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a diff of 255, squared. That would be a max error of 0x8ee0900,
    // which fits into 32 bit integers.
    uint32 w = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += ymin * stride1;
    rec += ymin * stride2;

    for (y = ymin; y <= ymax; ++y, org += stride1, rec += stride2)
    {
        const int Wy = K[KERNEL + y - yo];

        for (x = xmin; x <= xmax; ++x)
        {
            const int Wxy = Wy * K[KERNEL + x - xo];
            // TODO(skal): inlined assembly
            w   += Wxy;
            xm  += Wxy * org[x];
            ym  += Wxy * rec[x];
            xxm += Wxy * org[x] * org[x];
            xym += Wxy * org[x] * rec[x];
            yym += Wxy * rec[x] * rec[x];
        }
    }

    const double iw = 1. / w;
    const double iwx = xm * iw;
    const double iwy = ym * iw;
    double sxx = xxm * iw - iwx * iwx;
    double syy = yym * iw - iwy * iwy;

    // small errors are possible, due to rounding. Clamp to zero.
    if (sxx < 0.) sxx = 0.;
    if (syy < 0.) syy = 0.;

    const double sxsy = sqrt(sxx * syy);
    const double sxy = xym * iw - iwx * iwy;
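
    // l, c and s below are SSIM's luminance, contrast and structure comparison
    // terms; C33 plays the role of the usual C3 stabilizing constant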
    static const double C11 = (0.01 * 0.01) * (255 * 255);
    static const double C22 = (0.03 * 0.03) * (255 * 255);
    static const double C33 = (0.015 * 0.015) * (255 * 255);
    const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
    const double c = (2. * sxsy + C22) / (sxx + syy + C22);
    const double s = (sxy + C33) / (sxsy + C33);
    return l * c * s;
}

double get_ssimfull_kernelg(const uint8 *org, const uint8 *rec,
                            int xo, int yo, int W, int H,
                            const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a diff of 255, squared. That would be a max error of 0x8ee0900,
    // which fits into 32 bit integers.
    int x_, y_;
    uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += (yo - KERNEL) * stride1;
    org += (xo - KERNEL);
    rec += (yo - KERNEL) * stride2;
    rec += (xo - KERNEL);
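    // org and rec now point at the top-left corner of the full 7x7 window; in this
    // file the function is only called for interior pixels (see calc_ssimg), so the
    // window never leaves the image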

    for (y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride1, rec += stride2)
    {
        const int Wy = K[y_];

        for (x_ = 0; x_ < KERNEL_SIZE; ++x_)
        {
            const int Wxy = Wy * K[x_];
            // TODO(skal): inlined assembly
            const int org_x = org[x_];
            const int rec_x = rec[x_];
            xm  += Wxy * org_x;
            ym  += Wxy * rec_x;
            xxm += Wxy * org_x * org_x;
            xym += Wxy * org_x * rec_x;
            yym += Wxy * rec_x * rec_x;
        }
    }

    const double iw = ki_w;
    const double iwx = xm * iw;
    const double iwy = ym * iw;
    double sxx = xxm * iw - iwx * iwx;
    double syy = yym * iw - iwy * iwy;

    // small errors are possible, due to rounding. Clamp to zero.
    if (sxx < 0.) sxx = 0.;
    if (syy < 0.) syy = 0.;

    const double sxsy = sqrt(sxx * syy);
    const double sxy = xym * iw - iwx * iwy;
    static const double C11 = (0.01 * 0.01) * (255 * 255);
    static const double C22 = (0.03 * 0.03) * (255 * 255);
    static const double C33 = (0.015 * 0.015) * (255 * 255);
    const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
    const double c = (2. * sxsy + C22) / (sxx + syy + C22);
    const double s = (sxy + C33) / (sxsy + C33);
    return l * c * s;
}

double calc_ssimg(const uint8 *org, const uint8 *rec,
                  const int image_width, const int image_height,
                  const int stride1, const int stride2)
{
    int i, j;
    double SSIM = 0.;

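    //accumulate SSIM at every pixel position: rows and columns within KERNEL of a
    //border use the edge-clipped window, the interior uses the full 7x7 kernel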
    for (j = 0; j < KERNEL; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    for (j = KERNEL; j < image_height - KERNEL; ++j)
    {
        for (i = 0; i < KERNEL; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }

        for (i = KERNEL; i < image_width - KERNEL; ++i)
        {
            SSIM += get_ssimfull_kernelg(org, rec, i, j,
                                         image_width, image_height, stride1, stride2);
        }

        for (i = image_width - KERNEL; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    for (j = image_height - KERNEL; j < image_height; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    return SSIM;
}

double vp8_calc_ssimg
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    double *ssim_y,
    double *ssim_u,
    double *ssim_v
)
{
    double ssim_all = 0;
    int ysize  = source->y_width * source->y_height;
    int uvsize = ysize / 4;

    *ssim_y = calc_ssimg(source->y_buffer, dest->y_buffer,
                         source->y_width, source->y_height,
                         source->y_stride, dest->y_stride);

    *ssim_u = calc_ssimg(source->u_buffer, dest->u_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);

    *ssim_v = calc_ssimg(source->v_buffer, dest->v_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);
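
    //calc_ssimg() returns SSIM summed over every pixel position, so dividing by the
    //total number of luma and chroma samples gives the mean SSIM of the frame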
    ssim_all = (*ssim_y + *ssim_u + *ssim_v) / (ysize + uvsize + uvsize);