/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_scale/yv12config.h"
#include "math.h"
#include "onyx_int.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x)  (x)
#else
#define IF_RTCD(x)  NULL
#endif
// Google version of SSIM
#define KERNEL 3
#define KERNEL_SIZE  (2 * KERNEL + 1)

typedef unsigned char uint8;
typedef unsigned int uint32;

static const int K[KERNEL_SIZE] =
{
    1, 4, 11, 16, 11, 4, 1    // 16 * exp(-0.3 * i * i)
};
static const double ki_w = 1. / 2304.;  // 1 / sum(i:0..6, j..6) K[i]*K[j]
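// The 1-D kernel weights sum to 1 + 4 + 11 + 16 + 11 + 4 + 1 = 48, so the full
// 2-D kernel weight is 48 * 48 = 2304, hence ki_w = 1 / 2304.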
double get_ssimg(const uint8 *org, const uint8 *rec,
                 int xo, int yo, int W, int H,
                 const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    int x, y;
    const int ymin = (yo - KERNEL < 0) ? 0 : yo - KERNEL;
    const int ymax = (yo + KERNEL > H - 1) ? H - 1 : yo + KERNEL;
    const int xmin = (xo - KERNEL < 0) ? 0 : xo - KERNEL;
    const int xmax = (xo + KERNEL > W - 1) ? W - 1 : xo + KERNEL;
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a value of 255, squared. That gives a max accumulator of 0x8ee0900,
    // which fits into a 32 bit integer.
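    // (48 * 48) * 255 * 255 = 2304 * 65025 = 149817600 = 0x8ee0900.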
    uint32 w = 0, xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += ymin * stride1;
    rec += ymin * stride2;

    for (y = ymin; y <= ymax; ++y, org += stride1, rec += stride2)
    {
        const int Wy = K[KERNEL + y - yo];

        for (x = xmin; x <= xmax; ++x)
        {
            const int Wxy = Wy * K[KERNEL + x - xo];
            // TODO(skal): inlined assembly
            w   += Wxy;
            xm  += Wxy * org[x];
            ym  += Wxy * rec[x];
            xxm += Wxy * org[x] * org[x];
            xym += Wxy * org[x] * rec[x];
            yym += Wxy * rec[x] * rec[x];
        }
    }
    {
        const double iw = 1. / w;
        const double iwx = xm * iw;
        const double iwy = ym * iw;
        double sxx = xxm * iw - iwx * iwx;
        double syy = yym * iw - iwy * iwy;

        // small errors are possible, due to rounding. Clamp to zero.
        if (sxx < 0.) sxx = 0.;

        if (syy < 0.) syy = 0.;
        {
            const double sxsy = sqrt(sxx * syy);
            const double sxy = xym * iw - iwx * iwy;
            static const double C11 = (0.01 * 0.01) * (255 * 255);
            static const double C22 = (0.03 * 0.03) * (255 * 255);
            static const double C33 = (0.015 * 0.015) * (255 * 255);
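            // l, c and s are the usual SSIM luminance, contrast and structure
            // comparison terms; C11, C22 and C33 are the small stabilizing
            // constants that keep the ratios finite when the means or
            // variances approach zero. The window SSIM is the product l * c * s.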
            const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
            const double c = (2. * sxsy + C22) / (sxx + syy + C22);
            const double s = (sxy + C33) / (sxsy + C33);
            return l * c * s;
        }
    }
}
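// Same as get_ssimg(), but only valid for pixels at least KERNEL away from the
// image border, where the full 7x7 kernel always applies; the weight sum is
// then the constant 2304, so ki_w replaces the per-pixel accumulation of w.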
double get_ssimfull_kernelg(const uint8 *org, const uint8 *rec,
                            int xo, int yo, int W, int H,
                            const int stride1, const int stride2)
{
    // TODO(skal): use summed tables
    // worst case of accumulation is a weight of 48 = 16 + 2 * (11 + 4 + 1)
    // with a value of 255, squared. That gives a max accumulator of 0x8ee0900,
    // which fits into a 32 bit integer.
    int y_, x_;
    uint32 xm = 0, ym = 0, xxm = 0, xym = 0, yym = 0;
    org += (yo - KERNEL) * stride1;
    org += (xo - KERNEL);
    rec += (yo - KERNEL) * stride2;
    rec += (xo - KERNEL);
    for (y_ = 0; y_ < KERNEL_SIZE; ++y_, org += stride1, rec += stride2)
    {
        const int Wy = K[y_];

        for (x_ = 0; x_ < KERNEL_SIZE; ++x_)
        {
            const int Wxy = Wy * K[x_];
            // TODO(skal): inlined assembly
            const int org_x = org[x_];
            const int rec_x = rec[x_];
            xm  += Wxy * org_x;
            ym  += Wxy * rec_x;
            xxm += Wxy * org_x * org_x;
            xym += Wxy * org_x * rec_x;
            yym += Wxy * rec_x * rec_x;
        }
    }
    {
        const double iw = ki_w;
        const double iwx = xm * iw;
        const double iwy = ym * iw;
        double sxx = xxm * iw - iwx * iwx;
        double syy = yym * iw - iwy * iwy;

        // small errors are possible, due to rounding. Clamp to zero.
        if (sxx < 0.) sxx = 0.;

        if (syy < 0.) syy = 0.;
        {
            const double sxsy = sqrt(sxx * syy);
            const double sxy = xym * iw - iwx * iwy;
            static const double C11 = (0.01 * 0.01) * (255 * 255);
            static const double C22 = (0.03 * 0.03) * (255 * 255);
            static const double C33 = (0.015 * 0.015) * (255 * 255);
            const double l = (2. * iwx * iwy + C11) / (iwx * iwx + iwy * iwy + C11);
            const double c = (2. * sxsy + C22) / (sxx + syy + C22);
            const double s = (sxy + C33) / (sxsy + C33);
            return l * c * s;
        }
    }
}
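// Accumulates per-pixel SSIM over a whole plane: the KERNEL-wide border rows
// and columns use the clipping get_ssimg(), the interior uses the faster
// get_ssimfull_kernelg(). The caller normalizes the returned sum.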
double calc_ssimg(const uint8 *org, const uint8 *rec,
                  const int image_width, const int image_height,
                  const int stride1, const int stride2)
{
    int i, j;
    double SSIM = 0.;
    for (j = 0; j < KERNEL; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    for (j = KERNEL; j < image_height - KERNEL; ++j)
    {
        for (i = 0; i < KERNEL; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }

        for (i = KERNEL; i < image_width - KERNEL; ++i)
        {
            SSIM += get_ssimfull_kernelg(org, rec, i, j,
                                         image_width, image_height, stride1, stride2);
        }

        for (i = image_width - KERNEL; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }
    for (j = image_height - KERNEL; j < image_height; ++j)
    {
        for (i = 0; i < image_width; ++i)
        {
            SSIM += get_ssimg(org, rec, i, j, image_width, image_height, stride1, stride2);
        }
    }

    return SSIM;
}
double vp8_calc_ssimg
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    double *ssim_y,
    double *ssim_u,
    double *ssim_v
)
{
    double ssim_all = 0;
    int ysize  = source->y_width * source->y_height;
    int uvsize = ysize / 4;
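    // YV12 is 4:2:0: each chroma plane holds a quarter as many samples as luma.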
    *ssim_y = calc_ssimg(source->y_buffer, dest->y_buffer,
                         source->y_width, source->y_height,
                         source->y_stride, dest->y_stride);

    *ssim_u = calc_ssimg(source->u_buffer, dest->u_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);

    *ssim_v = calc_ssimg(source->v_buffer, dest->v_buffer,
                         source->uv_width, source->uv_height,
                         source->uv_stride, dest->uv_stride);
    ssim_all = (*ssim_y + *ssim_u + *ssim_v) / (ysize + uvsize + uvsize);
    *ssim_y /= ysize;
    *ssim_u /= uvsize;
    *ssim_v /= uvsize;
    return ssim_all;
}
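// The block-based SSIM below works on raw integer sums gathered per 16x16 or
// 8x8 window; the sum gathering is dispatched through the runtime-detected
// variance vtable (rtcd).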
void ssim_parms_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int i, j;

    for (i = 0; i < 16; i++, s += sp, r += rp)
    {
        for (j = 0; j < 16; j++)
        {
            *sum_s += s[j];
            *sum_r += r[j];
            *sum_sq_s += s[j] * s[j];
            *sum_sq_r += r[j] * r[j];
            *sum_sxr += s[j] * r[j];
        }
    }
}
void ssim_parms_8x8_c
(
    unsigned char *s,
    int sp,
    unsigned char *r,
    int rp,
    unsigned long *sum_s,
    unsigned long *sum_r,
    unsigned long *sum_sq_s,
    unsigned long *sum_sq_r,
    unsigned long *sum_sxr
)
{
    int i, j;

    for (i = 0; i < 8; i++, s += sp, r += rp)
    {
        for (j = 0; j < 8; j++)
        {
            *sum_s += s[j];
            *sum_r += r[j];
            *sum_sq_s += s[j] * s[j];
            *sum_sq_r += r[j] * r[j];
            *sum_sxr += s[j] * r[j];
        }
    }
}
const static long long c1 =  426148; // 256^2 * (.01*255)^2
const static long long c2 = 3835331; // 256^2 * (.03*255)^2
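// c1 = 65536 * (0.01 * 255)^2 = 65536 * 6.5025  ~= 426148
// c2 = 65536 * (0.03 * 255)^2 = 65536 * 58.5225 ~= 3835331
// These are the usual SSIM stabilizers (K1*L)^2 and (K2*L)^2, pre-scaled by
// 256^2 to match the unnormalized sums used in similarity().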
static double similarity
(
    unsigned long sum_s,
    unsigned long sum_r,
    unsigned long sum_sq_s,
    unsigned long sum_sq_r,
    unsigned long sum_sxr,
    int count
)
{
    long long ssim_n = (2 * sum_s * sum_r + c1) *
                       (2 * count * sum_sxr - 2 * sum_s * sum_r + c2);

    long long ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
                       (count * sum_sq_s - sum_s * sum_s +
                        count * sum_sq_r - sum_r * sum_r + c2);

    return ssim_n * 1.0 / ssim_d;
}
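// similarity() evaluates SSIM on unnormalized sums: with n = count,
// n*sum_sq_s - sum_s^2 = n^2 * var(s) and n*sum_sxr - sum_s*sum_r = n^2 * cov(s, r),
// so numerator and denominator are the standard SSIM factors scaled by n^2,
// matching the 256^2 scale built into c1 and c2.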
static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp,
                         const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
}
static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp,
                       const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
}
// TODO: (jbb) tried to scale this function such that we may be able to use it
// for a distortion metric in mode selection code (provided we do a reconstruction).
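// dssim() maps SSIM onto a distortion-like scale: ssim3 = 256 * (1 - ssim),
// and the returned value 256 * ssim3^2 grows quadratically as similarity drops.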
long dssim(unsigned char *s, int sp, unsigned char *r, int rp,
           const vp8_variance_rtcd_vtable_t *rtcd)
{
    unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    long long ssim_n;
    long long ssim_d;
    double ssim3;

    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
    ssim_n = (2 * sum_s * sum_r + c1) * (2 * 256 * sum_sxr - 2 * sum_s * sum_r + c2);

    ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
             (256 * sum_sq_s - sum_s * sum_s + 256 * sum_sq_r - sum_r * sum_r + c2);

    ssim3 = 256 * (ssim_d - ssim_n) / ssim_d;
    return (long)(256 * ssim3 * ssim3);
}
// TODO: (jbb) this 8x8 window might be too big + we may want to pick pixels
// such that the window regions overlap block boundaries to penalize blocking
double vp8_ssim2
(
    unsigned char *img1,
    unsigned char *img2,
    int stride_img1,
    int stride_img2,
    int width,
    int height,
    const vp8_variance_rtcd_vtable_t *rtcd
)
{
    int i, j;
    double ssim_total = 0;

    // we can sample points as frequently as we like; start with 1 per 8x8
    for (i = 0; i < height; i += 8, img1 += stride_img1 * 8, img2 += stride_img2 * 8)
    {
        for (j = 0; j < width; j += 8)
        {
            // offset by j to step through the row one 8x8 block at a time
            ssim_total += ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd);
        }
    }
    ssim_total /= (width / 8 * height / 8);
    return ssim_total;
}
double vp8_calc_ssim
(
    YV12_BUFFER_CONFIG *source,
    YV12_BUFFER_CONFIG *dest,
    int lumamask,
    double *weight,
    const vp8_variance_rtcd_vtable_t *rtcd
)
{
    double a, b, c;
    double ssimv;
    a = vp8_ssim2(source->y_buffer, dest->y_buffer,
                  source->y_stride, dest->y_stride, source->y_width,
                  source->y_height, rtcd);

    b = vp8_ssim2(source->u_buffer, dest->u_buffer,
                  source->uv_stride, dest->uv_stride, source->uv_width,
                  source->uv_height, rtcd);

    c = vp8_ssim2(source->v_buffer, dest->v_buffer,
                  source->uv_stride, dest->uv_stride, source->uv_width,
                  source->uv_height, rtcd);
    ssimv = a * .8 + .1 * (b + c);

    *weight = 1;

    return ssimv;
}