2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
11 %include "vpx_ports/x86_abi_support.asm"
13 ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr
; TABULATE_SSIM folds the current working registers into the five running
; totals kept in xmm11..xmm15. The "0" that follows the macro name is its
; NASM parameter count, i.e. the macro takes no arguments.
;
; Visible accumulations:
;   xmm15 += xmm3, xmm14 += xmm4      paddusw: unsigned saturating 16-bit lanes
;   xmm13 += xmm1, xmm12 += xmm2,
;   xmm11 += xmm3                     paddq:   64-bit lanes
;
; NOTE(review): the embedded listing numbers jump (16 -> 19 -> 22 -> 24), so
; the instructions that prepare xmm1, xmm2 and xmm3 ahead of each paddq, and
; the closing %endmacro, are not visible in this excerpt. In particular xmm3
; is read twice (for sum_s and for sum_sxr) and is presumably overwritten in
; between -- confirm against the complete file.
14 %macro TABULATE_SSIM
0
15 paddusw xmm15
, xmm3
; sum_s
16 paddusw xmm14
, xmm4
; sum_r
19 paddq xmm13
, xmm1
; sum_sq_s
22 paddq xmm12
, xmm2
; sum_sq_r
24 paddq xmm11
, xmm3
; sum_sxr
27 ; Sum across the register %1 starting with q words
39 ; Sum across the register %1 starting with q words
47 ;void vp8_ssim_parms_16x16_sse3(
52 ; unsigned long *sum_s,
53 ; unsigned long *sum_r,
54 ; unsigned long *sum_sq_s,
55 ; unsigned long *sum_sq_r,
56 ; unsigned long *sum_sxr);
58 ; TODO: Use parm passing through structure, probably don't need the pxors
59 ; ( calling app will initialize to 0 ) could easily fit everything in sse2
60 ; without too much hassle, and can probably do better estimates with psadbw
61 ; or pavgb. At this point this is just meant to be a first pass for calculating
62 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
63 ; in mode selection code.
; Exported entry point vp8_ssim_parms_16x16_sse3 (the name is wrapped in sym()).
;
; Steps visible in this excerpt:
;   1. SHADOW_ARGS_TO_STACK 9 is invoked (presumably supplied by the included
;      vpx_ports/x86_abi_support.asm -- its expansion is not shown here).
;   2. xmm11..xmm15 are cleared; they are the five accumulators
;      (sum_sxr, sum_sq_r, sum_sq_s, sum_r, sum_s respectively).
;   3. rdx is loaded with 16, the number of rows to process.
;   4. xmm3 (source) and xmm4 (reference) are interleaved byte-wise with xmm0,
;      first the high halves, then the low halves.
;   5. rsi and rdi are advanced by rcx and rax to reach the next row.
;
; NOTE(review): the embedded listing numbers have gaps (68 -> 79, 85 -> 88 ->
; 93, 94 -> 100, 101 -> 105, and nothing after 106), so the argument loads,
; pixel loads, TABULATE_SSIM invocations, loop branch, result stores and
; epilogue are not visible. Do not treat this fragment as the whole routine.
; NOTE(review): xmm11..xmm15 are callee-saved under the Microsoft x64 ABI; no
; save/restore is visible in this excerpt -- confirm one exists for Win64.
64 global sym
(vp8_ssim_parms_16x16_sse3
)
65 sym
(vp8_ssim_parms_16x16_sse3
):
68 SHADOW_ARGS_TO_STACK
9
; clear the five accumulators before the row loop
79 pxor xmm15
,xmm15
;sum_s
80 pxor xmm14
,xmm14
;sum_r
81 pxor xmm13
,xmm13
;sum_sq_s
82 pxor xmm12
,xmm12
;sum_sq_r
83 pxor xmm11
,xmm11
;sum_sxr
85 mov rdx
, 16 ;row counter
88 ;grab source and reference pixels
; Interleave the upper 8 bytes of each row with xmm0.
; NOTE(review): this only widens bytes to zero-extended words if xmm0 is zero;
; the instruction that sets xmm0 is not visible here -- confirm.
93 punpckhbw xmm3
, xmm0
; high_s
94 punpckhbw xmm4
, xmm0
; high_r
; same widening for the lower 8 bytes of each row
100 punpcklbw xmm3
, xmm0
; low_s
101 punpcklbw xmm4
, xmm0
; low_r
; step both pointers to the following row; presumably rsi/rdi are the source
; and reference pointers and rcx/rax their strides -- verify against the
; argument loads, which are not part of this excerpt
105 add rsi
, rcx
; next s row
106 add rdi
, rax
; next r row
135 ;void vp8_ssim_parms_8x8_sse3(
140 ; unsigned long *sum_s,
141 ; unsigned long *sum_r,
142 ; unsigned long *sum_sq_s,
143 ; unsigned long *sum_sq_r,
144 ; unsigned long *sum_sxr);
146 ; TODO: Use parm passing through structure, probably don't need the pxors
147 ; ( calling app will initialize to 0 ) could easily fit everything in sse2
148 ; without too much hassle, and can probably do better estimates with psadbw
149 ; or pavgb. At this point this is just meant to be a first pass for calculating
150 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
151 ; in mode selection code.
; Exported entry point vp8_ssim_parms_8x8_sse3 (the name is wrapped in sym()).
;
; Steps visible in this excerpt:
;   1. SHADOW_ARGS_TO_STACK 9 is invoked (presumably supplied by the included
;      vpx_ports/x86_abi_support.asm -- its expansion is not shown here).
;   2. xmm11..xmm15 are cleared; they are the five accumulators
;      (sum_sxr, sum_sq_r, sum_sq_s, sum_r, sum_s respectively).
;   3. rdx is loaded with 8, the number of rows to process.
;   4. Only the low halves of xmm3 (source) and xmm4 (reference) are
;      interleaved byte-wise with xmm0; no high-half unpack appears here.
;   5. rsi and rdi are advanced by rcx and rax to reach the next row.
;
; NOTE(review): the embedded listing numbers have gaps (156 -> 167, 173 ->
; 176 -> 182, 183 -> 187, and nothing after 188), so the argument loads, pixel
; loads, TABULATE_SSIM invocations, loop branch, result stores and epilogue
; are not visible. Do not treat this fragment as the whole routine.
; NOTE(review): xmm11..xmm15 are callee-saved under the Microsoft x64 ABI; no
; save/restore is visible in this excerpt -- confirm one exists for Win64.
152 global sym
(vp8_ssim_parms_8x8_sse3
)
153 sym
(vp8_ssim_parms_8x8_sse3
):
156 SHADOW_ARGS_TO_STACK
9
; clear the five accumulators before the row loop
167 pxor xmm15
,xmm15
;sum_s
168 pxor xmm14
,xmm14
;sum_r
169 pxor xmm13
,xmm13
;sum_sq_s
170 pxor xmm12
,xmm12
;sum_sq_r
171 pxor xmm11
,xmm11
;sum_sxr
173 mov rdx
, 8 ;row counter
176 ;grab source and reference pixels
; Interleave the lower 8 bytes of each row with xmm0.
; NOTE(review): this only widens bytes to zero-extended words if xmm0 is zero;
; the instruction that sets xmm0 is not visible here -- confirm.
182 punpcklbw xmm3
, xmm0
; low_s
183 punpcklbw xmm4
, xmm0
; low_r
; step both pointers to the following row; presumably rsi/rdi are the source
; and reference pointers and rcx/rax their strides -- verify against the
; argument loads, which are not part of this excerpt
187 add rsi
, rcx
; next s row
188 add rdi
, rax
; next r row