1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
10 ; Non-PIC code is the fastest so use this if possible.
; ---------------------------------------------------------------------------
; Non-PIC variant. Judging by the argument names (Y, U, V, A, ARGB, WIDTH)
; this converts a row of YUVA samples to ARGB using the MMX registers
; mm0-mm2.
; NOTE(review): this excerpt omits lines (labels, arithmetic, packing,
; stores, loop control and the return are not visible), so the notes below
; describe only the table look-ups that can be seen here.
;
; Every look-up reads one 8-byte entry of kCoefficientsRgbY, indexed by a
; zero-extended byte (movzx). With 8-byte entries and a byte index, each
; sub-table spans 256 * 8 = 2048 bytes, which matches the offsets used:
;   +0     indexed by a Y sample
;   +2048  indexed by a U sample
;   +4096  indexed by a V sample
;   +6144  indexed by an A (alpha) sample
; In this variant the table symbol is used directly as the displacement of
; each memory operand.
; ---------------------------------------------------------------------------
; Reset the assemble-time stack_offset variable before PROLOGUE.
13 %assign stack_offset 0
; PROLOGUE is a macro defined outside this excerpt; presumably the
; x86inc-style form (argument count, GPR count, vector register count,
; followed by the register names) -- TODO confirm against its definition.
14 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP
; The coefficient table is defined in another object file.
15 extern mangle(kCoefficientsRgbY)
; --- Two-sample section: one U byte and one V byte are read, but two Y
; bytes ([Yq], [Yq + 1]) and two A bytes ([Aq], [Aq + 1]).
; TEMP = U sample; mm0 = U entry (sub-table at +2048).
19 movzx TEMPd, BYTE [Uq]
20 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq]
; TEMP = V sample; mm0 += V entry (sub-table at +4096). paddsw adds the
; four 16-bit words with signed saturation.
22 movzx TEMPd, BYTE [Vq]
23 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq]
; mm1 = entry for the first Y sample (sub-table at +0).
25 movzx TEMPd, BYTE [Yq]
26 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq]
; mm2 = entry for the second Y sample.
27 movzx TEMPd, BYTE [Yq + 1]
28 movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPq]
36 ; Multiply ARGB by alpha value.
; mm2 = alpha entry for [Aq] (sub-table at +6144). This overwrites the Y
; entry loaded above; presumably the elided lines consume it first.
41 movzx TEMPd, BYTE [Aq]
42 movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq]
; mm2 = alpha entry for [Aq + 1]. mm2 is reloaded, so the previous alpha
; entry is presumably used by instructions not shown in this excerpt.
45 movzx TEMPd, BYTE [Aq + 1]
46 movq mm2, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq]
59 ; If the number of pixels is odd, compute the last remaining pixel.
; --- Single-sample section: the same look-ups as above, but for one Y and
; one A sample only.
; mm0 = U entry + V entry (signed saturating word add).
63 movzx TEMPd, BYTE [Uq]
64 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPq]
65 movzx TEMPd, BYTE [Vq]
66 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPq]
; mm1 = Y entry.
67 movzx TEMPd, BYTE [Yq]
68 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPq]
73 ; Multiply ARGB by alpha value.
; mm0 = alpha entry for [Aq]. This overwrites the U+V sum held in mm0;
; presumably the elided lines have already folded it into mm1.
76 movzx TEMPd, BYTE [Aq]
77 movq mm0, [mangle(kCoefficientsRgbY) + 6144 + 8 * TEMPq]
88 ; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
89 ; This code is slower than the above version.
; ---------------------------------------------------------------------------
; PIC variant. It performs the same table look-ups as the non-PIC code, but
; the address of kCoefficientsRgbY is first loaded into a register (TABLE)
; and every memory operand is formed relative to that register.
; NOTE(review): this excerpt omits lines (labels, arithmetic, packing,
; stores, loop control and the return are not visible), so the notes below
; describe only what can be seen here.
;
; Table layout implied by the addressing (8-byte entries, byte index, so
; each sub-table spans 256 * 8 = 2048 bytes):
;   +0     indexed by a Y sample
;   +2048  indexed by a U sample
;   +4096  indexed by a V sample
;   +6144  indexed by an A (alpha) sample
; ---------------------------------------------------------------------------
; Reset the assemble-time stack_offset variable before PROLOGUE.
92 %assign stack_offset 0
; PROLOGUE is a macro defined outside this excerpt; presumably the
; x86inc-style form (argument count, GPR count, vector register count,
; followed by the register names) -- TODO confirm against its definition.
93 PROLOGUE 6, 7, 3, Y, U, V, A, ARGB, WIDTH, TEMP
; The coefficient table is defined in another object file.
94 extern mangle(kCoefficientsRgbY)
; Re-name the registers: the sixth name changes from WIDTH to TABLE.
; NOTE(review): the WIDTH value must be preserved by code that is not
; visible in this excerpt (original line 95 is elided) -- verify.
96 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP
; LOAD_SYM is a macro defined outside this excerpt; per the section comment
; it loads the address of kCoefficientsRgbY, here into TABLEq.
97 LOAD_SYM TABLEq, mangle(kCoefficientsRgbY)
; --- Two-sample section: one U byte and one V byte are read, but two Y
; bytes ([Yq], [Yq + 1]) and two A bytes ([Aq], [Aq + 1]).
; TEMP = U sample; mm0 = U entry (sub-table at +2048).
101 movzx TEMPd, BYTE [Uq]
102 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
; TEMP = V sample; mm0 += V entry (sub-table at +4096). paddsw adds the
; four 16-bit words with signed saturation.
105 movzx TEMPd, BYTE [Vq]
106 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
; mm1 = entry for the first Y sample (sub-table at +0).
109 movzx TEMPd, BYTE [Yq]
110 movq mm1, [TABLEq + 8 * TEMPq]
; mm2 = entry for the second Y sample.
112 movzx TEMPd, BYTE [Yq + 1]
113 movq mm2, [TABLEq + 8 * TEMPq]
; The instructions belonging to the next three comments are elided from
; this excerpt.
116 ; Add UV components to Y component.
120 ; Down shift and then pack.
125 ; Unpack and multiply by alpha value, then repack high bytes of words.
; mm2 = alpha entry for [Aq] (sub-table at +6144). This overwrites the Y
; entry loaded above; presumably the elided lines consume it first.
130 movzx TEMPd, BYTE [Aq]
131 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
; mm2 = alpha entry for [Aq + 1]. mm2 is reloaded, so the previous alpha
; entry is presumably used by instructions not shown in this excerpt.
134 movzx TEMPd, BYTE [Aq + 1]
135 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
148 ; If the number of pixels is odd, compute the last remaining pixel.
; --- Single-sample section: the same look-ups as above, but for one Y and
; one A sample only.
; mm0 = U entry + V entry (signed saturating word add).
152 movzx TEMPd, BYTE [Uq]
153 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
154 movzx TEMPd, BYTE [Vq]
155 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
; mm1 = Y entry.
156 movzx TEMPd, BYTE [Yq]
157 movq mm1, [TABLEq + 8 * TEMPq]
162 ; Multiply ARGB by alpha value.
; mm0 = alpha entry for [Aq]. This overwrites the U+V sum held in mm0;
; presumably the elided lines have already folded it into mm1.
165 movzx TEMPd, BYTE [Aq]
166 movq mm0, [TABLEq + 6144 + 8 * TEMPq]