1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
10 ; Non-PIC code is the fastest so use this if possible.
; Non-PIC variant: every table operand names mangle(kCoefficientsRgbY)
; directly, so no register is spent on holding the table address.
;
; Table addressing used below (the index is always a zero-extended byte and is
; scaled by 8, i.e. each entry is 8 bytes wide):
;   table +    0 + 8 * y   indexed by a Y byte
;   table + 2048 + 8 * u   indexed by a U byte (2048 = 256 * 8)
;   table + 4096 + 8 * v   indexed by a V byte (4096 = 512 * 8)
; NOTE(review): presumably these are three consecutive 256-entry sub-tables
; holding the per-channel Y, U and V contributions - confirm against the
; definition of kCoefficientsRgbY.
13 %assign stack_offset 0
; Names the operands used below: Y, U, V, ARGB, WIDTH, TEMPU, TEMPV.
; NOTE(review): presumably an x86inc-style PROLOGUE (5 arguments, 7 general
; registers, 3 vector registers) - confirm against the macro definition.
14 PROLOGUE 5, 7, 3, Y, U, V, ARGB, WIDTH, TEMPU, TEMPV
15 extern mangle(kCoefficientsRgbY)
; Fetch one U byte and one V byte, zero-extended, to use as table indices.
19 movzx TEMPUd, BYTE [Uq]
21 movzx TEMPVd, BYTE [Vq]
; mm0 = table entry selected by the U byte.
23 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
; The U index has been consumed, so TEMPU is reused for the first Y byte.
24 movzx TEMPUd, BYTE [Yq]
; mm0 += table entry selected by the V byte (packed signed words, saturating).
25 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
; The V index has been consumed, so TEMPV is reused for the second Y byte.
26 movzx TEMPVd, BYTE [Yq + 1]
; mm1 / mm2 = table entries for the two consecutive Y bytes; both share the
; single U/V sum held in mm0.
27 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
29 movq mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPVq]
42 ; If the number of pixels is odd, compute the one leftover pixel.
; Same lookups as for a pixel pair, but only a single Y byte is read.
46 movzx TEMPUd, BYTE [Uq]
; mm0 = table entry selected by the U byte.
47 movq mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
48 movzx TEMPVd, BYTE [Vq]
; mm0 += table entry selected by the V byte (packed signed words, saturating).
49 paddsw mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
; TEMPU is reused for the Y byte once the U lookup is done.
50 movzx TEMPUd, BYTE [Yq]
; mm1 = table entry selected by the Y byte.
51 movq mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
61 ; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
62 ; This code is slower than the above version.
; PIC variant: the address of mangle(kCoefficientsRgbY) is loaded once into
; TABLE and every lookup is made relative to that register. Only one scratch
; register (TEMP) is available for indices, so each byte load is immediately
; followed by the lookup that consumes it.
;
; Table addressing used below (the index is always a zero-extended byte and is
; scaled by 8, i.e. each entry is 8 bytes wide):
;   TABLE +    0 + 8 * y   indexed by a Y byte
;   TABLE + 2048 + 8 * u   indexed by a U byte (2048 = 256 * 8)
;   TABLE + 4096 + 8 * v   indexed by a V byte (4096 = 512 * 8)
; NOTE(review): presumably these are three consecutive 256-entry sub-tables
; holding the per-channel Y, U and V contributions - confirm against the
; definition of kCoefficientsRgbY.
65 %assign stack_offset 0
; Names the operands used below: Y, U, V, ARGB, WIDTH, TEMP, TABLE.
; NOTE(review): presumably an x86inc-style PROLOGUE (5 arguments, 7 general
; registers, 3 vector registers) - confirm against the macro definition.
66 PROLOGUE 5, 7, 3, Y, U, V, ARGB, WIDTH, TEMP, TABLE
68 extern mangle(kCoefficientsRgbY)
; TABLE = address of the coefficient table, used as the base of every lookup.
69 LOAD_SYM TABLEq, mangle(kCoefficientsRgbY)
; mm0 = table entry selected by the U byte.
74 movzx TEMPd, BYTE [Uq]
75 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
; mm0 += table entry selected by the V byte (packed signed words, saturating).
78 movzx TEMPd, BYTE [Vq]
79 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
; mm1 = table entry for the first Y byte.
82 movzx TEMPd, BYTE [Yq]
83 movq mm1, [TABLEq + 8 * TEMPq]
; mm2 = table entry for the second Y byte; both Y entries share the single
; U/V sum held in mm0.
85 movzx TEMPd, BYTE [Yq + 1]
86 movq mm2, [TABLEq + 8 * TEMPq]
89 ; Add UV components to Y component.
93 ; Down shift and then pack.
104 ; If the number of pixels is odd, compute the one leftover pixel.
; Same lookups as for a pixel pair, but only a single Y byte is read.
; mm0 = table entry selected by the U byte.
108 movzx TEMPd, BYTE [Uq]
109 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
; mm0 += table entry selected by the V byte (packed signed words, saturating).
110 movzx TEMPd, BYTE [Vq]
111 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
; mm1 = table entry selected by the Y byte.
112 movzx TEMPd, BYTE [Yq]
113 movq mm1, [TABLEq + 8 * TEMPq]