1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
; Function setup. PROLOGUE and DEFINE_ARGS are macros defined outside this
; file (presumably the x86inc-style helpers reached through the %include
; above -- TODO confirm). stack_offset is reset to 0 before PROLOGUE runs.
11 %assign stack_offset 0
; Assuming the x86inc operand order (#args, #regs, #vector regs, names...):
; seven arguments in seven general-purpose registers plus three vector
; registers, with the arguments named Y, U, V, A, ARGB, WIDTH, TABLE.
12 PROLOGUE 7, 7, 3, Y, U, V, A, ARGB, WIDTH, TABLE
; Rename the registers: the first five names are unchanged, the sixth name
; becomes TABLE (was WIDTH) and the seventh becomes TEMP (was TABLE).
; NOTE(review): if names are bound positionally, TABLEq now denotes the
; register that PROLOGUE called WIDTH, so the table pointer must be copied
; into it (and WIDTH preserved elsewhere) before the lookups below -- confirm.
14 DEFINE_ARGS Y, U, V, A, ARGB, TABLE, TEMP
; Table layout implied by the addressing below: every lookup uses a
; zero-extended source byte scaled by 8, i.e. 8-byte entries, and the bases
; are 2048 bytes (256 entries) apart:
;   TABLE + 0    : indexed by a Y byte
;   TABLE + 2048 : indexed by a U byte
;   TABLE + 4096 : indexed by a V byte
;   TABLE + 6144 : indexed by an A byte
;
; Pixel pair: one U byte and one V byte are read for two Y bytes.
; mm0 = U entry.
19 movzx TEMPd, BYTE [Uq]
20 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
; mm0 += V entry (paddsw: four signed 16-bit lanes, saturating add).
23 movzx TEMPd, BYTE [Vq]
24 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
; mm1 = Y entry for the first pixel of the pair (byte at Y + 0).
27 movzx TEMPd, BYTE [Yq]
28 movq mm1, [TABLEq + 8 * TEMPq]
; mm2 = Y entry for the second pixel of the pair (byte at Y + 1).
30 movzx TEMPd, BYTE [Yq + 1]
31 movq mm2, [TABLEq + 8 * TEMPq]
34 ; Add UV components to Y component.
38 ; Down shift and then pack.
49 ; Add one to our alpha values. This is a somewhat unfortunate hack: while
50 ; the pack/unpack above handle saturating any negative numbers to 0, they also
51 ; truncate the alpha value to 255. The math ahead wants to produce the same
52 ; ARGB alpha value as the source pixel in YUVA, but this depends on the alpha
53 ; value in |mm0| and |mm1| being 256 (let A be the source image alpha:
54 ; 256 * A >> 8 == A, whereas 255 * A >> 8 is off by one except at 0).
61 ; Multiply by alpha value, then repack high bytes of words.
; mm2 = alpha entry for the first pixel of the pair (byte at A + 0).
62 movzx TEMPd, BYTE [Aq]
63 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
; mm2 = alpha entry for the second pixel of the pair (byte at A + 1).
; NOTE(review): this reload replaces the value fetched for A + 0, so that
; value has to be consumed before this point -- confirm.
66 movzx TEMPd, BYTE [Aq + 1]
67 movq mm2, [TABLEq + 6144 + 8 * TEMPq]
80 ; If number of pixels is odd then compute it.
; Single trailing pixel: same lookups as the pair path, but only one Y byte.
; mm0 = U entry + V entry (saturating signed 16-bit add), mm1 = Y entry.
84 movzx TEMPd, BYTE [Uq]
85 movq mm0, [TABLEq + 2048 + 8 * TEMPq]
86 movzx TEMPd, BYTE [Vq]
87 paddsw mm0, [TABLEq + 4096 + 8 * TEMPq]
88 movzx TEMPd, BYTE [Yq]
89 movq mm1, [TABLEq + 8 * TEMPq]
94 ; Multiply ARGB by alpha value.
98 ; See above note about this hack.
; mm0 is reused for the alpha entry here, replacing the U + V sum loaded above.
104 movzx TEMPd, BYTE [Aq]
105 movq mm0, [TABLEq + 6144 + 8 * TEMPq]