1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
; Reset stack_offset to 0 before the PROLOGUE below.
; NOTE(review): presumably the x86inc-style stack bookkeeping symbol - confirm.
11 %assign stack_offset 0
; Coefficient table, referenced later through the TABLE alias.
13 extern mangle(kCoefficientsRgbY)
15 ; Parameters are in the following order:
; NOTE(review): the parameter list itself is not visible in this excerpt.
; Judging from the *_ARG_REG aliases below, the named registers are:
; Y, R0 (U plane), R1 (V plane), ARGB, R2 (width), R3 (source dx), and TEMP.
; NOTE(review): "6, 7, 3" presumably means 6 arguments, 7 general registers
; (TEMP being the extra scratch register) and 3 SIMD registers - confirm
; against the PROLOGUE macro definition.
23 PROLOGUE 6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP
; WORD_SIZE is the operand-size keyword used by the stack-slot defines
; (U_PLANE, V_PLANE, SOURCE_DX, SOURCE_WIDTH). Two alternative definitions
; follow; the conditional directive that selects one of them is not visible
; in this excerpt.
26 %define WORD_SIZE QWORD
28 %define WORD_SIZE DWORD
31 ; Define register aliases.
; Note the sharing: Xq and V_ARG_REGq are both R1q, and COMPLq/COMPLd and
; WIDTH_ARG_REGq are both R2. The *_ARG_REG names are the incoming argument
; values; the later code reads the V address and source width back from the
; stack slots V_PLANE and SOURCE_WIDTH instead.
32 %define Xq R1q ; Current X position
33 %define COMPLq R2q ; Component A value
34 %define COMPLd R2d ; Component A value
35 %define U_ARG_REGq R0q ; U plane address argument
36 %define V_ARG_REGq R1q ; V plane address argument
37 %define SOURCE_DX_ARG_REGq R3q ; Source dx argument
38 %define WIDTH_ARG_REGq R2q ; Width argument
; NOTE(review): the PIC and non-PIC define sets below are alternatives; the
; conditional directives that select between them are not visible in this
; excerpt.
41 ; PIC code shares a single register (R0) among COMPR, U and V, so the code
42 ; must take care not to mix them up. This frees R3q to hold the YUV table.
43 %define COMPRq R0q ; Component B value
44 %define COMPRd R0d ; Component B value
45 %define Uq R0q ; U plane address
46 %define Vq R0q ; V plane address
; PIC only: stack slot holding the U plane address, one slot above V_PLANE.
47 %define U_PLANE WORD_SIZE [rsp + 3 * gprsize]
48 %define TABLE R3q ; Address of the table
50 ; Non-PIC code defines.
; Here COMPR and V share R3, U keeps R0, and TABLE is the symbol itself
; rather than a register.
51 %define COMPRq R3q ; Component B value
52 %define COMPRd R3d ; Component B value
53 %define Uq R0q ; U plane address
54 %define Vq R3q ; V plane address
55 %define TABLE mangle(kCoefficientsRgbY)
58 ; Defines for stack variables. These are used in both PIC and non-PIC code.
; Slot layout, counted upwards from rsp in units of gprsize:
;   [rsp]               SOURCE_WIDTH
;   [rsp + gprsize]     SOURCE_DX
;   [rsp + 2 * gprsize] V_PLANE
;   [rsp + 3 * gprsize] U_PLANE (PIC define set only)
; NOTE(review): gprsize is presumably the general-purpose register size in
; bytes supplied by the included macro package - confirm.
59 %define V_PLANE WORD_SIZE [rsp + 2 * gprsize]
60 %define SOURCE_DX WORD_SIZE [rsp + gprsize]
61 %define SOURCE_WIDTH WORD_SIZE [rsp]
63 ; Handle stack variables differently for PIC and non-PIC code.
; NOTE(review): only part of each push sequence is visible in this excerpt;
; the pushes that fill the U_PLANE, V_PLANE and SOURCE_WIDTH slots are not
; shown here.
66 ; Define stack usage for PIC code. PIC code pushes the U plane onto the stack.
69 PUSH SOURCE_DX_ARG_REGq
70 imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx
73 ; Load the address of kCoefficientsRgbY into TABLE
; In the PIC define set TABLE is R3q, the same register as
; SOURCE_DX_ARG_REGq, so source_dx is copied to TEMPq before LOAD_SYM
; overwrites it, and the alias is then redirected to TEMPq.
74 mov TEMPq, SOURCE_DX_ARG_REGq ; Need to save source_dx first
75 LOAD_SYM TABLE, mangle(kCoefficientsRgbY)
76 %define SOURCE_DX_ARG_REGq TEMPq ; Overwrite SOURCE_DX_ARG_REGq to TEMPq
78 ; Define stack usage. Non-PIC code just pushes 3 registers onto the stack.
80 PUSH SOURCE_DX_ARG_REGq
81 imul WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq ; source_width = width * source_dx
; Choose the initial X from source_dx. With 0x8000 meaning 0.5 (see the
; comment on the mov), 0x20000 is 2.0 in the same fixed-point format.
; NOTE(review): the branch that consumes this compare, and any alternative
; initial value, are not visible in this excerpt.
94 cmp SOURCE_DX_ARG_REGq, 0x20000
96 mov Xq, 0x8000 ; x = 0.5 for 1/2 or less
101 mov Uq, U_PLANE ; PIC code saves U_PLANE on stack.
104 ; Define macros for scaling YUV components since they are reused.
; The two loads below use the macro parameter %1 as the plane base address and
; read two adjacent bytes at index TEMPq into COMPL and COMPR (zero-extended).
; NOTE(review): the %macro/%endmacro lines and the computation of TEMPq are
; not visible in this excerpt.
108 movzx COMPLd, BYTE [%1 + TEMPq]
109 movzx COMPRd, BYTE [%1 + TEMPq + 1]
118 SCALEUV Uq ; Use the above macro to scale U
; Load the 8-byte table entry indexed by COMPL at byte offset 2048 from TABLE.
119 movq mm0, [TABLE + 2048 + 8 * COMPLq]
121 mov Vq, V_PLANE ; Read V address from stack
122 SCALEUV Vq ; Use the above macro to scale V
; Add the entry at byte offset 4096 into mm0 as signed words with saturation.
123 paddsw mm0, [TABLE + 4096 + 8 * COMPLq]
; Same two-adjacent-byte load as for U/V, but from the Y plane.
; NOTE(review): presumably the body of the SCALEY macro used below - confirm.
128 movzx COMPLd, BYTE [Yq + TEMPq]
129 movzx COMPRd, BYTE [Yq + TEMPq + 1]
131 add Xq, SOURCE_DX ; Add source_dx from stack
139 SCALEY ; Use the above macro to scale Y1
; Entries for Y are read from the start of TABLE (no extra byte offset).
140 movq mm1, [TABLE + 8 * COMPLq]
; NOTE(review): the branch that follows this compare is not visible here.
142 cmp Xq, SOURCE_WIDTH ; Compare source_width from stack
145 SCALEY ; Use the above macro to scale Y2
146 movq mm2, [TABLE + 8 * COMPLq]
157 cmp Xq, SOURCE_WIDTH ; Compare source_width from stack