1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
6 %include "third_party/x86inc/x86inc.asm"
9 ; This file uses MMX instructions.
14 ;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
19 ; ptrdiff_t source_dx);
20 %define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
25 %assign stack_offset
0
27 ; Parameters are in the following order:
34 ; 7. Conversion lookup table
36 PROLOGUE
7, 7, 3, Y
, U
, V
, ARGB
, WIDTH, SOURCE_DX
, R1
64 imul WIDTHq
, SOURCE_DXq
; source_width = width * source_dx
66 cmp SOURCE_DXq
, 0x20000
68 mov Xq
, 0x8000 ; x = 0.5 for 1/2 or less
75 movzx COMPLd
, BYTE [Uq
+ INDEXq
]
76 movzx COMPRd
, BYTE [Uq
+ INDEXq
+ 1]
78 and FRACTIONq
, 0x1fffe
79 imul COMPRq
, FRACTIONq
80 xor FRACTIONq
, 0x1fffe
81 imul COMPLq
, FRACTIONq
84 movq mm0
, [TABLEq
+ 2048 + 8 * COMPLq
]
87 movzx COMPLd
, BYTE [Vq
+ INDEXq
]
88 movzx COMPRd
, BYTE [Vq
+ INDEXq
+ 1]
89 ; Trick: multiply COMPL first, then COMPR (the opposite order from U above),
90 ; so the FRACTION value left over from U is reused. Saves two instructions. :)
91 imul COMPLq
, FRACTIONq
92 xor FRACTIONq
, 0x1fffe
93 imul COMPRq
, FRACTIONq
96 paddsw mm0
, [TABLEq
+ 4096 + 8 * COMPLq
]
98 ; Interpolate the first luma sample (Y1).
99 lea INDEXq
, [Xq
+ SOURCE_DXq
] ; INDEXq now points to next pixel.
100 ; Xq points to current pixel.
103 movzx COMPLd
, BYTE [Yq
+ Xq
]
104 movzx COMPRd
, BYTE [Yq
+ Xq
+ 1]
105 and FRACTIONq
, 0xffff
106 imul COMPRq
, FRACTIONq
107 xor FRACTIONq
, 0xffff
108 imul COMPLq
, FRACTIONq
111 movq mm1
, [TABLEq
+ 8 * COMPLq
]
113 ; Interpolate Y2 if available.
117 lea Xq
, [INDEXq
+ SOURCE_DXq
] ; Xq points to next pixel.
118 ; INDEXq points to current pixel.
119 mov FRACTIONq
, INDEXq
121 movzx COMPLd
, BYTE [Yq
+ INDEXq
]
122 movzx COMPRd
, BYTE [Yq
+ INDEXq
+ 1]
123 and FRACTIONq
, 0xffff
124 imul COMPRq
, FRACTIONq
125 xor FRACTIONq
, 0xffff
126 imul COMPLq
, FRACTIONq
129 movq mm2
, [TABLEq
+ 8 * COMPLq
]