Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / media / base / simd / linear_scale_yuv_to_rgb_mmx_x64.asm
blob03213f2686df56ce94706447acf0b830cb188d73
1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
6 %include "third_party/x86inc/x86inc.asm"
9 ; This file uses MMX instructions, plus MOVNTQ (a non-temporal store from the SSE / extended-MMX set).
11 SECTION_TEXT
12 CPU MMX
14 ;void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
15 ; const uint8* u_buf,
16 ; const uint8* v_buf,
17 ; uint8* rgb_buf,
18 ; ptrdiff_t width,
19 ; ptrdiff_t source_dx, const int16* convert_table);
20 %define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
21 EXPORT SYMBOL
22 align function_align
24 mangle(SYMBOL):
25 %assign stack_offset 0
27 ; Parameters are in the following order:
28 ; 1. Y plane
29 ; 2. U plane
30 ; 3. V plane
31 ; 4. ARGB frame
32 ; 5. Width
33 ; 6. Source dx
34 ; 7. Conversion lookup table
36 PROLOGUE 7, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, R1
38 %define TABLEq r10
39 %define Xq r11
40 %define INDEXq r12
41 %define COMPRd r13d
42 %define COMPRq r13
43 %define FRACTIONq r14
44 %define COMPL R1
45 %define COMPLq R1q
46 %define COMPLd R1d
48 PUSH TABLEq
49 PUSH Xq
50 PUSH INDEXq
51 PUSH COMPRq
52 PUSH FRACTIONq
54 %macro EPILOGUE 0
55 POP FRACTIONq
56 POP COMPRq
57 POP INDEXq
58 POP Xq
59 POP TABLEq
60 %endmacro
62 mov TABLEq, R1q
64 imul WIDTHq, SOURCE_DXq ; source_width = width * source_dx
65 xor Xq, Xq ; x = 0
66 cmp SOURCE_DXq, 0x20000
67 jl .lscaleend
68 mov Xq, 0x8000 ; x = 0.5 for 1/2 or less
69 jmp .lscaleend
71 .lscaleloop:
72 ; Interpolate U
73 mov INDEXq, Xq
74 sar INDEXq, 0x11
75 movzx COMPLd, BYTE [Uq + INDEXq]
76 movzx COMPRd, BYTE [Uq + INDEXq + 1]
77 mov FRACTIONq, Xq
78 and FRACTIONq, 0x1fffe
79 imul COMPRq, FRACTIONq
80 xor FRACTIONq, 0x1fffe
81 imul COMPLq, FRACTIONq
82 add COMPLq, COMPRq
83 shr COMPLq, 17
84 movq mm0, [TABLEq + 2048 + 8 * COMPLq]
86 ; Interpolate V
87 movzx COMPLd, BYTE [Vq + INDEXq]
88 movzx COMPRd, BYTE [Vq + INDEXq + 1]
89 ; Trick: FRACTIONq still holds the complemented fraction from the U step, so
90 ; imul COMPL first, then flip it back for COMPR. Saves two instructions. :)
91 imul COMPLq, FRACTIONq
92 xor FRACTIONq, 0x1fffe
93 imul COMPRq, FRACTIONq
94 add COMPLq, COMPRq
95 shr COMPLq, 17
96 paddsw mm0, [TABLEq + 4096 + 8 * COMPLq]
98 ; Interpolate first Y1.
99 lea INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
100 ; Xq points to current pixel.
101 mov FRACTIONq, Xq
102 sar Xq, 0x10
103 movzx COMPLd, BYTE [Yq + Xq]
104 movzx COMPRd, BYTE [Yq + Xq + 1]
105 and FRACTIONq, 0xffff
106 imul COMPRq, FRACTIONq
107 xor FRACTIONq, 0xffff
108 imul COMPLq, FRACTIONq
109 add COMPLq, COMPRq
110 shr COMPLq, 16
111 movq mm1, [TABLEq + 8 * COMPLq]
113 ; Interpolate Y2 if available.
114 cmp INDEXq, WIDTHq
115 jge .lscalelastpixel
117 lea Xq, [INDEXq + SOURCE_DXq] ; Xq points to next pixel.
118 ; INDEXq points to current pixel.
119 mov FRACTIONq, INDEXq
120 sar INDEXq, 0x10
121 movzx COMPLd, BYTE [Yq + INDEXq]
122 movzx COMPRd, BYTE [Yq + INDEXq + 1]
123 and FRACTIONq, 0xffff
124 imul COMPRq, FRACTIONq
125 xor FRACTIONq, 0xffff
126 imul COMPLq, FRACTIONq
127 add COMPLq, COMPRq
128 shr COMPLq, 16
129 movq mm2, [TABLEq + 8 * COMPLq]
131 paddsw mm1, mm0
132 paddsw mm2, mm0
133 psraw mm1, 0x6
134 psraw mm2, 0x6
135 packuswb mm1, mm2
136 movntq [ARGBq], mm1
137 add ARGBq, 0x8
139 .lscaleend:
140 cmp Xq, WIDTHq
141 jl .lscaleloop
142 jmp .epilogue
144 .lscalelastpixel:
145 paddsw mm1, mm0
146 psraw mm1, 6
147 packuswb mm1, mm1
148 movd [ARGBq], mm1
150 .epilogue
151 EPILOGUE