1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
6 %include "third_party/x86inc/x86inc.asm"
9 ; This file uses MMX and SSE2 instructions.
14 ; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
19 ; ptrdiff_t source_dx);
; Single-line macro holding the name of the routine implemented in this file.
20 %define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
; Reset the assembler-time variable stack_offset to 0.
; NOTE(review): stack_offset is presumably the bookkeeping counter maintained
; by the x86inc.asm macros -- confirm against third_party/x86inc/x86inc.asm.
25 %assign stack_offset
0
; Declare kCoefficientsRgbY (after name mangling) as a symbol defined in
; another object file.
26 extern mangle
(kCoefficientsRgbY
)
28 ; Parameters are in the following order:
; The PROLOGUE arguments below name them Y, U, V, ARGB, WIDTH and SOURCE_DX,
; followed by a seventh register named R1.
; NOTE(review): the leading "7, 7, 3" presumably follows the x86inc.asm
; PROLOGUE convention (argument count, general-purpose registers used, XMM
; registers used) -- confirm against third_party/x86inc/x86inc.asm.
; NOTE(review): TABLEq, Xq, INDEXq, COMPd and COMPq are register aliases whose
; definitions are not part of this excerpt; check them before changing
; register usage.
37 PROLOGUE
7, 7, 3, Y
, U
, V
, ARGB
, WIDTH, SOURCE_DX
, R1
; Chroma lookup (U): zero-extend the byte at [U + INDEX] into COMP, then load
; the 8-byte table entry at TABLE + 2048 + 8 * COMP into xmm0.
59 movzx COMPd
, BYTE [Uq
+ INDEXq
]
60 movq xmm0
, [TABLEq
+ 2048 + 8 * COMPq
]
; Chroma lookup (V): same pattern, but the entry comes from
; TABLE + 4096 + 8 * COMP and is loaded into xmm1.
61 movzx COMPd
, BYTE [Vq
+ INDEXq
]
62 movq xmm1
, [TABLEq
+ 4096 + 8 * COMPq
]
; INDEX = X + SOURCE_DX. lea is used for the addition, so flags are untouched.
65 lea INDEXq
, [Xq
+ SOURCE_DXq
] ; INDEXq now points to the next pixel.
; Luma lookup for the first pixel: zero-extend the byte at [Y + X] into COMP.
67 movzx COMPd
, BYTE [Yq
+ Xq
]
; xmm0 = U entry + V entry, added per 16-bit lane with signed saturation.
68 paddsw xmm0
, xmm1
; Hide an ADD after the memory load.
; xmm1 can be reused here because the V entry has already been added into xmm0.
; Load the 8-byte table entry at TABLE + 8 * COMP (indexed by the Y byte).
69 movq xmm1
, [TABLEq
+ 8 * COMPq
]
; X = INDEX + SOURCE_DX, i.e. one further SOURCE_DX step beyond INDEX.
72 lea Xq
, [INDEXq
+ SOURCE_DXq
] ; Xq now points to next pixel.
; Luma lookup for the second pixel: byte at [Y + INDEX], entry into xmm2.
74 movzx COMPd
, BYTE [Yq
+ INDEXq
]
75 movq xmm2
, [TABLEq
+ 8 * COMPq
]
; shufps with 0x44 keeps the low 64 bits of xmm1 in place and copies the low
; 64 bits of xmm2 into the high 64 bits of xmm1.
78 shufps xmm1
, xmm2
, 0x44 ; Join two pixels into one XMM register
; Store the low 8 bytes of xmm1 at the address in ARGB.
81 movq
QWORD [ARGBq
], xmm1
; Keep only bit 0 of WIDTH; the zero flag then tells whether it was clear.
88 and WIDTHq
, 1 ; odd number of pixels?
91 ; Read U V components.
; Same U/V table lookups as above: U entry into xmm0, V entry into xmm1.
94 movzx COMPd
, BYTE [Uq
+ INDEXq
]
95 movq xmm0
, [TABLEq
+ 2048 + 8 * COMPq
]
96 movzx COMPd
, BYTE [Vq
+ INDEXq
]
97 movq xmm1
, [TABLEq
+ 4096 + 8 * COMPq
]
100 ; Read one Y component.
103 movzx COMPd
, BYTE [Yq
+ INDEXq
]
; NOTE(review): this load overwrites xmm1, which still holds the V entry
; loaded above -- make sure the V entry has been combined into xmm0 before
; this point.
104 movq xmm1
, [TABLEq
+ 8 * COMPq
]
; Store the low 4 bytes of xmm1 at the address in ARGB.
108 movd
DWORD [ARGBq
], xmm1