1 ; Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 ; Use of this source code is governed by a BSD-style license that can be
3 ; found in the LICENSE file.
5 %include "media/base/simd/media_export.asm"
6 %include "third_party/x86inc/x86inc.asm"
9 ; This file uses MMX and SSE2 instructions.
14 ; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
19 ; ptrdiff_t source_dx);
20 %define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
25 %assign stack_offset
0
26 extern mangle
(kCoefficientsRgbY
)
28 ; Parameters are in the following order:
36 PROLOGUE
6, 7, 3, Y
, U
, V
, ARGB
, WIDTH, SOURCE_DX
, COMP
45 LOAD_SYM TABLEq
, mangle
(kCoefficientsRgbY
)
55 movzx COMPd
, BYTE [Uq
+ INDEXq
]
56 movq xmm0
, [TABLEq
+ 2048 + 8 * COMPq
]
57 movzx COMPd
, BYTE [Vq
+ INDEXq
]
58 movq xmm1
, [TABLEq
+ 4096 + 8 * COMPq
]
61 lea INDEXq
, [Xq
+ SOURCE_DXq
] ; INDEXq now points to the next pixel.
63 movzx COMPd
, BYTE [Yq
+ Xq
]
64 paddsw xmm0
, xmm1
; Hide an ADD after the memory load.
65 movq xmm1
, [TABLEq
+ 8 * COMPq
]
68 lea Xq
, [INDEXq
+ SOURCE_DXq
] ; Xq now points to next pixel.
70 movzx COMPd
, BYTE [Yq
+ INDEXq
]
71 movq xmm2
, [TABLEq
+ 8 * COMPq
]
74 shufps xmm1
, xmm2
, 0x44 ; Join two pixels into one XMM register
77 movq
QWORD [ARGBq
], xmm1
84 and WIDTHq
, 1 ; odd number of pixels?
87 ; Read U V components.
90 movzx COMPd
, BYTE [Uq
+ INDEXq
]
91 movq xmm0
, [TABLEq
+ 2048 + 8 * COMPq
]
92 movzx COMPd
, BYTE [Vq
+ INDEXq
]
93 movq xmm1
, [TABLEq
+ 4096 + 8 * COMPq
]
96 ; Read one Y component.
99 movzx COMPd
, BYTE [Yq
+ INDEXq
]
100 movq xmm1
, [TABLEq
+ 8 * COMPq
]
104 movd
DWORD [ARGBq
], xmm1