2 * Alpha optimized DSP utils
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * These functions are scheduled for pca56. They should work
22 * reasonably on ev6, though.
27 /* Some nicer register names. */
32 /* Danger: these overlap with the argument list and the return value */
43 /************************************************************************
44 * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
45 * int line_size, int h)
48 .globl put_pixels_axp_asm # export the symbol so that C callers can link against it
49 .ent put_pixels_axp_asm # open the procedure put_pixels_axp_asm (paired with .end below)
138 .end put_pixels_axp_asm # close the procedure; NOTE(review): the entry label and instruction body are not visible in this excerpt
140 /************************************************************************
141 * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
145 .globl put_pixels_clamped_mvi_asm # export the symbol so that C callers can link against it
146 .ent put_pixels_clamped_mvi_asm # open the procedure (paired with .end below)
147 put_pixels_clamped_mvi_asm:
153 jsr AT, (AT), _mcount # call _mcount through the address in AT, return address also lands in AT (presumably the profiling hook; the guard around it is not visible here)
157 lda t9, 8 # loop counter (starts at 8; the decrement and branch are not visible here)
158 zap t8, 0xaa, t8 # 00ff00ff00ff00ff (zap clears bytes 1,3,5,7; assumes t8 held all ones before, that setup is not visible here)
193 .end put_pixels_clamped_mvi_asm # close the procedure opened by .ent
195 /************************************************************************
196 * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
200 .globl add_pixels_clamped_mvi_asm # export the symbol so that C callers can link against it
201 .ent add_pixels_clamped_mvi_asm # open the procedure (paired with .end below)
202 add_pixels_clamped_mvi_asm: # as used below: a0 = block (16-bit coefficients), a1 = pixels, a2 = line_size
208 jsr AT, (AT), _mcount # call _mcount through the address in AT (presumably the profiling hook; the guard around it is not visible here)
217 xor tg, t0, tg # 0x8000800080008000 (the sign bit of each 16-bit lane)
218 zap t1, 0xaa, tf # 0x00ff00ff00ff00ff (zap clears bytes 1,3,5,7; tf is the upper bound used by minsw4 below)
221 1: ldl t1, 0(a1) # pix0 (try to hit cache line soon); 32-bit load = 4 pixels of the current row
223 addq a1, a2, te # pixels += line_size (te = address of the following row, a1 is left untouched here)
224 ldq t0, 0(a0) # shorts0 (4 coefficients, 64-bit load)
226 ldl t7, 0(te) # pix2 (try to hit cache line soon); first 4 pixels of the following row
228 ldq t3, 8(a0) # shorts1
229 ldq t6, 16(a0) # shorts2
231 ldq t9, 24(a0) # shorts3
232 unpkbw t1, t1 # 0 0 (quarter/op no.): widen the 4 pix0 bytes to four 16-bit lanes
239 addq t0, t1, t0 # 0 3: shorts0 += pix0 (widened at op 0 0)
244 maxsw4 t0, zero, t0 # 0 5: clamp each signed 16-bit lane to >= 0
248 minsw4 t0, tf, t0 # 0 6: clamp each signed 16-bit lane to <= 0x00ff
249 addq t3, t4, t3 # 1 3: shorts1 += t4 (presumably widened pix1; its load and unpack are not visible here)
253 maxsw4 t3, zero, t3 # 1 5: clamp each signed 16-bit lane to >= 0
254 addq t6, t7, t6 # 2 3: shorts2 += t7 (pix2; its unpack step is not visible here)
258 minsw4 t3, tf, t3 # 1 6: clamp each signed 16-bit lane to <= 0x00ff
261 maxsw4 t6, zero, t6 # 2 5: clamp each signed 16-bit lane to >= 0
262 addq t9, ta, t9 # 3 3: shorts3 += ta (presumably widened pix3; its load and unpack are not visible here)
264 minsw4 t6, tf, t6 # 2 6: clamp each signed 16-bit lane to <= 0x00ff
267 maxsw4 t9, zero, t9 # 3 5: clamp each signed 16-bit lane to >= 0
268 lda a0, 32(a0) # block += 16; (16 coefficients of 2 bytes each = 32 bytes, matching the four ldq above)
271 minsw4 t9, tf, t9 # 3 6: clamp each signed 16-bit lane to <= 0x00ff
277 addq te, a2, a1 # pixels += line_size (a1 = te + line_size, so a1 has advanced two rows since the loop top)
283 .end add_pixels_clamped_mvi_asm # close the procedure opened by .ent