/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4

        .set noat
        .set noreorder
        .arch pca56
        .text
/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * This code is written with a pca56 in mind.  For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
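/* For reference, this routine computes the sum of absolute differences (SAD)
 * over a 16x16 block: pix1 must be 8-byte aligned (it is loaded with ldq),
 * while pix2 may be unaligned.  A rough C model -- a sketch with a
 * hypothetical helper name, not the library's actual C fallback:
 *
 *     #include <stdint.h>
 *     #include <stdlib.h>
 *
 *     static int pix_abs16x16_ref(const uint8_t *pix1, const uint8_t *pix2,
 *                                 int line_size)
 *     {
 *         int sum = 0;
 *         for (int y = 0; y < 16; y++) {
 *             for (int x = 0; x < 16; x++)
 *                 sum += abs(pix1[x] - pix2[x]);
 *             pix1 += line_size;
 *             pix2 += line_size;
 *         }
 *         return sum;
 *     }
 *
 * The MVI code below does the same work eight pixels at a time: perr yields
 * the byte-wise SAD of two quadwords, so each row costs two perr instructions.
 */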
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        and     a1, 7, t0       # low bits of pix2: 8-byte aligned?
        clr     v0              # v0 accumulates the SAD
        lda     a3, 16          # h = 16 rows
        beq     t0, $aligned
$unaligned:
        /* Register usage, per pair of rows:
           line 0:
           t0:  left_u  -> left lo  -> left
           t2:  right_u -> right hi -> right
           line 1:
           t5:  left_u  -> left lo  -> left
           t7:  right_u -> right hi -> right  */
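        /* The three ldq_u per row fetch the aligned quadwords covering the 16
           unaligned bytes of pix2; extql/extqh/or then stitch them back
           together.  Roughly, in C terms, with sh = 8 * (pix2 & 7) (nonzero
           on this path, so the shifts below are well defined):
               left  = (left_u >> sh) | (mid     << (64 - sh));
               right = (mid    >> sh) | (right_u << (64 - sh));  */
        /* load line 0 */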
        ldq_u   t0, 0(a1)       # left_u
        ldq_u   t1, 8(a1)       # mid
        ldq_u   t2, 16(a1)      # right_u
        ldq     t3, 0(a0)       # ref left
        ldq     t4, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
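        /* load line 1 */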
        ldq_u   t5, 0(a1)       # left_u
        ldq_u   t6, 8(a1)       # mid
        ldq_u   t7, 16(a1)      # right_u
        ldq     t8, 0(a0)       # ref left
        ldq     t9, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
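        /* calc line 0: merge the unaligned halves, then perr against the reference */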
        extql   t0, a1, t0      # left lo
        extqh   t1, a1, ta      # left hi
        extql   t1, a1, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a1, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
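        /* calc line 1 */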
        extql   t5, a1, t5      # left lo
        extqh   t6, a1, ta      # left hi
        extql   t6, a1, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a1, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
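        /* loop: two rows per iteration */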
        subq    a3, 2, a3       # h -= 2
        bne     a3, $unaligned
        ret
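        /* Aligned case: pix2 is 8-byte aligned, so plain ldq suffices and
           four rows are processed per iteration. */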
$aligned:
        /* load line 0 */
        ldq     t0, 0(a1)       # left
        ldq     t1, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t2, 0(a0)       # ref left
        ldq     t3, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
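        /* load line 1 */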
        ldq     t4, 0(a1)       # left
        ldq     t5, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t6, 0(a0)       # ref left
        ldq     t7, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
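        /* load line 2 */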
        ldq     t8, 0(a1)       # left
        ldq     t9, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     ta, 0(a0)       # ref left
        ldq     tb, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
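        /* load line 3 */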
        ldq     tc, 0(a1)       # left
        ldq     td, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     te, 0(a0)       # ref left
        ldq     tf, 8(a0)       # ref right
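        /* calc line 0 (the final pix1 update is interleaved here to hide load latency) */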
        perr    t0, t2, t0      # error left
        addq    a0, a2, a0      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
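        /* calc line 1 */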
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
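        /* calc line 2 */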
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
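        /* calc line 3 */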
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, tf, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
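        /* loop: four rows per iteration */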
        subq    a3, 4, a3       # h -= 4
        bne     a3, $aligned
        ret
        .end pix_abs16x16_mvi_asm