3 void j_rev_dct_ARM(DCTBLOCK data)
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@ Fixed-point multipliers. Each FIX_<x> is the decimal value <x> scaled by
@ 2^13 (8192) and rounded to an integer, e.g. 0.541196100 * 8192 is about 4433.
@ The FIX_M_<x> names hold the negated values. FIX_0xFFFF is the literal 0xFFFF.
27 #define FIX_0_298631336 2446
28 #define FIX_0_541196100 4433
29 #define FIX_0_765366865 6270
30 #define FIX_1_175875602 9633
31 #define FIX_1_501321110 12299
32 #define FIX_2_053119869 16819
33 #define FIX_3_072711026 25172
34 #define FIX_M_0_390180644 -3196
35 #define FIX_M_0_899976223 -7373
36 #define FIX_M_1_847759065 -15137
37 #define FIX_M_1_961570560 -16069
38 #define FIX_M_2_562915447 -20995
39 #define FIX_0xFFFF 0xFFFF
@ Byte offsets of the constants inside the constants array. They are used as
@ immediate displacements from r11 in the ldr instructions of the routine and
@ advance by 4 (one 32-bit word per entry), in the same order as the .word
@ entries of the table.
41 #define FIX_0_298631336_ID 0
42 #define FIX_0_541196100_ID 4
43 #define FIX_0_765366865_ID 8
44 #define FIX_1_175875602_ID 12
45 #define FIX_1_501321110_ID 16
46 #define FIX_2_053119869_ID 20
47 #define FIX_3_072711026_ID 24
48 #define FIX_M_0_390180644_ID 28
49 #define FIX_M_0_899976223_ID 32
50 #define FIX_M_1_847759065_ID 36
51 #define FIX_M_1_961570560_ID 40
52 #define FIX_M_2_562915447_ID 44
53 #define FIX_0xFFFF_ID 48
@ Function entry: save registers and set up the pointers and counter used by
@ the row pass. r0 holds the DCT block pointer on entry.
59 stmdb sp!, { r4 - r12, lr } @ push r4-r12 and lr (all callee saved regs); lr is reused below as a data pointer
61 sub sp, sp, #4 @ reserve one 4-byte slot on the stack
62 str r0, [ sp ] @ save the DCT pointer to that slot
64 mov lr, r0 @ lr = pointer to the current row (starts at the first row)
65 mov r12, #8 @ r12 = row-counter, initialised to 8
@ In ARM state pc reads as the address of the current instruction plus 8,
@ hence the -8 in the pc-relative displacement below.
66 add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
@ Row pass: lr points at the current row of eight signed 16-bit values.
@ The dN names do not follow memory order: byte offsets 0, 2, 4, 6 hold
@ d0, d2, d4, d6 and byte offsets 8, 10, 12, 14 hold d1, d3, d5, d7.
68 ldrsh r0, [lr, # 0] @ r0 = 'd0' (sign-extended halfword at offset 0)
69 ldrsh r1, [lr, # 8] @ r1 = 'd1' (sign-extended halfword at offset 8)
71 @ Optimization for rows that have all items except the first set to 0
72 @ (this works as the DCTELEMS are always 4-byte aligned)
@ NOTE(review): the instructions that test the row and set the flags for the
@ branch below are not visible in this excerpt.
80 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
@ Even part: combine d0, d2, d4, d6 into tmp10..tmp13.
84 ldrsh r2, [lr, # 2] @ r2 = 'd2'
85 ldrsh r4, [lr, # 4] @ r4 = 'd4'
86 ldrsh r6, [lr, # 6] @ r6 = 'd6'
88 ldr r3, [r11, #FIX_0_541196100_ID]
@ NOTE(review): r7 is used as a multiplicand below, but the instruction that
@ sets it is not visible in this excerpt (presumably r7 = d2 + d6; confirm).
90 ldr r5, [r11, #FIX_M_1_847759065_ID]
91 mul r7, r3, r7 @ r7 = z1 = r3 * r7
92 ldr r3, [r11, #FIX_0_765366865_ID]
93 mla r6, r5, r6, r7 @ r6 = tmp2 = r5 * d6 + z1
94 add r5, r0, r4 @ r5 = tmp0 = d0 + d4
95 mla r2, r3, r2, r7 @ r2 = tmp3 = r3 * d2 + z1
96 sub r3, r0, r4 @ r3 = tmp1 = d0 - d4
@ tmp0 and tmp1 are shifted left by 13 so that they share the 2^13 scale of
@ the products computed above.
98 add r0, r2, r5, lsl #13 @ r0 = tmp10 = (tmp0 << 13) + tmp3
99 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 = (tmp0 << 13) - tmp3
100 add r4, r6, r3, lsl #13 @ r4 = tmp11 = (tmp1 << 13) + tmp2
101 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 = (tmp1 << 13) - tmp2
@ stmdb stores the listed registers in ascending register order, so the stack
@ holds r0, r2, r3, r4; the ldmia further down reloads them into r0, r2, r4, r6.
103 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
@ Odd part: combine d1, d3, d5, d7 into tmp0..tmp3. Constant loads alternate
@ between r9 and r10 and are interleaved with the multiplies.
105 ldrsh r3, [lr, #10] @ r3 = 'd3'
106 ldrsh r5, [lr, #12] @ r5 = 'd5'
107 ldrsh r7, [lr, #14] @ r7 = 'd7'
109 add r0, r3, r5 @ r0 = 'z2' = d3 + d5
110 add r2, r1, r7 @ r2 = 'z1' = d1 + d7
111 add r4, r3, r7 @ r4 = 'z3' = d3 + d7
112 add r6, r1, r5 @ r6 = 'z4' = d1 + d5
113 ldr r9, [r11, #FIX_1_175875602_ID]
114 add r8, r4, r6 @ r8 = z3 + z4
115 ldr r10, [r11, #FIX_M_0_899976223_ID]
116 mul r8, r9, r8 @ r8 = 'z5' = r9 * (z3 + z4)
117 ldr r9, [r11, #FIX_M_2_562915447_ID]
118 mul r2, r10, r2 @ r2 = 'z1' (now scaled by r10)
119 ldr r10, [r11, #FIX_M_1_961570560_ID]
120 mul r0, r9, r0 @ r0 = 'z2' (now scaled by r9)
121 ldr r9, [r11, #FIX_M_0_390180644_ID]
122 mla r4, r10, r4, r8 @ r4 = 'z3' = r10 * z3 + z5
123 ldr r10, [r11, #FIX_0_298631336_ID]
124 mla r6, r9, r6, r8 @ r6 = 'z4' = r9 * z4 + z5
125 ldr r9, [r11, #FIX_2_053119869_ID]
126 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 (r10 * d7 + z1)
127 ldr r10, [r11, #FIX_3_072711026_ID]
128 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 (r9 * d5 + z2)
129 ldr r9, [r11, #FIX_1_501321110_ID]
130 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 (r10 * d3 + z2)
131 add r7, r7, r4 @ r7 = tmp0 (z3 added)
132 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 (r9 * d1 + z1)
133 add r5, r5, r6 @ r5 = tmp1 (z4 added)
134 add r3, r3, r4 @ r3 = tmp2 (z3 added)
135 add r1, r1, r6 @ r1 = tmp3 (z4 added)
@ Reload the even-part results saved above. tmp12 (saved from r3) lands in r4
@ and tmp11 (saved from r4) lands in r6.
137 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
138 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ NOTE(review): only the headings of the eight output computations are visible
@ below; the instructions that descale and store each value are not part of
@ this excerpt.
140 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
146 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
152 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
158 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
164 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
170 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
176 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
182 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
@ NOTE(review): the flag-setting instruction for this branch is not visible;
@ presumably it is the row-counter update in r12. Confirm.
192 beq start_column_loop
@ NOTE(review): the two instructions below appear to belong to the shortcut
@ for rows whose only non-zero item is the first one; the surrounding label,
@ masking and stores are not visible in this excerpt.
195 ldr r1, [r11, #FIX_0xFFFF_ID] @ r1 = the 0xFFFF entry of the constants array
198 add r0, r0, r0, lsl #16 @ r0 = r0 + (r0 << 16): copies the low halfword into the high half when the high half of r0 is zero
211 @ Start of column loop
@ Column pass: elements of one column are 16 bytes apart (one row of eight
@ 16-bit values), so the byte offset (2*N*8) addresses row N, and here dN is
@ the element in row N.
@ NOTE(review): lr is used as the column pointer; the instructions that set it
@ up for this pass are not visible in this excerpt.
215 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0' (row 0)
216 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2' (row 2)
217 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4' (row 4)
218 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6' (row 6)
@ Even part: combine d0, d2, d4, d6 into tmp10..tmp13.
220 ldr r3, [r11, #FIX_0_541196100_ID]
@ NOTE(review): r1 is used as a multiplicand below, but the instruction that
@ sets it is not visible in this excerpt (presumably r1 = d2 + d6; confirm).
222 ldr r5, [r11, #FIX_M_1_847759065_ID]
223 mul r1, r3, r1 @ r1 = z1 = r3 * r1
224 ldr r3, [r11, #FIX_0_765366865_ID]
225 mla r6, r5, r6, r1 @ r6 = tmp2 = r5 * d6 + z1
226 add r5, r0, r4 @ r5 = tmp0 = d0 + d4
227 mla r2, r3, r2, r1 @ r2 = tmp3 = r3 * d2 + z1
228 sub r3, r0, r4 @ r3 = tmp1 = d0 - d4
@ tmp0 and tmp1 are shifted left by 13 so that they share the 2^13 scale of
@ the products computed above.
230 add r0, r2, r5, lsl #13 @ r0 = tmp10 = (tmp0 << 13) + tmp3
231 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 = (tmp0 << 13) - tmp3
232 add r4, r6, r3, lsl #13 @ r4 = tmp11 = (tmp1 << 13) + tmp2
233 rsb r6, r6, r3, lsl #13 @ r6 = tmp12 = (tmp1 << 13) - tmp2
235 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1' (row 1)
236 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3' (row 3)
237 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5' (row 5)
238 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7' (row 7)
240 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
@ NOTE(review): the test and the branch that implement this check are not
@ visible in this excerpt.
@ The registers are stored in ascending order and reloaded into the same
@ registers by the ldmia below, so r4 stays tmp11 and r6 stays tmp12.
246 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
@ Odd part: combine d1, d3, d5, d7 into tmp0..tmp3. Constant loads alternate
@ between r9 and r10 and are interleaved with the multiplies.
248 add r0, r3, r5 @ r0 = 'z2' = d3 + d5
249 add r2, r1, r7 @ r2 = 'z1' = d1 + d7
250 add r4, r3, r7 @ r4 = 'z3' = d3 + d7
251 add r6, r1, r5 @ r6 = 'z4' = d1 + d5
252 ldr r9, [r11, #FIX_1_175875602_ID]
@ NOTE(review): r8 is multiplied below, but the instruction that sets it is
@ not visible in this excerpt (presumably r8 = z3 + z4 as in the row pass).
254 ldr r10, [r11, #FIX_M_0_899976223_ID]
255 mul r8, r9, r8 @ r8 = 'z5' = r9 * r8
256 ldr r9, [r11, #FIX_M_2_562915447_ID]
257 mul r2, r10, r2 @ r2 = 'z1' (now scaled by r10)
258 ldr r10, [r11, #FIX_M_1_961570560_ID]
259 mul r0, r9, r0 @ r0 = 'z2' (now scaled by r9)
260 ldr r9, [r11, #FIX_M_0_390180644_ID]
261 mla r4, r10, r4, r8 @ r4 = 'z3' = r10 * z3 + z5
262 ldr r10, [r11, #FIX_0_298631336_ID]
263 mla r6, r9, r6, r8 @ r6 = 'z4' = r9 * z4 + z5
264 ldr r9, [r11, #FIX_2_053119869_ID]
265 mla r7, r10, r7, r2 @ r7 = tmp0 + z1 (r10 * d7 + z1)
266 ldr r10, [r11, #FIX_3_072711026_ID]
267 mla r5, r9, r5, r0 @ r5 = tmp1 + z2 (r9 * d5 + z2)
268 ldr r9, [r11, #FIX_1_501321110_ID]
269 mla r3, r10, r3, r0 @ r3 = tmp2 + z2 (r10 * d3 + z2)
270 add r7, r7, r4 @ r7 = tmp0 (z3 added)
271 mla r1, r9, r1, r2 @ r1 = tmp3 + z1 (r9 * d1 + z1)
272 add r5, r5, r6 @ r5 = tmp1 (z4 added)
273 add r3, r3, r4 @ r3 = tmp2 (z3 added)
274 add r1, r1, r6 @ r1 = tmp3 (z4 added)
276 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
277 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Each result below is written from r8 as a halfword. Sum and difference of a
@ pair go to mirrored rows: 0 and 7, 1 and 6, 2 and 5, 3 and 4.
@ NOTE(review): the instructions that compute r8 before each store are not
@ visible in this excerpt.
279 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
283 strh r8, [lr, #( 0*8)] @ row 0
285 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
289 strh r8, [lr, #(14*8)] @ row 7
291 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
295 strh r8, [lr, #( 2*8)] @ row 1
297 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
301 strh r8, [lr, #(12*8)] @ row 6
303 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
307 strh r8, [lr, #( 4*8)] @ row 2
309 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
313 strh r8, [lr, #(10*8)] @ row 5
315 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
319 strh r8, [lr, #( 6*8)] @ row 3
321 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
325 strh r8, [lr, #( 8*8)] @ row 4
@ Second store sequence: one register is written to both mirrored rows.
@ NOTE(review): presumably this is the empty-odd-column shortcut, where
@ tmp0..tmp3 are zero so that sum and difference coincide; the label and the
@ descaling instructions are not visible in this excerpt.
334 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
335 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
338 strh r0, [lr, #( 0*8)] @ row 0
339 strh r0, [lr, #(14*8)] @ row 7
341 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
342 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
345 strh r4, [lr, #( 2*8)] @ row 1
346 strh r4, [lr, #(12*8)] @ row 6
348 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
349 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
352 strh r6, [lr, #( 4*8)] @ row 2
353 strh r6, [lr, #(10*8)] @ row 5
355 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
356 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
359 strh r2, [lr, #( 6*8)] @ row 3
360 strh r2, [lr, #( 8*8)] @ row 4
@ NOTE(review): this pop expects sp to point at the ten registers pushed on
@ entry, so the 4-byte slot reserved after that push must already have been
@ released; the instruction doing so is not visible in this excerpt.
370 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return (the saved lr is loaded into pc)
@ Constants table read through r11 by the routine. The order of the entries
@ must match the byte offsets given by the _ID defines: entry k sits at
@ offset 4*k.
@ NOTE(review): the const_array label referenced when r11 is set up is not
@ visible in this excerpt.
374 .word FIX_0_298631336
375 .word FIX_0_541196100
376 .word FIX_0_765366865
377 .word FIX_1_175875602
378 .word FIX_1_501321110
379 .word FIX_2_053119869
380 .word FIX_3_072711026
381 .word FIX_M_0_390180644
382 .word FIX_M_0_899976223
383 .word FIX_M_1_847759065
384 .word FIX_M_1_961570560
385 .word FIX_M_2_562915447
@ NOTE(review): the routine also loads from offset 48 (the 0xFFFF entry), which
@ is a 13th word that is not visible in this excerpt; confirm that it follows.