2 bttvgrab 0.15.4 [1999-03-23]
3 (c) 1998, 1999 by Joerg Walter <trouble@moes.pmnet.uni-oldenburg.de>
5 Maintained by: Joerg Walter
6 Current version at http://moes.pmnet.uni-oldenburg.de/bttvgrab/
8 This program is free software
; you can redistribute it
and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation
; either version
2 of the License
, or
11 (at your option
) any later version
.
13 This program is distributed in the hope that it will be useful
,
14 but WITHOUT ANY WARRANTY
; without even the implied warranty of
15 MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE
. See the
16 GNU General Public License
for more details
.
18 You should have received a copy of the GNU General Public License
19 along with
this program
; if not, write to the Free Software
20 Foundation
, Inc
., 675 Mass Ave
, Cambridge
, MA
02139, USA
.
22 This file is a modified version of RTjpeg
0.1.2, (C
) Justin Schoeman
1998
30 This file contains most of the initialisation and control functions
32 (C) Justin Schoeman 1998
36 #include <sys/types.h>
40 #include "rtjpeg_core.h"
42 static const unsigned char RTjpeg_ZZ
[64]={
48 40, 33, 26, 19, 12, 5,
49 6, 13, 20, 27, 34, 41, 48,
50 56, 49, 42, 35, 28, 21, 14, 7,
51 15, 22, 29, 36, 43, 50, 57,
52 58, 51, 44, 37, 30, 23,
59 static const __u64 RTjpeg_aan_tab
[64]={
60 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
61 5957222912ULL, 8263040512ULL, 7783580160ULL, 7005009920ULL, 5957222912ULL, 4680582144ULL, 3224107520ULL, 1643641088ULL,
62 5611718144ULL, 7783580160ULL, 7331904512ULL, 6598688768ULL, 5611718144ULL, 4408998912ULL, 3036936960ULL, 1548224000ULL,
63 5050464768ULL, 7005009920ULL, 6598688768ULL, 5938608128ULL, 5050464768ULL, 3968072960ULL, 2733115392ULL, 1393296000ULL,
64 4294967296ULL, 5957222912ULL, 5611718144ULL, 5050464768ULL, 4294967296ULL, 3374581504ULL, 2324432128ULL, 1184891264ULL,
65 3374581504ULL, 4680582144ULL, 4408998912ULL, 3968072960ULL, 3374581504ULL, 2651326208ULL, 1826357504ULL, 931136000ULL,
66 2324432128ULL, 3224107520ULL, 3036936960ULL, 2733115392ULL, 2324432128ULL, 1826357504ULL, 1258030336ULL, 641204288ULL,
67 1184891264ULL, 1643641088ULL, 1548224000ULL, 1393296000ULL, 1184891264ULL, 931136000ULL, 641204288ULL, 326894240ULL,
70 static const unsigned char RTjpeg_lum_quant_tbl
[64] = {
71 16, 11, 10, 16, 24, 40, 51, 61,
72 12, 12, 14, 19, 26, 58, 60, 55,
73 14, 13, 16, 24, 40, 57, 69, 56,
74 14, 17, 22, 29, 51, 87, 80, 62,
75 18, 22, 37, 56, 68, 109, 103, 77,
76 24, 35, 55, 64, 81, 104, 113, 92,
77 49, 64, 78, 87, 103, 121, 120, 101,
78 72, 92, 95, 98, 112, 100, 103, 99
81 static const unsigned char RTjpeg_chrom_quant_tbl
[64] = {
82 17, 18, 24, 47, 99, 99, 99, 99,
83 18, 21, 26, 66, 99, 99, 99, 99,
84 24, 26, 56, 99, 99, 99, 99, 99,
85 47, 66, 99, 99, 99, 99, 99, 99,
86 99, 99, 99, 99, 99, 99, 99, 99,
87 99, 99, 99, 99, 99, 99, 99, 99,
88 99, 99, 99, 99, 99, 99, 99, 99,
89 99, 99, 99, 99, 99, 99, 99, 99
92 int RTjpeg_b2s(__s16
*data
, __s8
*strm
, __u8 bt8
)
94 register int ci
, co
=1, tmp
;
95 register __s16 ZZvalue
;
97 (__u8
)strm
[0]=(__u8
)(data
[RTjpeg_ZZ
[0]]>254) ? 254:((data
[RTjpeg_ZZ
[0]]<0)?0:data
[RTjpeg_ZZ
[0]]);
99 for(ci
=1; ci
<=bt8
; ci
++)
101 ZZvalue
= data
[RTjpeg_ZZ
[ci
]];
105 strm
[co
++]=(__s8
)(ZZvalue
>127)?127:ZZvalue
;
109 strm
[co
++]=(__s8
)(ZZvalue
<-128)?-128:ZZvalue
;
115 ZZvalue
= data
[RTjpeg_ZZ
[ci
]];
119 strm
[co
++]=(__s8
)(ZZvalue
>63)?63:ZZvalue
;
123 strm
[co
++]=(__s8
)(ZZvalue
<-64)?-64:ZZvalue
;
125 else /* compress zeros */
132 while((ci
<64)&&(data
[RTjpeg_ZZ
[ci
]]==0));
134 strm
[co
++]=(__s8
)(63+(ci
-tmp
));
141 int RTjpeg_s2b(__s16
*data
, __s8
*strm
, __u8 bt8
, __u32
*qtbl
)
147 data
[i
]=((__u8
)strm
[0])*qtbl
[i
];
149 for(co
=1; co
<=bt8
; co
++)
152 data
[i
]=strm
[ci
++]*qtbl
[i
];
160 for(; co
<tmp
; co
++)data
[RTjpeg_ZZ
[co
]]=0;
165 data
[i
]=strm
[ci
]*qtbl
[i
];
173 void RTjpeg_quant_init(void)
178 qtbl
=(__s16
*)RTjpeg_lqt
;
179 for(i
=0; i
<64; i
++)qtbl
[i
]=(__s16
)RTjpeg_lqt
[i
];
181 qtbl
=(__s16
*)RTjpeg_cqt
;
182 for(i
=0; i
<64; i
++)qtbl
[i
]=(__s16
)RTjpeg_cqt
[i
];
185 static mmx_t RTjpeg_ones
=(mmx_t
)(long long)0x0001000100010001LL
;
186 static mmx_t RTjpeg_half
=(mmx_t
)(long long)0x7fff7fff7fff7fffLL
;
188 void RTjpeg_quant(__s16
*block
, __s32
*qtbl
)
196 movq_m2r(RTjpeg_ones
, mm6
);
197 movq_m2r(RTjpeg_half
, mm7
);
201 movq_m2r(*(ql
++), mm0
); /* quant vals (4) */
202 movq_m2r(*bl
, mm2
); /* block vals (4) */
206 punpcklwd_r2r(mm6
, mm0
); /* 1 qb 1 qa */
207 punpckhwd_r2r(mm6
, mm1
); /* 1 qd 1 qc */
209 punpcklwd_r2r(mm7
, mm2
); /* 32767 bb 32767 ba */
210 punpckhwd_r2r(mm7
, mm3
); /* 32767 bd 32767 bc */
212 pmaddwd_r2r(mm2
, mm0
); /* 32767+bb*qb 32767+ba*qa */
213 pmaddwd_r2r(mm3
, mm1
); /* 32767+bd*qd 32767+bc*qc */
218 packssdw_r2r(mm1
, mm0
);
220 movq_r2m(mm0
, *(bl
++));
225 void RTjpeg_quant_init(void)
229 void RTjpeg_quant(__s16
*block
, __s32
*qtbl
)
234 block
[i
]=(__s16
)((block
[i
]*qtbl
[i
]+32767)>>16);
239 * Perform the forward DCT on one block of samples.
242 static mmx_t RTjpeg_C4
=(mmx_t
)(long long)0x2D412D412D412D41LL
;
243 static mmx_t RTjpeg_C6
=(mmx_t
)(long long)0x187E187E187E187ELL
;
244 static mmx_t RTjpeg_C2mC6
=(mmx_t
)(long long)0x22A322A322A322A3LL
;
245 static mmx_t RTjpeg_C2pC6
=(mmx_t
)(long long)0x539F539F539F539FLL
;
246 static mmx_t RTjpeg_zero
=(mmx_t
)(long long)0x0000000000000000LL
;
250 #define FIX_0_382683433 ((__s32) 98) /* FIX(0.382683433) */
251 #define FIX_0_541196100 ((__s32) 139) /* FIX(0.541196100) */
252 #define FIX_0_707106781 ((__s32) 181) /* FIX(0.707106781) */
253 #define FIX_1_306562965 ((__s32) 334) /* FIX(1.306562965) */
255 #define DESCALE10(x) (__s16)( ((x)+128) >> 8)
256 #define DESCALE20(x) (__s16)(((x)+32768) >> 16)
257 #define D_MULTIPLY(var,const) ((__s32) ((var) * (const)))
260 void RTjpeg_dct_init(void)
266 RTjpeg_lqt
[i
]=(((__u64
)RTjpeg_lqt
[i
]<<32)/RTjpeg_aan_tab
[i
]);
267 RTjpeg_cqt
[i
]=(((__u64
)RTjpeg_cqt
[i
]<<32)/RTjpeg_aan_tab
[i
]);
271 void RTjpeg_dctY(__u8
*idata
, __s16
*odata
, int rskip
)
274 __s32 tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
275 __s32 tmp10
, tmp11
, tmp12
, tmp13
;
276 __s32 z1
, z2
, z3
, z4
, z5
, z11
, z13
;
284 for (ctr
= 7; ctr
>= 0; ctr
--) {
285 tmp0
= idataptr
[0] + idataptr
[7];
286 tmp7
= idataptr
[0] - idataptr
[7];
287 tmp1
= idataptr
[1] + idataptr
[6];
288 tmp6
= idataptr
[1] - idataptr
[6];
289 tmp2
= idataptr
[2] + idataptr
[5];
290 tmp5
= idataptr
[2] - idataptr
[5];
291 tmp3
= idataptr
[3] + idataptr
[4];
292 tmp4
= idataptr
[3] - idataptr
[4];
294 tmp10
= (tmp0
+ tmp3
); /* phase 2 */
296 tmp11
= (tmp1
+ tmp2
);
299 wsptr
[0] = (tmp10
+ tmp11
)<<8; /* phase 3 */
300 wsptr
[4] = (tmp10
- tmp11
)<<8;
302 z1
= D_MULTIPLY(tmp12
+ tmp13
, FIX_0_707106781
); /* c4 */
303 wsptr
[2] = (tmp13
<<8) + z1
; /* phase 5 */
304 wsptr
[6] = (tmp13
<<8) - z1
;
306 tmp10
= tmp4
+ tmp5
; /* phase 2 */
310 z5
= D_MULTIPLY(tmp10
- tmp12
, FIX_0_382683433
); /* c6 */
311 z2
= D_MULTIPLY(tmp10
, FIX_0_541196100
) + z5
; /* c2-c6 */
312 z4
= D_MULTIPLY(tmp12
, FIX_1_306562965
) + z5
; /* c2+c6 */
313 z3
= D_MULTIPLY(tmp11
, FIX_0_707106781
); /* c4 */
315 z11
= (tmp7
<<8) + z3
; /* phase 5 */
316 z13
= (tmp7
<<8) - z3
;
318 wsptr
[5] = z13
+ z2
; /* phase 6 */
323 idataptr
+= rskip
<<3; /* advance pointer to next row */
329 for (ctr
= 7; ctr
>= 0; ctr
--) {
330 tmp0
= wsptr
[0] + wsptr
[56];
331 tmp7
= wsptr
[0] - wsptr
[56];
332 tmp1
= wsptr
[8] + wsptr
[48];
333 tmp6
= wsptr
[8] - wsptr
[48];
334 tmp2
= wsptr
[16] + wsptr
[40];
335 tmp5
= wsptr
[16] - wsptr
[40];
336 tmp3
= wsptr
[24] + wsptr
[32];
337 tmp4
= wsptr
[24] - wsptr
[32];
339 tmp10
= tmp0
+ tmp3
; /* phase 2 */
344 odataptr
[0] = DESCALE10(tmp10
+ tmp11
); /* phase 3 */
345 odataptr
[32] = DESCALE10(tmp10
- tmp11
);
347 z1
= D_MULTIPLY(tmp12
+ tmp13
, FIX_0_707106781
); /* c4 */
348 odataptr
[16] = DESCALE20((tmp13
<<8) + z1
); /* phase 5 */
349 odataptr
[48] = DESCALE20((tmp13
<<8) - z1
);
351 tmp10
= tmp4
+ tmp5
; /* phase 2 */
355 z5
= D_MULTIPLY(tmp10
- tmp12
, FIX_0_382683433
); /* c6 */
356 z2
= D_MULTIPLY(tmp10
, FIX_0_541196100
) + z5
; /* c2-c6 */
357 z4
= D_MULTIPLY(tmp12
, FIX_1_306562965
) + z5
; /* c2+c6 */
358 z3
= D_MULTIPLY(tmp11
, FIX_0_707106781
); /* c4 */
360 z11
= (tmp7
<<8) + z3
; /* phase 5 */
361 z13
= (tmp7
<<8) - z3
;
363 odataptr
[40] = DESCALE20(z13
+ z2
); /* phase 6 */
364 odataptr
[24] = DESCALE20(z13
- z2
);
365 odataptr
[8] = DESCALE20(z11
+ z4
);
366 odataptr
[56] = DESCALE20(z11
- z4
);
368 odataptr
++; /* advance pointer to next column */
373 register mmx_t
*dataptr
= (mmx_t
*)odata
;
374 mmx_t
*idata2
= (mmx_t
*)idata
;
376 /* first copy the input 8 bit to the destination 16 bits */
378 movq_m2r(RTjpeg_zero
, mm2
);
381 movq_m2r(*idata2
, mm0
);
384 punpcklbw_r2r(mm2
, mm0
);
385 movq_r2m(mm0
, *(dataptr
));
387 punpckhbw_r2r(mm2
, mm1
);
388 movq_r2m(mm1
, *(dataptr
+1));
392 movq_m2r(*idata2
, mm0
);
395 punpcklbw_r2r(mm2
, mm0
);
396 movq_r2m(mm0
, *(dataptr
+2));
398 punpckhbw_r2r(mm2
, mm1
);
399 movq_r2m(mm1
, *(dataptr
+3));
403 movq_m2r(*idata2
, mm0
);
406 punpcklbw_r2r(mm2
, mm0
);
407 movq_r2m(mm0
, *(dataptr
+4));
409 punpckhbw_r2r(mm2
, mm1
);
410 movq_r2m(mm1
, *(dataptr
+5));
414 movq_m2r(*idata2
, mm0
);
417 punpcklbw_r2r(mm2
, mm0
);
418 movq_r2m(mm0
, *(dataptr
+6));
420 punpckhbw_r2r(mm2
, mm1
);
421 movq_r2m(mm1
, *(dataptr
+7));
425 movq_m2r(*idata2
, mm0
);
428 punpcklbw_r2r(mm2
, mm0
);
429 movq_r2m(mm0
, *(dataptr
+8));
431 punpckhbw_r2r(mm2
, mm1
);
432 movq_r2m(mm1
, *(dataptr
+9));
436 movq_m2r(*idata2
, mm0
);
439 punpcklbw_r2r(mm2
, mm0
);
440 movq_r2m(mm0
, *(dataptr
+10));
442 punpckhbw_r2r(mm2
, mm1
);
443 movq_r2m(mm1
, *(dataptr
+11));
447 movq_m2r(*idata2
, mm0
);
450 punpcklbw_r2r(mm2
, mm0
);
451 movq_r2m(mm0
, *(dataptr
+12));
453 punpckhbw_r2r(mm2
, mm1
);
454 movq_r2m(mm1
, *(dataptr
+13));
458 movq_m2r(*idata2
, mm0
);
461 punpcklbw_r2r(mm2
, mm0
);
462 movq_r2m(mm0
, *(dataptr
+14));
464 punpckhbw_r2r(mm2
, mm1
);
465 movq_r2m(mm1
, *(dataptr
+15));
467 /* Start Transpose to do calculations on rows */
469 movq_m2r(*(dataptr
+9), mm7
); /* m03:m02|m01:m00 - first line (line 4)and copy into m5 */
471 movq_m2r(*(dataptr
+13), mm6
); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
474 punpcklwd_m2r(*(dataptr
+11), mm7
); /* m11:m01|m10:m00 - interleave first and second lines */
477 punpcklwd_m2r(*(dataptr
+15), mm6
); /* m31:m21|m30:m20 - interleave third and fourth lines */
480 movq_m2r(*(dataptr
+11), mm3
); /* m13:m12|m11:m10 - second line */
481 punpckldq_r2r(mm6
, mm7
); /* m30:m20|m10:m00 - interleave to produce result 1 */
483 movq_m2r(*(dataptr
+15), mm0
); /* m33:m32|m31:m30 - fourth line */
484 punpckhdq_r2r(mm6
, mm1
); /* m31:m21|m11:m01 - interleave to produce result 2 */
486 movq_r2m(mm7
,*(dataptr
+9)); /* write result 1 */
487 punpckhwd_r2r(mm3
, mm5
); /* m13:m03|m12:m02 - interleave first and second lines */
489 movq_r2m(mm1
,*(dataptr
+11)); /* write result 2 */
490 punpckhwd_r2r(mm0
, mm2
); /* m33:m23|m32:m22 - interleave third and fourth lines */
493 punpckldq_r2r(mm2
, mm5
); /* m32:m22|m12:m02 - interleave to produce result 3 */
495 movq_m2r(*(dataptr
+1), mm0
); /* m03:m02|m01:m00 - first line, 4x4 */
496 punpckhdq_r2r(mm2
, mm1
); /* m33:m23|m13:m03 - interleave to produce result 4 */
498 movq_r2m(mm5
,*(dataptr
+13)); /* write result 3 */
502 movq_r2m(mm1
, *(dataptr
+15)); /* write result 4, last 4x4 */
504 movq_m2r(*(dataptr
+5), mm2
); /* m23:m22|m21:m20 - third line */
507 punpcklwd_m2r(*(dataptr
+3), mm0
); /* m11:m01|m10:m00 - interleave first and second lines */
510 punpcklwd_m2r(*(dataptr
+7), mm2
); /* m31:m21|m30:m20 - interleave third and fourth lines */
514 movq_m2r(*(dataptr
+8), mm1
); /* n03:n02|n01:n00 - first line */
515 punpckldq_r2r(mm2
, mm0
); /* m30:m20|m10:m00 - interleave to produce first result */
517 movq_m2r(*(dataptr
+12), mm3
); /* n23:n22|n21:n20 - third line */
518 punpckhdq_r2r(mm2
, mm4
); /* m31:m21|m11:m01 - interleave to produce second result */
520 punpckhwd_m2r(*(dataptr
+3), mm6
); /* m13:m03|m12:m02 - interleave first and second lines */
521 movq_r2r(mm1
, mm2
); /* copy first line */
523 punpckhwd_m2r(*(dataptr
+7), mm7
); /* m33:m23|m32:m22 - interleave third and fourth lines */
524 movq_r2r(mm6
, mm5
); /* copy first intermediate result */
526 movq_r2m(mm0
, *(dataptr
+8)); /* write result 1 */
527 punpckhdq_r2r(mm7
, mm5
); /* m33:m23|m13:m03 - produce third result */
529 punpcklwd_m2r(*(dataptr
+10), mm1
); /* n11:n01|n10:n00 - interleave first and second lines */
530 movq_r2r(mm3
, mm0
); /* copy third line */
532 punpckhwd_m2r(*(dataptr
+10), mm2
); /* n13:n03|n12:n02 - interleave first and second lines */
534 movq_r2m(mm4
, *(dataptr
+10)); /* write result 2 out */
535 punpckldq_r2r(mm7
, mm6
); /* m32:m22|m12:m02 - produce fourth result */
537 punpcklwd_m2r(*(dataptr
+14), mm3
); /* n31:n21|n30:n20 - interleave third and fourth lines */
540 movq_r2m(mm6
, *(dataptr
+12)); /* write result 3 out */
541 punpckldq_r2r(mm3
, mm1
); /* n30:n20|n10:n00 - produce first result */
543 punpckhwd_m2r(*(dataptr
+14), mm0
); /* n33:n23|n32:n22 - interleave third and fourth lines */
546 movq_r2m(mm5
, *(dataptr
+14)); /* write result 4 out */
547 punpckhdq_r2r(mm3
, mm4
); /* n31:n21|n11:n01- produce second result */
549 movq_r2m(mm1
, *(dataptr
+1)); /* write result 5 out - (first result for other 4 x 4 block) */
550 punpckldq_r2r(mm0
, mm2
); /* n32:n22|n12:n02- produce third result */
552 movq_r2m(mm4
, *(dataptr
+3)); /* write result 6 out */
553 punpckhdq_r2r(mm0
, mm6
); /* n33:n23|n13:n03 - produce fourth result */
555 movq_r2m(mm2
, *(dataptr
+5)); /* write result 7 out */
557 movq_m2r(*dataptr
, mm0
); /* m03:m02|m01:m00 - first line, first 4x4 */
559 movq_r2m(mm6
, *(dataptr
+7)); /* write result 8 out */
562 /* Do first 4x4 quadrant, which is used in the beginning of the DCT: */
564 movq_m2r(*(dataptr
+4), mm7
); /* m23:m22|m21:m20 - third line */
567 punpcklwd_m2r(*(dataptr
+2), mm0
); /* m11:m01|m10:m00 - interleave first and second lines */
570 punpcklwd_m2r(*(dataptr
+6), mm7
); /* m31:m21|m30:m20 - interleave third and fourth lines */
573 movq_m2r(*(dataptr
+2), mm6
); /* m13:m12|m11:m10 - second line */
574 punpckldq_r2r(mm7
, mm0
); /* m30:m20|m10:m00 - interleave to produce result 1 */
576 movq_m2r(*(dataptr
+6), mm5
); /* m33:m32|m31:m30 - fourth line */
577 punpckhdq_r2r(mm7
, mm1
); /* m31:m21|m11:m01 - interleave to produce result 2 */
579 movq_r2r(mm0
, mm7
); /* write result 1 */
580 punpckhwd_r2r(mm6
, mm2
); /* m13:m03|m12:m02 - interleave first and second lines */
582 psubw_m2r(*(dataptr
+14), mm7
); /* tmp07=x0-x7 (Stage 1) */
583 movq_r2r(mm1
, mm6
); /* write result 2 */
585 paddw_m2r(*(dataptr
+14), mm0
); /* tmp00=x0+x7 (Stage 1) */
586 punpckhwd_r2r(mm5
, mm4
); /* m33:m23|m32:m22 - interleave third and fourth lines */
588 paddw_m2r(*(dataptr
+12), mm1
); /* tmp01=x1+x6 (Stage 1) */
589 movq_r2r(mm2
, mm3
); /* copy first intermediate result */
591 psubw_m2r(*(dataptr
+12), mm6
); /* tmp06=x1-x6 (Stage 1) */
592 punpckldq_r2r(mm4
, mm2
); /* m32:m22|m12:m02 - interleave to produce result 3 */
595 movq_r2r(mm2
, mm5
); /* write result 3 */
598 punpckhdq_r2r(mm4
, mm3
); /* m33:m23|m13:m03 - interleave to produce result 4 */
600 paddw_m2r(*(dataptr
+10), mm2
); /* tmp02=x2+x5 (Stage 1) */
601 movq_r2r(mm3
, mm4
); /* write result 4 */
603 /************************************************************************************************
605 ************************************************************************************************/
608 paddw_m2r(*(dataptr
+8), mm3
); /* tmp03=x3+x4 (stage 1) */
611 psubw_m2r(*(dataptr
+8), mm4
); /* tmp04=x3-x4 (stage 1) */
614 paddw_r2r(mm3
, mm0
); /* tmp10 = tmp00 + tmp03 (even 2) */
615 psubw_r2r(mm3
, mm7
); /* tmp13 = tmp00 - tmp03 (even 2) */
617 psubw_r2r(mm2
, mm6
); /* tmp12 = tmp01 - tmp02 (even 2) */
618 paddw_r2r(mm2
, mm1
); /* tmp11 = tmp01 + tmp02 (even 2) */
620 psubw_m2r(*(dataptr
+10), mm5
); /* tmp05=x2-x5 (stage 1) */
621 paddw_r2r(mm7
, mm6
); /* tmp12 + tmp13 */
628 psllw_i2r(2, mm6
); /* m8 * 2^2 */
631 pmulhw_m2r(RTjpeg_C4
, mm6
); /* z1 */
634 movq_r2m(mm0
, *dataptr
);
638 movq_r2m(mm3
, *(dataptr
+8));
639 paddw_r2r(mm5
, mm4
); /* tmp10 */
642 paddw_r2r(mm6
, mm0
); /* tmp32 */
644 paddw_r2r(mm2
, mm5
); /* tmp11 */
645 psubw_r2r(mm6
, mm7
); /* tmp33 */
647 movq_r2m(mm0
, *(dataptr
+4));
648 paddw_r2r(mm3
, mm2
); /* tmp12 */
652 movq_r2m(mm7
, *(dataptr
+12));
653 movq_r2r(mm4
, mm1
); /* copy of tmp10 */
655 psubw_r2r(mm2
, mm1
); /* tmp10 - tmp12 */
656 psllw_i2r(2, mm4
); /* m8 * 2^2 */
658 movq_m2r(RTjpeg_C2mC6
, mm0
);
661 pmulhw_m2r(RTjpeg_C6
, mm1
); /* z5 */
664 pmulhw_r2r(mm0
, mm4
); /* z5 */
668 pmulhw_m2r(RTjpeg_C2pC6
, mm2
);
671 pmulhw_m2r(RTjpeg_C4
, mm5
); /* z3 */
672 movq_r2r(mm3
, mm0
); /* copy tmp7 */
674 movq_m2r(*(dataptr
+1), mm7
);
675 paddw_r2r(mm1
, mm4
); /* z2 */
677 paddw_r2r(mm1
, mm2
); /* z4 */
679 paddw_r2r(mm5
, mm0
); /* z11 */
680 psubw_r2r(mm5
, mm3
); /* z13 */
684 movq_r2r(mm3
, mm5
); /* copy z13 */
685 psubw_r2r(mm4
, mm3
); /* y3=z13 - z2 */
687 paddw_r2r(mm4
, mm5
); /* y5=z13 + z2 */
688 movq_r2r(mm0
, mm6
); /* copy z11 */
690 movq_r2m(mm3
, *(dataptr
+6)); /*save y3 */
691 psubw_r2r(mm2
, mm0
); /* y7=z11 - z4 */
693 movq_r2m(mm5
, *(dataptr
+10)); /*save y5 */
694 paddw_r2r(mm2
, mm6
); /* y1=z11 + z4 */
696 movq_r2m(mm0
, *(dataptr
+14)); /*save y7 */
698 /************************************************
700 ************************************************/
702 movq_m2r(*(dataptr
+3), mm1
); /* load x1 (stage 1) */
703 movq_r2r(mm7
, mm0
); /* copy x0 */
705 movq_r2m(mm6
, *(dataptr
+2)); /*save y1 */
707 movq_m2r(*(dataptr
+5), mm2
); /* load x2 (stage 1) */
708 movq_r2r(mm1
, mm6
); /* copy x1 */
710 paddw_m2r(*(dataptr
+15), mm0
); /* tmp00 = x0 + x7 */
712 movq_m2r(*(dataptr
+7), mm3
); /* load x3 (stage 1) */
713 movq_r2r(mm2
, mm5
); /* copy x2 */
715 psubw_m2r(*(dataptr
+15), mm7
); /* tmp07 = x0 - x7 */
716 movq_r2r(mm3
, mm4
); /* copy x3 */
718 paddw_m2r(*(dataptr
+13), mm1
); /* tmp01 = x1 + x6 */
720 movq_r2m(mm7
, tmp7
); /* save tmp07 */
721 movq_r2r(mm0
, mm7
); /* copy tmp00 */
723 psubw_m2r(*(dataptr
+13), mm6
); /* tmp06 = x1 - x6 */
725 /* stage 2, Even Part */
727 paddw_m2r(*(dataptr
+9), mm3
); /* tmp03 = x3 + x4 */
729 movq_r2m(mm6
, tmp6
); /* save tmp06 */
730 movq_r2r(mm1
, mm6
); /* copy tmp01 */
732 paddw_m2r(*(dataptr
+11), mm2
); /* tmp02 = x2 + x5 */
733 paddw_r2r(mm3
, mm0
); /* tmp10 = tmp00 + tmp03 */
735 psubw_r2r(mm3
, mm7
); /* tmp13 = tmp00 - tmp03 */
737 psubw_m2r(*(dataptr
+9), mm4
); /* tmp04 = x3 - x4 */
738 psubw_r2r(mm2
, mm6
); /* tmp12 = tmp01 - tmp02 */
740 paddw_r2r(mm2
, mm1
); /* tmp11 = tmp01 + tmp02 */
742 psubw_m2r(*(dataptr
+11), mm5
); /* tmp05 = x2 - x5 */
743 paddw_r2r(mm7
, mm6
); /* tmp12 + tmp13 */
745 /* stage 3, Even and stage 4 & 5 even */
747 movq_m2r(tmp6
, mm2
); /* load tmp6 */
748 movq_r2r(mm0
, mm3
); /* copy tmp10 */
750 psllw_i2r(2, mm6
); /* shift z1 */
751 paddw_r2r(mm1
, mm0
); /* y0=tmp10 + tmp11 */
753 pmulhw_m2r(RTjpeg_C4
, mm6
); /* z1 */
754 psubw_r2r(mm1
, mm3
); /* y4=tmp10 - tmp11 */
756 movq_r2m(mm0
, *(dataptr
+1)); /*save y0 */
757 movq_r2r(mm7
, mm0
); /* copy tmp13 */
761 movq_r2m(mm3
, *(dataptr
+9)); /*save y4 */
762 paddw_r2r(mm5
, mm4
); /* tmp10 = tmp4 + tmp5 */
764 movq_m2r(tmp7
, mm3
); /* load tmp7 */
765 paddw_r2r(mm6
, mm0
); /* tmp32 = tmp13 + z1 */
767 paddw_r2r(mm2
, mm5
); /* tmp11 = tmp5 + tmp6 */
768 psubw_r2r(mm6
, mm7
); /* tmp33 = tmp13 - z1 */
770 movq_r2m(mm0
, *(dataptr
+5)); /*save y2 */
771 paddw_r2r(mm3
, mm2
); /* tmp12 = tmp6 + tmp7 */
775 movq_r2m(mm7
, *(dataptr
+13)); /*save y6 */
776 movq_r2r(mm4
, mm1
); /* copy tmp10 */
778 psubw_r2r(mm2
, mm1
); /* tmp10 - tmp12 */
779 psllw_i2r(2, mm4
); /* shift tmp10 */
781 movq_m2r(RTjpeg_C2mC6
, mm0
); /* load C2mC6 */
782 psllw_i2r(2, mm1
); /* shift (tmp10-tmp12) */
784 pmulhw_m2r(RTjpeg_C6
, mm1
); /* z5 */
785 psllw_i2r(2, mm5
); /* prepare for multiply */
787 pmulhw_r2r(mm0
, mm4
); /* multiply by converted real */
791 pmulhw_m2r(RTjpeg_C4
, mm5
); /* z3 */
792 psllw_i2r(2, mm2
); /* prepare for multiply */
794 pmulhw_m2r(RTjpeg_C2pC6
, mm2
); /* multiply */
795 movq_r2r(mm3
, mm0
); /* copy tmp7 */
797 movq_m2r(*(dataptr
+9), mm7
); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
798 paddw_r2r(mm1
, mm4
); /* z2 */
800 paddw_r2r(mm5
, mm0
); /* z11 */
801 psubw_r2r(mm5
, mm3
); /* z13 */
805 movq_r2r(mm3
, mm5
); /* copy z13 */
806 paddw_r2r(mm1
, mm2
); /* z4 */
808 movq_r2r(mm0
, mm6
); /* copy z11 */
809 psubw_r2r(mm4
, mm5
); /* y3 */
811 paddw_r2r(mm2
, mm6
); /* y1 */
812 paddw_r2r(mm4
, mm3
); /* y5 */
814 movq_r2m(mm5
, *(dataptr
+7)); /*save y3 */
816 movq_r2m(mm6
, *(dataptr
+3)); /*save y1 */
817 psubw_r2r(mm2
, mm0
); /* y7 */
819 /************************************************************************************************
821 ************************************************************************************************/
823 movq_m2r(*(dataptr
+13), mm6
); /* m23:m22|m21:m20 - third line (line 6)and copy into m2 */
824 movq_r2r(mm7
, mm5
); /* copy first line */
826 punpcklwd_r2r(mm3
, mm7
); /* m11:m01|m10:m00 - interleave first and second lines */
827 movq_r2r(mm6
, mm2
); /* copy third line */
829 punpcklwd_r2r(mm0
, mm6
); /* m31:m21|m30:m20 - interleave third and fourth lines */
830 movq_r2r(mm7
, mm1
); /* copy first intermediate result */
832 punpckldq_r2r(mm6
, mm7
); /* m30:m20|m10:m00 - interleave to produce result 1 */
834 punpckhdq_r2r(mm6
, mm1
); /* m31:m21|m11:m01 - interleave to produce result 2 */
836 movq_r2m(mm7
, *(dataptr
+9)); /* write result 1 */
837 punpckhwd_r2r(mm3
, mm5
); /* m13:m03|m12:m02 - interleave first and second lines */
839 movq_r2m(mm1
, *(dataptr
+11)); /* write result 2 */
840 punpckhwd_r2r(mm0
, mm2
); /* m33:m23|m32:m22 - interleave third and fourth lines */
842 movq_r2r(mm5
, mm1
); /* copy first intermediate result */
843 punpckldq_r2r(mm2
, mm5
); /* m32:m22|m12:m02 - interleave to produce result 3 */
845 movq_m2r(*(dataptr
+1), mm0
); /* m03:m02|m01:m00 - first line, 4x4 */
846 punpckhdq_r2r(mm2
, mm1
); /* m33:m23|m13:m03 - interleave to produce result 4 */
848 movq_r2m(mm5
, *(dataptr
+13)); /* write result 3 */
850 /****** last 4x4 done */
852 movq_r2m(mm1
, *(dataptr
+15)); /* write result 4, last 4x4 */
854 movq_m2r(*(dataptr
+5), mm2
); /* m23:m22|m21:m20 - third line */
855 movq_r2r(mm0
, mm6
); /* copy first line */
857 punpcklwd_m2r(*(dataptr
+3), mm0
); /* m11:m01|m10:m00 - interleave first and second lines */
858 movq_r2r(mm2
, mm7
); /* copy third line */
860 punpcklwd_m2r(*(dataptr
+7), mm2
); /* m31:m21|m30:m20 - interleave third and fourth lines */
861 movq_r2r(mm0
, mm4
); /* copy first intermediate result */
865 movq_m2r(*(dataptr
+8), mm1
); /* n03:n02|n01:n00 - first line */
866 punpckldq_r2r(mm2
, mm0
); /* m30:m20|m10:m00 - interleave to produce first result */
868 movq_m2r(*(dataptr
+12), mm3
); /* n23:n22|n21:n20 - third line */
869 punpckhdq_r2r(mm2
, mm4
); /* m31:m21|m11:m01 - interleave to produce second result */
871 punpckhwd_m2r(*(dataptr
+3), mm6
); /* m13:m03|m12:m02 - interleave first and second lines */
872 movq_r2r(mm1
, mm2
); /* copy first line */
874 punpckhwd_m2r(*(dataptr
+7), mm7
); /* m33:m23|m32:m22 - interleave third and fourth lines */
875 movq_r2r(mm6
, mm5
); /* copy first intermediate result */
877 movq_r2m(mm0
, *(dataptr
+8)); /* write result 1 */
878 punpckhdq_r2r(mm7
, mm5
); /* m33:m23|m13:m03 - produce third result */
880 punpcklwd_m2r(*(dataptr
+10), mm1
); /* n11:n01|n10:n00 - interleave first and second lines */
881 movq_r2r(mm3
, mm0
); /* copy third line */
883 punpckhwd_m2r(*(dataptr
+10), mm2
); /* n13:n03|n12:n02 - interleave first and second lines */
885 movq_r2m(mm4
, *(dataptr
+10)); /* write result 2 out */
886 punpckldq_r2r(mm7
, mm6
); /* m32:m22|m12:m02 - produce fourth result */
888 punpcklwd_m2r(*(dataptr
+14), mm3
); /* n31:n21|n30:n20 - interleave third and fourth lines */
889 movq_r2r(mm1
, mm4
); /* copy second intermediate result */
891 movq_r2m(mm6
, *(dataptr
+12)); /* write result 3 out */
892 punpckldq_r2r(mm3
, mm1
); /* n30:n20|n10:n00 - produce first result */
894 punpckhwd_m2r(*(dataptr
+14), mm0
); /* n33:n23|n32:n22 - interleave third and fourth lines */
895 movq_r2r(mm2
, mm6
); /* copy second intermediate result */
897 movq_r2m(mm5
, *(dataptr
+14)); /* write result 4 out */
898 punpckhdq_r2r(mm3
, mm4
); /* n31:n21|n11:n01- produce second result */
900 movq_r2m(mm1
, *(dataptr
+1)); /* write result 5 out - (first result for other 4 x 4 block) */
901 punpckldq_r2r(mm0
, mm2
); /* n32:n22|n12:n02- produce third result */
903 movq_r2m(mm4
, *(dataptr
+3)); /* write result 6 out */
904 punpckhdq_r2r(mm0
, mm6
); /* n33:n23|n13:n03 - produce fourth result */
906 movq_r2m(mm2
, *(dataptr
+5)); /* write result 7 out */
908 movq_m2r(*dataptr
, mm0
); /* m03:m02|m01:m00 - first line, first 4x4 */
910 movq_r2m(mm6
, *(dataptr
+7)); /* write result 8 out */
912 /* Do first 4x4 quadrant, which is used in the beginning of the DCT: */
914 movq_m2r(*(dataptr
+4), mm7
); /* m23:m22|m21:m20 - third line */
915 movq_r2r(mm0
, mm2
); /* copy first line */
917 punpcklwd_m2r(*(dataptr
+2), mm0
); /* m11:m01|m10:m00 - interleave first and second lines */
918 movq_r2r(mm7
, mm4
); /* copy third line */
920 punpcklwd_m2r(*(dataptr
+6), mm7
); /* m31:m21|m30:m20 - interleave third and fourth lines */
921 movq_r2r(mm0
, mm1
); /* copy first intermediate result */
923 movq_m2r(*(dataptr
+2), mm6
); /* m13:m12|m11:m10 - second line */
924 punpckldq_r2r(mm7
, mm0
); /* m30:m20|m10:m00 - interleave to produce result 1 */
926 movq_m2r(*(dataptr
+6), mm5
); /* m33:m32|m31:m30 - fourth line */
927 punpckhdq_r2r(mm7
, mm1
); /* m31:m21|m11:m01 - interleave to produce result 2 */
929 movq_r2r(mm0
, mm7
); /* write result 1 */
930 punpckhwd_r2r(mm6
, mm2
); /* m13:m03|m12:m02 - interleave first and second lines */
932 psubw_m2r(*(dataptr
+14), mm7
); /* tmp07=x0-x7 (Stage 1) */
933 movq_r2r(mm1
, mm6
); /* write result 2 */
935 paddw_m2r(*(dataptr
+14), mm0
); /* tmp00=x0+x7 (Stage 1) */
936 punpckhwd_r2r(mm5
, mm4
); /* m33:m23|m32:m22 - interleave third and fourth lines */
938 paddw_m2r(*(dataptr
+12), mm1
); /* tmp01=x1+x6 (Stage 1) */
939 movq_r2r(mm2
, mm3
); /* copy first intermediate result */
941 psubw_m2r(*(dataptr
+12), mm6
); /* tmp06=x1-x6 (Stage 1) */
942 punpckldq_r2r(mm4
, mm2
); /* m32:m22|m12:m02 - interleave to produce result 3 */
944 movq_r2m(mm7
, tmp7
); /* save tmp07 */
945 movq_r2r(mm2
, mm5
); /* write result 3 */
947 movq_r2m(mm6
, tmp6
); /* save tmp06 */
949 punpckhdq_r2r(mm4
, mm3
); /* m33:m23|m13:m03 - interleave to produce result 4 */
951 paddw_m2r(*(dataptr
+10), mm2
); /* tmp02=x2+x5 (stage 1) */
952 movq_r2r(mm3
, mm4
); /* write result 4 */
954 /************************************************************************************************
956 ************************************************************************************************/
958 paddw_m2r(*(dataptr
+8), mm3
); /* tmp03=x3+x4 (stage 1) */
961 psubw_m2r(*(dataptr
+8), mm4
); /* tmp04=x3-x4 (stage 1) */
964 paddw_r2r(mm3
, mm0
); /* tmp10 = tmp00 + tmp03 (even 2) */
965 psubw_r2r(mm3
, mm7
); /* tmp13 = tmp00 - tmp03 (even 2) */
967 psubw_r2r(mm2
, mm6
); /* tmp12 = tmp01 - tmp02 (even 2) */
968 paddw_r2r(mm2
, mm1
); /* tmp11 = tmp01 + tmp02 (even 2) */
970 psubw_m2r(*(dataptr
+10), mm5
); /* tmp05=x2-x5 (stage 1) */
971 paddw_r2r(mm7
, mm6
); /* tmp12 + tmp13 */
978 psllw_i2r(2, mm6
); /* m8 * 2^2 */
981 pmulhw_m2r(RTjpeg_C4
, mm6
); /* z1 */
984 movq_r2m(mm0
, *dataptr
);
988 movq_r2m(mm3
, *(dataptr
+8));
989 paddw_r2r(mm5
, mm4
); /* tmp10 */
992 paddw_r2r(mm6
, mm0
); /* tmp32 */
994 paddw_r2r(mm2
, mm5
); /* tmp11 */
995 psubw_r2r(mm6
, mm7
); /* tmp33 */
997 movq_r2m(mm0
, *(dataptr
+4));
998 paddw_r2r(mm3
, mm2
); /* tmp12 */
1001 movq_r2m(mm7
, *(dataptr
+12));
1002 movq_r2r(mm4
, mm1
); /* copy of tmp10 */
1004 psubw_r2r(mm2
, mm1
); /* tmp10 - tmp12 */
1005 psllw_i2r(2, mm4
); /* m8 * 2^2 */
1007 movq_m2r(RTjpeg_C2mC6
, mm0
);
1010 pmulhw_m2r(RTjpeg_C6
, mm1
); /* z5 */
1013 pmulhw_r2r(mm0
, mm4
); /* z5 */
1017 pmulhw_m2r(RTjpeg_C2pC6
, mm2
);
1020 pmulhw_m2r(RTjpeg_C4
, mm5
); /* z3 */
1021 movq_r2r(mm3
, mm0
); /* copy tmp7 */
1023 movq_m2r(*(dataptr
+1), mm7
);
1024 paddw_r2r(mm1
, mm4
); /* z2 */
1026 paddw_r2r(mm1
, mm2
); /* z4 */
1028 paddw_r2r(mm5
, mm0
); /* z11 */
1029 psubw_r2r(mm5
, mm3
); /* z13 */
1033 movq_r2r(mm3
, mm5
); /* copy z13 */
1034 psubw_r2r(mm4
, mm3
); /* y3=z13 - z2 */
1036 paddw_r2r(mm4
, mm5
); /* y5=z13 + z2 */
1037 movq_r2r(mm0
, mm6
); /* copy z11 */
1039 movq_r2m(mm3
, *(dataptr
+6)); /*save y3 */
1040 psubw_r2r(mm2
, mm0
); /* y7=z11 - z4 */
1042 movq_r2m(mm5
, *(dataptr
+10)); /*save y5 */
1043 paddw_r2r(mm2
, mm6
); /* y1=z11 + z4 */
1045 movq_r2m(mm0
, *(dataptr
+14)); /*save y7 */
1047 /************************************************
1049 ************************************************/
1051 movq_m2r(*(dataptr
+3), mm1
); /* load x1 (stage 1) */
1052 movq_r2r(mm7
, mm0
); /* copy x0 */
1054 movq_r2m(mm6
, *(dataptr
+2)); /*save y1 */
1056 movq_m2r(*(dataptr
+5), mm2
); /* load x2 (stage 1) */
1057 movq_r2r(mm1
, mm6
); /* copy x1 */
1059 paddw_m2r(*(dataptr
+15), mm0
); /* tmp00 = x0 + x7 */
1061 movq_m2r(*(dataptr
+7), mm3
); /* load x3 (stage 1) */
1062 movq_r2r(mm2
, mm5
); /* copy x2 */
1064 psubw_m2r(*(dataptr
+15), mm7
); /* tmp07 = x0 - x7 */
1065 movq_r2r(mm3
, mm4
); /* copy x3 */
1067 paddw_m2r(*(dataptr
+13), mm1
); /* tmp01 = x1 + x6 */
1069 movq_r2m(mm7
, tmp7
); /* save tmp07 */
1070 movq_r2r(mm0
, mm7
); /* copy tmp00 */
1072 psubw_m2r(*(dataptr
+13), mm6
); /* tmp06 = x1 - x6 */
1074 /* stage 2, Even Part */
1076 paddw_m2r(*(dataptr
+9), mm3
); /* tmp03 = x3 + x4 */
1078 movq_r2m(mm6
, tmp6
); /* save tmp06 */
1079 movq_r2r(mm1
, mm6
); /* copy tmp01 */
1081 paddw_m2r(*(dataptr
+11), mm2
); /* tmp02 = x2 + x5 */
1082 paddw_r2r(mm3
, mm0
); /* tmp10 = tmp00 + tmp03 */
1084 psubw_r2r(mm3
, mm7
); /* tmp13 = tmp00 - tmp03 */
1086 psubw_m2r(*(dataptr
+9), mm4
); /* tmp04 = x3 - x4 */
1087 psubw_r2r(mm2
, mm6
); /* tmp12 = tmp01 - tmp02 */
1089 paddw_r2r(mm2
, mm1
); /* tmp11 = tmp01 + tmp02 */
1091 psubw_m2r(*(dataptr
+11), mm5
); /* tmp05 = x2 - x5 */
1092 paddw_r2r(mm7
, mm6
); /* tmp12 + tmp13 */
1094 /* stage 3, Even and stage 4 & 5 even */
1096 movq_m2r(tmp6
, mm2
); /* load tmp6 */
1097 movq_r2r(mm0
, mm3
); /* copy tmp10 */
1099 psllw_i2r(2, mm6
); /* shift z1 */
1100 paddw_r2r(mm1
, mm0
); /* y0=tmp10 + tmp11 */
1102 pmulhw_m2r(RTjpeg_C4
, mm6
); /* z1 */
1103 psubw_r2r(mm1
, mm3
); /* y4=tmp10 - tmp11 */
1105 movq_r2m(mm0
, *(dataptr
+1)); /*save y0 */
1106 movq_r2r(mm7
, mm0
); /* copy tmp13 */
1110 movq_r2m(mm3
, *(dataptr
+9)); /*save y4 */
1111 paddw_r2r(mm5
, mm4
); /* tmp10 = tmp4 + tmp5 */
1113 movq_m2r(tmp7
, mm3
); /* load tmp7 */
1114 paddw_r2r(mm6
, mm0
); /* tmp32 = tmp13 + z1 */
1116 paddw_r2r(mm2
, mm5
); /* tmp11 = tmp5 + tmp6 */
1117 psubw_r2r(mm6
, mm7
); /* tmp33 = tmp13 - z1 */
1119 movq_r2m(mm0
, *(dataptr
+5)); /*save y2 */
1120 paddw_r2r(mm3
, mm2
); /* tmp12 = tmp6 + tmp7 */
1124 movq_r2m(mm7
, *(dataptr
+13)); /*save y6 */
1125 movq_r2r(mm4
, mm1
); /* copy tmp10 */
1127 psubw_r2r(mm2
, mm1
); /* tmp10 - tmp12 */
1128 psllw_i2r(2, mm4
); /* shift tmp10 */
1130 movq_m2r(RTjpeg_C2mC6
, mm0
); /* load C2mC6 */
1131 psllw_i2r(2, mm1
); /* shift (tmp10-tmp12) */
1133 pmulhw_m2r(RTjpeg_C6
, mm1
); /* z5 */
1134 psllw_i2r(2, mm5
); /* prepare for multiply */
1136 pmulhw_r2r(mm0
, mm4
); /* multiply by converted real */
1140 pmulhw_m2r(RTjpeg_C4
, mm5
); /* z3 */
1141 psllw_i2r(2, mm2
); /* prepare for multiply */
1143 pmulhw_m2r(RTjpeg_C2pC6
, mm2
); /* multiply */
1144 movq_r2r(mm3
, mm0
); /* copy tmp7 */
1146 movq_m2r(*(dataptr
+9), mm7
); /* m03:m02|m01:m00 - first line (line 4)and copy into mm7 */
1147 paddw_r2r(mm1
, mm4
); /* z2 */
1149 paddw_r2r(mm5
, mm0
); /* z11 */
1150 psubw_r2r(mm5
, mm3
); /* z13 */
1154 movq_r2r(mm3
, mm5
); /* copy z13 */
1155 paddw_r2r(mm1
, mm2
); /* z4 */
1157 movq_r2r(mm0
, mm6
); /* copy z11 */
1158 psubw_r2r(mm4
, mm5
); /* y3 */
1160 paddw_r2r(mm2
, mm6
); /* y1 */
1161 paddw_r2r(mm4
, mm3
); /* y5 */
1163 movq_r2m(mm5
, *(dataptr
+7)); /*save y3 */
1164 psubw_r2r(mm2
, mm0
); /* y7=z11 - z4 */
1166 movq_r2m(mm3
, *(dataptr
+11)); /*save y5 */
1168 movq_r2m(mm6
, *(dataptr
+3)); /*save y1 */
1170 movq_r2m(mm0
, *(dataptr
+15)); /*save y7 */
/*
 * Fixed-point constants for the integer inverse DCT.  Each value is the
 * real constant scaled by 2^8 (e.g. 362 / 256 ~= 1.414213562).
 */
#define FIX_1_082392200  ((__s32)  277)		/* FIX(1.082392200) */
#define FIX_1_414213562  ((__s32)  362)		/* FIX(1.414213562) */
#define FIX_1_847759065  ((__s32)  473)		/* FIX(1.847759065) */
#define FIX_2_613125930  ((__s32)  669)		/* FIX(2.613125930) */

/* Drop 3 fraction bits with rounding (+4 before >>3) and narrow to 16 bits. */
#define DESCALE(x) ((__s16)( ((x)+4) >> 3))

/* clip yuv to 16..235 (should be 16..240 for cr/cb but ...) */
/*
 * The whole expansion is parenthesised so RL() can be embedded in a larger
 * expression (e.g. 2 * RL(v)) without the conditional operator swallowing
 * the surrounding operands.  Note that x is evaluated more than once, so
 * do not pass an argument with side effects.
 */
#define RL(x) (((x)>235) ? 235 : (((x)<16) ? 16 : (x)))

/*
 * Fixed-point multiply: var * coef, rounded (+128) and shifted right by
 * the 8 fraction bits carried by the FIX_* constants above.
 */
#define MULTIPLY(var,coef) (((__s32) ((var) * (coef)) + 128)>>8)
1188 void RTjpeg_idct_init(void)
1194 RTjpeg_liqt
[i
]=((__u64
)RTjpeg_liqt
[i
]*RTjpeg_aan_tab
[i
])>>32;
1195 RTjpeg_ciqt
[i
]=((__u64
)RTjpeg_ciqt
[i
]*RTjpeg_aan_tab
[i
])>>32;
1199 void RTjpeg_idct(__u8
*odata
, __s16
*data
, int rskip
)
1203 static mmx_t fix_141
= (mmx_t
)(long long)0x5a825a825a825a82LL
;
1204 static mmx_t fix_184n261
= (mmx_t
)(long long)0xcf04cf04cf04cf04LL
;
1205 static mmx_t fix_184
= (mmx_t
)(long long)0x7641764176417641LL
;
1206 static mmx_t fix_n184
= (mmx_t
)(long long)0x896f896f896f896fLL
;
1207 static mmx_t fix_108n184
= (mmx_t
)(long long)0xcf04cf04cf04cf04LL
;
1209 mmx_t workspace
[64];
1210 mmx_t
*wsptr
= workspace
;
1211 register mmx_t
*dataptr
= (mmx_t
*)odata
;
1212 mmx_t
*idata
= (mmx_t
*)data
;
1216 * Perform inverse DCT on one block of coefficients.
1221 movq_m2r(*(idata
+10), mm1
); /* load idata[DCTSIZE*5] */
1223 movq_m2r(*(idata
+6), mm0
); /* load idata[DCTSIZE*3] */
1225 movq_m2r(*(idata
+2), mm3
); /* load idata[DCTSIZE*1] */
1227 movq_r2r(mm1
, mm2
); /* copy tmp6 (phase 6) */
1229 movq_m2r(*(idata
+14), mm4
); /* load idata[DCTSIZE*7] */
1231 paddw_r2r(mm0
, mm1
); /* z13 = tmp6 + tmp5; */
1233 psubw_r2r(mm0
, mm2
); /* z10 = tmp6 - tmp5 */
1235 psllw_i2r(2, mm2
); /* shift z10 */
1236 movq_r2r(mm2
, mm0
); /* copy z10 */
1238 pmulhw_m2r(fix_184n261
, mm2
); /* MULTIPLY( z12, FIX_1_847759065); 2*c2 */
1239 movq_r2r(mm3
, mm5
); /* copy tmp4 */
1241 pmulhw_m2r(fix_n184
, mm0
); /* MULTIPLY(z10, -FIX_1_847759065); 2*c2 */
1242 paddw_r2r(mm4
, mm3
); /* z11 = tmp4 + tmp7; */
1244 movq_r2r(mm3
, mm6
); /* copy z11 (phase 5) */
1245 psubw_r2r(mm4
, mm5
); /* z12 = tmp4 - tmp7; */
1247 psubw_r2r(mm1
, mm6
); /* z11-z13 */
1248 psllw_i2r(2, mm5
); /* shift z12 */
1250 movq_m2r(*(idata
+12), mm4
); /* load idata[DCTSIZE*6], even part */
1251 movq_r2r(mm5
, mm7
); /* copy z12 */
1253 pmulhw_m2r(fix_108n184
, mm5
); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6), even part */
1254 paddw_r2r(mm1
, mm3
); /* tmp7 = z11 + z13; */
1259 pmulhw_m2r(fix_184
, mm7
); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
1262 movq_m2r(*(idata
+4), mm1
); /* load idata[DCTSIZE*2] */
1264 paddw_r2r(mm5
, mm0
); /* tmp10 */
1266 paddw_r2r(mm7
, mm2
); /* tmp12 */
1268 pmulhw_m2r(fix_141
, mm6
); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
1269 psubw_r2r(mm3
, mm2
); /* tmp6 = tmp12 - tmp7 */
1271 movq_r2r(mm1
, mm5
); /* copy tmp1 */
1272 paddw_r2r(mm4
, mm1
); /* tmp13= tmp1 + tmp3; phases 5-3 */
1274 psubw_r2r(mm4
, mm5
); /* tmp1-tmp3 */
1275 psubw_r2r(mm2
, mm6
); /* tmp5 = tmp11 - tmp6; */
1277 movq_r2m(mm1
, *(wsptr
)); /* save tmp13 in workspace */
1278 psllw_i2r(2, mm5
); /* shift tmp1-tmp3 */
1280 movq_m2r(*(idata
), mm7
); /* load idata[DCTSIZE*0] */
1282 pmulhw_m2r(fix_141
, mm5
); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
1283 paddw_r2r(mm6
, mm0
); /* tmp4 = tmp10 + tmp5; */
1285 movq_m2r(*(idata
+8), mm4
); /* load idata[DCTSIZE*4] */
1287 psubw_r2r(mm1
, mm5
); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
1289 movq_r2m(mm0
, *(wsptr
+4)); /* save tmp4 in workspace */
1290 movq_r2r(mm7
, mm1
); /* copy tmp0 (phase 3) */
1292 movq_r2m(mm5
, *(wsptr
+2)); /* save tmp12 in workspace */
1293 psubw_r2r(mm4
, mm1
); /* tmp11 = tmp0 - tmp2; */
1295 paddw_r2r(mm4
, mm7
); /* tmp10 = tmp0 + tmp2; */
1296 movq_r2r(mm1
, mm5
); /* copy tmp11 */
1298 paddw_m2r(*(wsptr
+2), mm1
); /* tmp1 = tmp11 + tmp12; */
1299 movq_r2r(mm7
, mm4
); /* copy tmp10 (phase 2) */
1301 paddw_m2r(*(wsptr
), mm7
); /* tmp0 = tmp10 + tmp13; */
1303 psubw_m2r(*(wsptr
), mm4
); /* tmp3 = tmp10 - tmp13; */
1304 movq_r2r(mm7
, mm0
); /* copy tmp0 */
1306 psubw_m2r(*(wsptr
+2), mm5
); /* tmp2 = tmp11 - tmp12; */
1307 paddw_r2r(mm3
, mm7
); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
1309 psubw_r2r(mm3
, mm0
); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
1311 movq_r2m(mm7
, *(wsptr
)); /* wsptr[DCTSIZE*0] */
1312 movq_r2r(mm1
, mm3
); /* copy tmp1 */
1314 movq_r2m(mm0
, *(wsptr
+14)); /* wsptr[DCTSIZE*7] */
1315 paddw_r2r(mm2
, mm1
); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
1317 psubw_r2r(mm2
, mm3
); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
1319 movq_r2m(mm1
, *(wsptr
+2)); /* wsptr[DCTSIZE*1] */
1320 movq_r2r(mm4
, mm1
); /* copy tmp3 */
1322 movq_r2m(mm3
, *(wsptr
+12)); /* wsptr[DCTSIZE*6] */
1324 paddw_m2r(*(wsptr
+4), mm4
); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
1326 psubw_m2r(*(wsptr
+4), mm1
); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
1328 movq_r2m(mm4
, *(wsptr
+8));
1329 movq_r2r(mm5
, mm7
); /* copy tmp2 */
1331 paddw_r2r(mm6
, mm5
); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
1333 movq_r2m(mm1
, *(wsptr
+6));
1334 psubw_r2r(mm6
, mm7
); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
1336 movq_r2m(mm5
, *(wsptr
+4));
1338 movq_r2m(mm7
, *(wsptr
+10));
1343 /*****************************************************************/
1348 /*****************************************************************/
1350 movq_m2r(*(idata
+10), mm1
); /* load idata[DCTSIZE*5] */
1352 movq_m2r(*(idata
+6), mm0
); /* load idata[DCTSIZE*3] */
1354 movq_m2r(*(idata
+2), mm3
); /* load idata[DCTSIZE*1] */
1355 movq_r2r(mm1
, mm2
); /* copy tmp6 (phase 6) */
1357 movq_m2r(*(idata
+14), mm4
); /* load idata[DCTSIZE*7] */
1358 paddw_r2r(mm0
, mm1
); /* z13 = tmp6 + tmp5; */
1360 psubw_r2r(mm0
, mm2
); /* z10 = tmp6 - tmp5 */
1362 psllw_i2r(2, mm2
); /* shift z10 */
1363 movq_r2r(mm2
, mm0
); /* copy z10 */
1365 pmulhw_m2r(fix_184n261
, mm2
); /* MULTIPLY( z12, FIX_1_847759065); 2*c2 */
1366 movq_r2r(mm3
, mm5
); /* copy tmp4 */
1368 pmulhw_m2r(fix_n184
, mm0
); /* MULTIPLY(z10, -FIX_1_847759065); 2*c2 */
1369 paddw_r2r(mm4
, mm3
); /* z11 = tmp4 + tmp7; */
1371 movq_r2r(mm3
, mm6
); /* copy z11 (phase 5) */
1372 psubw_r2r(mm4
, mm5
); /* z12 = tmp4 - tmp7; */
1374 psubw_r2r(mm1
, mm6
); /* z11-z13 */
1375 psllw_i2r(2, mm5
); /* shift z12 */
1377 movq_m2r(*(idata
+12), mm4
); /* load idata[DCTSIZE*6], even part */
1378 movq_r2r(mm5
, mm7
); /* copy z12 */
1380 pmulhw_m2r(fix_108n184
, mm5
); /* MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; 2*(c2-c6), even part */
1381 paddw_r2r(mm1
, mm3
); /* tmp7 = z11 + z13; */
1386 pmulhw_m2r(fix_184
, mm7
); /* MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; -2*(c2+c6) */
1389 movq_m2r(*(idata
+4), mm1
); /* load idata[DCTSIZE*2] */
1391 paddw_r2r(mm5
, mm0
); /* tmp10 */
1393 paddw_r2r(mm7
, mm2
); /* tmp12 */
1395 pmulhw_m2r(fix_141
, mm6
); /* tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); 2*c4 */
1396 psubw_r2r(mm3
, mm2
); /* tmp6 = tmp12 - tmp7 */
1398 movq_r2r(mm1
, mm5
); /* copy tmp1 */
1399 paddw_r2r(mm4
, mm1
); /* tmp13= tmp1 + tmp3; phases 5-3 */
1401 psubw_r2r(mm4
, mm5
); /* tmp1-tmp3 */
1402 psubw_r2r(mm2
, mm6
); /* tmp5 = tmp11 - tmp6; */
1404 movq_r2m(mm1
, *(wsptr
)); /* save tmp13 in workspace */
1405 psllw_i2r(2, mm5
); /* shift tmp1-tmp3 */
1407 movq_m2r(*(idata
), mm7
); /* load idata[DCTSIZE*0] */
1408 paddw_r2r(mm6
, mm0
); /* tmp4 = tmp10 + tmp5; */
1410 pmulhw_m2r(fix_141
, mm5
); /* MULTIPLY(tmp1 - tmp3, FIX_1_414213562) */
1412 movq_m2r(*(idata
+8), mm4
); /* load idata[DCTSIZE*4] */
1414 psubw_r2r(mm1
, mm5
); /* tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; 2*c4 */
1416 movq_r2m(mm0
, *(wsptr
+4)); /* save tmp4 in workspace */
1417 movq_r2r(mm7
, mm1
); /* copy tmp0 (phase 3) */
1419 movq_r2m(mm5
, *(wsptr
+2)); /* save tmp12 in workspace */
1420 psubw_r2r(mm4
, mm1
); /* tmp11 = tmp0 - tmp2; */
1422 paddw_r2r(mm4
, mm7
); /* tmp10 = tmp0 + tmp2; */
1423 movq_r2r(mm1
, mm5
); /* copy tmp11 */
1425 paddw_m2r(*(wsptr
+2), mm1
); /* tmp1 = tmp11 + tmp12; */
1426 movq_r2r(mm7
, mm4
); /* copy tmp10 (phase 2) */
1428 paddw_m2r(*(wsptr
), mm7
); /* tmp0 = tmp10 + tmp13; */
1430 psubw_m2r(*(wsptr
), mm4
); /* tmp3 = tmp10 - tmp13; */
1431 movq_r2r(mm7
, mm0
); /* copy tmp0 */
1433 psubw_m2r(*(wsptr
+2), mm5
); /* tmp2 = tmp11 - tmp12; */
1434 paddw_r2r(mm3
, mm7
); /* wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7); */
1436 psubw_r2r(mm3
, mm0
); /* wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7); */
1438 movq_r2m(mm7
, *(wsptr
)); /* wsptr[DCTSIZE*0] */
1439 movq_r2r(mm1
, mm3
); /* copy tmp1 */
1441 movq_r2m(mm0
, *(wsptr
+14)); /* wsptr[DCTSIZE*7] */
1442 paddw_r2r(mm2
, mm1
); /* wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6); */
1444 psubw_r2r(mm2
, mm3
); /* wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6); */
1446 movq_r2m(mm1
, *(wsptr
+2)); /* wsptr[DCTSIZE*1] */
1447 movq_r2r(mm4
, mm1
); /* copy tmp3 */
1449 movq_r2m(mm3
, *(wsptr
+12)); /* wsptr[DCTSIZE*6] */
1451 paddw_m2r(*(wsptr
+4), mm4
); /* wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4); */
1453 psubw_m2r(*(wsptr
+4), mm1
); /* wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4); */
1455 movq_r2m(mm4
, *(wsptr
+8));
1456 movq_r2r(mm5
, mm7
); /* copy tmp2 */
1458 paddw_r2r(mm6
, mm5
); /* wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5) */
1460 movq_r2m(mm1
, *(wsptr
+6));
1461 psubw_r2r(mm6
, mm7
); /* wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5); */
1463 movq_r2m(mm5
, *(wsptr
+4));
1465 movq_r2m(mm7
, *(wsptr
+10));
1467 /*****************************************************************/
1469 /* Pass 2: process rows from work array, store into output array. */
1470 /* Note that we must descale the results by a factor of 8 == 2**3, */
1471 /* and also undo the PASS1_BITS scaling. */
1473 /*****************************************************************/
1478 /* tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); */
1479 /* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
1480 /* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
1481 /* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
1482 movq_m2r(*(wsptr
), mm0
); /* wsptr[0,0],[0,1],[0,2],[0,3] */
1484 movq_m2r(*(wsptr
+1), mm1
); /* wsptr[0,4],[0,5],[0,6],[0,7] */
1487 movq_m2r(*(wsptr
+2), mm3
); /* wsptr[1,0],[1,1],[1,2],[1,3] */
1488 paddw_r2r(mm1
, mm0
); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
1490 movq_m2r(*(wsptr
+3), mm4
); /* wsptr[1,4],[1,5],[1,6],[1,7] */
1491 psubw_r2r(mm1
, mm2
); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
1496 paddw_r2r(mm4
, mm3
); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
1499 psubw_r2r(mm4
, mm5
); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
1500 punpcklwd_r2r(mm3
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
1502 movq_m2r(*(wsptr
+7), mm7
); /* wsptr[3,4],[3,5],[3,6],[3,7] */
1503 punpckhwd_r2r(mm3
, mm6
); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
1505 movq_m2r(*(wsptr
+4), mm3
); /* wsptr[2,0],[2,1],[2,2],[2,3] */
1506 punpckldq_r2r(mm6
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
1508 punpcklwd_r2r(mm5
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
1511 movq_m2r(*(wsptr
+6), mm6
); /* wsptr[3,0],[3,1],[3,2],[3,3] */
1512 punpckhwd_r2r(mm5
, mm2
); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
1514 movq_m2r(*(wsptr
+5), mm5
); /* wsptr[2,4],[2,5],[2,6],[2,7] */
1515 punpckldq_r2r(mm2
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
1518 paddw_r2r(mm5
, mm3
); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
1521 psubw_r2r(mm5
, mm4
); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
1522 paddw_r2r(mm7
, mm6
); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
1525 punpcklwd_r2r(mm6
, mm3
); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
1527 psubw_r2r(mm7
, mm2
); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
1528 punpckhwd_r2r(mm6
, mm5
); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
1531 punpckldq_r2r(mm5
, mm3
); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
1533 punpcklwd_r2r(mm2
, mm4
); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
1535 punpckhwd_r2r(mm2
, mm7
); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
1537 punpckldq_r2r(mm7
, mm4
); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
1542 /* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
1543 /* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
1547 punpckhdq_r2r(mm4
, mm6
); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
1549 punpckldq_r2r(mm4
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
1552 pmulhw_m2r(fix_141
, mm6
);
1553 punpckldq_r2r(mm3
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
1555 punpckhdq_r2r(mm3
, mm2
); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
1558 /* tmp0 = tmp10 + tmp13; */
1559 /* tmp3 = tmp10 - tmp13; */
1560 paddw_r2r(mm2
, mm0
); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
1561 psubw_r2r(mm2
, mm7
); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
1563 /* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
1564 psubw_r2r(mm2
, mm6
); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
1565 /* tmp1 = tmp11 + tmp12; */
1566 /* tmp2 = tmp11 - tmp12; */
1573 /* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
1574 /* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
1575 /* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
1576 /* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
1577 movq_m2r(*(wsptr
), mm3
); /* wsptr[0,0],[0,1],[0,2],[0,3] */
1578 paddw_r2r(mm6
, mm1
); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
1580 movq_m2r(*(wsptr
+1), mm4
); /* wsptr[0,4],[0,5],[0,6],[0,7] */
1581 psubw_r2r(mm6
, mm5
); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
1584 punpckldq_r2r(mm4
, mm3
); /* wsptr[0,0],[0,1],[0,4],[0,5] */
1586 punpckhdq_r2r(mm6
, mm4
); /* wsptr[0,6],[0,7],[0,2],[0,3] */
1589 /*Save tmp0 and tmp1 in wsptr */
1590 movq_r2m(mm0
, *(wsptr
)); /* save tmp0 */
1591 paddw_r2r(mm4
, mm2
); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
1594 /*Continue with z10 --- z13 */
1595 movq_m2r(*(wsptr
+2), mm6
); /* wsptr[1,0],[1,1],[1,2],[1,3] */
1596 psubw_r2r(mm4
, mm3
); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
1598 movq_m2r(*(wsptr
+3), mm0
); /* wsptr[1,4],[1,5],[1,6],[1,7] */
1601 movq_r2m(mm1
, *(wsptr
+1)); /* save tmp1 */
1602 punpckldq_r2r(mm0
, mm6
); /* wsptr[1,0],[1,1],[1,4],[1,5] */
1604 punpckhdq_r2r(mm4
, mm0
); /* wsptr[1,6],[1,7],[1,2],[1,3] */
1607 /*Save tmp2 and tmp3 in wsptr */
1608 paddw_r2r(mm0
, mm6
); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
1611 /*Continue with z10 --- z13 */
1612 movq_r2m(mm5
, *(wsptr
+2)); /* save tmp2 */
1613 punpcklwd_r2r(mm6
, mm2
); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
1615 psubw_r2r(mm0
, mm1
); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
1616 punpckhwd_r2r(mm6
, mm4
); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
1619 punpcklwd_r2r(mm1
, mm3
); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
1621 movq_r2m(mm7
, *(wsptr
+3)); /* save tmp3 */
1622 punpckhwd_r2r(mm1
, mm0
); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
1624 movq_m2r(*(wsptr
+4), mm6
); /* wsptr[2,0],[2,1],[2,2],[2,3] */
1625 punpckhdq_r2r(mm2
, mm0
); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
1627 movq_m2r(*(wsptr
+5), mm7
); /* wsptr[2,4],[2,5],[2,6],[2,7] */
1628 punpckhdq_r2r(mm4
, mm3
); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
1630 movq_m2r(*(wsptr
+6), mm1
); /* wsptr[3,0],[3,1],[3,2],[3,3] */
1633 punpckldq_r2r(mm7
, mm6
); /* wsptr[2,0],[2,1],[2,4],[2,5] */
1636 punpckhdq_r2r(mm4
, mm7
); /* wsptr[2,6],[2,7],[2,2],[2,3] */
1639 movq_m2r(*(wsptr
+7), mm4
); /* wsptr[3,4],[3,5],[3,6],[3,7] */
1640 paddw_r2r(mm7
, mm6
); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
1642 psubw_r2r(mm7
, mm2
); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
1643 punpckldq_r2r(mm4
, mm1
); /* wsptr[3,0],[3,1],[3,4],[3,5] */
1645 punpckhdq_r2r(mm5
, mm4
); /* wsptr[3,6],[3,7],[3,2],[3,3] */
1648 paddw_r2r(mm4
, mm1
); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
1649 psubw_r2r(mm4
, mm7
); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
1652 punpcklwd_r2r(mm1
, mm6
); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
1654 punpckhwd_r2r(mm1
, mm5
); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
1657 punpcklwd_r2r(mm7
, mm2
); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
1659 punpckhwd_r2r(mm7
, mm4
); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
1661 punpckhdq_r2r(mm6
, mm4
); /*/ wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
1663 punpckhdq_r2r(mm5
, mm2
); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
1666 punpckldq_r2r(mm4
, mm0
); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
1668 punpckhdq_r2r(mm4
, mm5
); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
1671 punpckhdq_r2r(mm2
, mm4
); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
1674 punpckldq_r2r(mm2
, mm3
); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
1675 /* tmp7 = z11 + z13; (phase 5) */
1676 /* tmp8 = z11 - z13; (phase 5) */
1677 psubw_r2r(mm4
, mm1
); /* tmp8 */
1679 paddw_r2r(mm4
, mm5
); /* tmp7 */
1680 /* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */
1685 pmulhw_m2r(fix_141
, mm1
); /* tmp21 */
1686 /* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) -- 2*(c2-c6) */
1687 /* + MULTIPLY(z10, - FIX_1_847759065); 2*c2 */
1691 pmulhw_m2r(fix_n184
, mm7
);
1694 movq_m2r(*(wsptr
), mm2
); /* tmp0,final1 */
1696 pmulhw_m2r(fix_108n184
, mm6
);
1697 /* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) -- -2*(c2+c6) */
1698 /* + MULTIPLY(z12, FIX_1_847759065); 2*c2 */
1699 movq_r2r(mm2
, mm4
); /* final1 */
1701 pmulhw_m2r(fix_184n261
, mm0
);
1702 paddw_r2r(mm5
, mm2
); /* tmp0+tmp7,final1 */
1704 pmulhw_m2r(fix_184
, mm3
);
1705 psubw_r2r(mm5
, mm4
); /* tmp0-tmp7,final1 */
1707 /* tmp6 = tmp22 - tmp7; (phase 2) */
1708 psraw_i2r(3, mm2
); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
1710 paddw_r2r(mm6
, mm7
); /* tmp20 */
1711 psraw_i2r(3, mm4
); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
1713 paddw_r2r(mm0
, mm3
); /* tmp22 */
1715 /* tmp5 = tmp21 - tmp6; */
1716 psubw_r2r(mm5
, mm3
); /* tmp6 */
1718 /* tmp4 = tmp20 + tmp5; */
1719 movq_m2r(*(wsptr
+1), mm0
); /* tmp1,final2 */
1720 psubw_r2r(mm3
, mm1
); /* tmp5 */
1722 movq_r2r(mm0
, mm6
); /* final2 */
1723 paddw_r2r(mm3
, mm0
); /* tmp1+tmp6,final2 */
1725 /* Final output stage: scale down by a factor of 8 and range-limit */
1728 /* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
1729 /* & RANGE_MASK]; */
1730 /* outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) */
1731 /* & RANGE_MASK]; final1 */
1734 /* outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) */
1735 /* & RANGE_MASK]; */
1736 /* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
1737 /* & RANGE_MASK]; final2 */
1738 psubw_r2r(mm3
, mm6
); /* tmp1-tmp6,final2 */
1739 psraw_i2r(3, mm0
); /* outptr[0,1],[1,1],[2,1],[3,1] */
1741 psraw_i2r(3, mm6
); /* outptr[0,6],[1,6],[2,6],[3,6] */
1743 packuswb_r2r(mm4
, mm0
); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
1745 movq_m2r(*(wsptr
+2), mm5
); /* tmp2,final3 */
1746 packuswb_r2r(mm6
, mm2
); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
1748 /* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
1749 /* & RANGE_MASK]; */
1750 /* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
1751 /* & RANGE_MASK]; final3 */
1752 paddw_r2r(mm1
, mm7
); /* tmp4 */
1755 paddw_r2r(mm1
, mm5
); /* tmp2+tmp5 */
1756 psubw_r2r(mm1
, mm3
); /* tmp2-tmp5 */
1758 psraw_i2r(3, mm5
); /* outptr[0,2],[1,2],[2,2],[3,2] */
1760 movq_m2r(*(wsptr
+3), mm4
); /* tmp3,final4 */
1761 psraw_i2r(3, mm3
); /* outptr[0,5],[1,5],[2,5],[3,5] */
1765 /* outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) */
1766 /* & RANGE_MASK]; */
1767 /* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
1768 /* & RANGE_MASK]; final4 */
1770 paddw_r2r(mm7
, mm4
); /* tmp3+tmp4 */
1772 psubw_r2r(mm7
, mm6
); /* tmp3-tmp4 */
1773 psraw_i2r(3, mm4
); /* outptr[0,4],[1,4],[2,4],[3,4] */
1775 /* mov ecx, [dataptr] */
1777 psraw_i2r(3, mm6
); /* outptr[0,3],[1,3],[2,3],[3,3] */
1779 packuswb_r2r(mm4
, mm5
); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
1781 packuswb_r2r(mm3
, mm6
); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
1785 punpcklbw_r2r(mm0
, mm2
); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
1787 punpckhbw_r2r(mm0
, mm4
); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
1790 punpcklbw_r2r(mm6
, mm5
); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
1792 /* add dataptr, 4 */
1794 punpckhbw_r2r(mm6
, mm7
); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
1796 punpcklwd_r2r(mm5
, mm2
); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
1798 /* add ecx, output_col */
1801 punpckhwd_r2r(mm5
, mm1
); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
1804 punpcklwd_r2r(mm4
, mm6
); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
1806 /* mov idata, [dataptr] */
1808 punpckldq_r2r(mm6
, mm2
); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
1810 /* add dataptr, 4 */
1814 /* add idata, output_col */
1816 punpckhwd_r2r(mm4
, mm7
); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
1818 movq_r2m(mm2
, *(dataptr
));
1820 punpckhdq_r2r(mm6
, mm0
); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
1823 movq_r2m(mm0
, *(dataptr
));
1825 punpckldq_r2r(mm7
, mm1
); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
1826 punpckhdq_r2r(mm7
, mm3
); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
1829 movq_r2m(mm1
, *(dataptr
));
1832 movq_r2m(mm3
, *(dataptr
));
1834 /*******************************************************************/
1838 /*******************************************************************/
1840 /* tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); */
1841 /* tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); */
1842 /* tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); */
1843 /* tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]); */
1844 movq_m2r(*(wsptr
), mm0
); /* wsptr[0,0],[0,1],[0,2],[0,3] */
1846 movq_m2r(*(wsptr
+1), mm1
); /* wsptr[0,4],[0,5],[0,6],[0,7] */
1849 movq_m2r(*(wsptr
+2), mm3
); /* wsptr[1,0],[1,1],[1,2],[1,3] */
1850 paddw_r2r(mm1
, mm0
); /* wsptr[0,tmp10],[xxx],[0,tmp13],[xxx] */
1852 movq_m2r(*(wsptr
+3), mm4
); /* wsptr[1,4],[1,5],[1,6],[1,7] */
1853 psubw_r2r(mm1
, mm2
); /* wsptr[0,tmp11],[xxx],[0,tmp14],[xxx] */
1858 paddw_r2r(mm4
, mm3
); /* wsptr[1,tmp10],[xxx],[1,tmp13],[xxx] */
1861 psubw_r2r(mm4
, mm5
); /* wsptr[1,tmp11],[xxx],[1,tmp14],[xxx] */
1862 punpcklwd_r2r(mm3
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[xxx],[xxx] */
1864 movq_m2r(*(wsptr
+7), mm7
); /* wsptr[3,4],[3,5],[3,6],[3,7] */
1865 punpckhwd_r2r(mm3
, mm6
); /* wsptr[0,tmp13],[1,tmp13],[xxx],[xxx] */
1867 movq_m2r(*(wsptr
+4), mm3
); /* wsptr[2,0],[2,1],[2,2],[2,3] */
1868 punpckldq_r2r(mm6
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
1870 punpcklwd_r2r(mm5
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[xxx],[xxx] */
1873 movq_m2r(*(wsptr
+6), mm6
); /* wsptr[3,0],[3,1],[3,2],[3,3] */
1874 punpckhwd_r2r(mm5
, mm2
); /* wsptr[0,tmp14],[1,tmp14],[xxx],[xxx] */
1876 movq_m2r(*(wsptr
+5), mm5
); /* wsptr[2,4],[2,5],[2,6],[2,7] */
1877 punpckldq_r2r(mm2
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
1879 paddw_r2r(mm5
, mm3
); /* wsptr[2,tmp10],[xxx],[2,tmp13],[xxx] */
1882 psubw_r2r(mm5
, mm4
); /* wsptr[2,tmp11],[xxx],[2,tmp14],[xxx] */
1883 paddw_r2r(mm7
, mm6
); /* wsptr[3,tmp10],[xxx],[3,tmp13],[xxx] */
1886 punpcklwd_r2r(mm6
, mm3
); /* wsptr[2,tmp10],[3,tmp10],[xxx],[xxx] */
1888 psubw_r2r(mm7
, mm2
); /* wsptr[3,tmp11],[xxx],[3,tmp14],[xxx] */
1889 punpckhwd_r2r(mm6
, mm5
); /* wsptr[2,tmp13],[3,tmp13],[xxx],[xxx] */
1892 punpckldq_r2r(mm5
, mm3
); /* wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13] */
1894 punpcklwd_r2r(mm2
, mm4
); /* wsptr[2,tmp11],[3,tmp11],[xxx],[xxx] */
1896 punpckhwd_r2r(mm2
, mm7
); /* wsptr[2,tmp14],[3,tmp14],[xxx],[xxx] */
1898 punpckldq_r2r(mm7
, mm4
); /* wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14] */
1903 /* mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13] */
1904 /* mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14] */
1907 punpckhdq_r2r(mm4
, mm6
); /* wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14] */
1909 punpckldq_r2r(mm4
, mm1
); /* wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11] */
1912 pmulhw_m2r(fix_141
, mm6
);
1913 punpckldq_r2r(mm3
, mm0
); /* wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10] */
1915 punpckhdq_r2r(mm3
, mm2
); /* wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13] */
1918 /* tmp0 = tmp10 + tmp13; */
1919 /* tmp3 = tmp10 - tmp13; */
1920 paddw_r2r(mm2
, mm0
); /* [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0] */
1921 psubw_r2r(mm2
, mm7
); /* [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3] */
1923 /* tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13; */
1924 psubw_r2r(mm2
, mm6
); /* wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12] */
1925 /* tmp1 = tmp11 + tmp12; */
1926 /* tmp2 = tmp11 - tmp12; */
1934 /* z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; */
1935 /* z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; */
1936 /* z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; */
1937 /* z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; */
1938 movq_m2r(*(wsptr
), mm3
); /* wsptr[0,0],[0,1],[0,2],[0,3] */
1939 paddw_r2r(mm6
, mm1
); /* [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1] */
1941 movq_m2r(*(wsptr
+1), mm4
); /* wsptr[0,4],[0,5],[0,6],[0,7] */
1942 psubw_r2r(mm6
, mm5
); /* [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2] */
1945 punpckldq_r2r(mm4
, mm3
); /* wsptr[0,0],[0,1],[0,4],[0,5] */
1947 punpckhdq_r2r(mm6
, mm4
); /* wsptr[0,6],[0,7],[0,2],[0,3] */
1950 /*Save tmp0 and tmp1 in wsptr */
1951 movq_r2m(mm0
, *(wsptr
)); /* save tmp0 */
1952 paddw_r2r(mm4
, mm2
); /* wsptr[xxx],[0,z11],[xxx],[0,z13] */
1955 /*Continue with z10 --- z13 */
1956 movq_m2r(*(wsptr
+2), mm6
); /* wsptr[1,0],[1,1],[1,2],[1,3] */
1957 psubw_r2r(mm4
, mm3
); /* wsptr[xxx],[0,z12],[xxx],[0,z10] */
1959 movq_m2r(*(wsptr
+3), mm0
); /* wsptr[1,4],[1,5],[1,6],[1,7] */
1962 movq_r2m(mm1
, *(wsptr
+1)); /* save tmp1 */
1963 punpckldq_r2r(mm0
, mm6
); /* wsptr[1,0],[1,1],[1,4],[1,5] */
1965 punpckhdq_r2r(mm4
, mm0
); /* wsptr[1,6],[1,7],[1,2],[1,3] */
1968 /*Save tmp2 and tmp3 in wsptr */
1969 paddw_r2r(mm0
, mm6
); /* wsptr[xxx],[1,z11],[xxx],[1,z13] */
1972 /*Continue with z10 --- z13 */
1973 movq_r2m(mm5
, *(wsptr
+2)); /* save tmp2 */
1974 punpcklwd_r2r(mm6
, mm2
); /* wsptr[xxx],[xxx],[0,z11],[1,z11] */
1976 psubw_r2r(mm0
, mm1
); /* wsptr[xxx],[1,z12],[xxx],[1,z10] */
1977 punpckhwd_r2r(mm6
, mm4
); /* wsptr[xxx],[xxx],[0,z13],[1,z13] */
1980 punpcklwd_r2r(mm1
, mm3
); /* wsptr[xxx],[xxx],[0,z12],[1,z12] */
1982 movq_r2m(mm7
, *(wsptr
+3)); /* save tmp3 */
1983 punpckhwd_r2r(mm1
, mm0
); /* wsptr[xxx],[xxx],[0,z10],[1,z10] */
1985 movq_m2r(*(wsptr
+4), mm6
); /* wsptr[2,0],[2,1],[2,2],[2,3] */
1986 punpckhdq_r2r(mm2
, mm0
); /* wsptr[0,z10],[1,z10],[0,z11],[1,z11] */
1988 movq_m2r(*(wsptr
+5), mm7
); /* wsptr[2,4],[2,5],[2,6],[2,7] */
1989 punpckhdq_r2r(mm4
, mm3
); /* wsptr[0,z12],[1,z12],[0,z13],[1,z13] */
1991 movq_m2r(*(wsptr
+6), mm1
); /* wsptr[3,0],[3,1],[3,2],[3,3] */
1994 punpckldq_r2r(mm7
, mm6
); /* wsptr[2,0],[2,1],[2,4],[2,5] */
1997 punpckhdq_r2r(mm4
, mm7
); /* wsptr[2,6],[2,7],[2,2],[2,3] */
2000 movq_m2r(*(wsptr
+7), mm4
); /* wsptr[3,4],[3,5],[3,6],[3,7] */
2001 paddw_r2r(mm7
, mm6
); /* wsptr[xxx],[2,z11],[xxx],[2,z13] */
2003 psubw_r2r(mm7
, mm2
); /* wsptr[xxx],[2,z12],[xxx],[2,z10] */
2004 punpckldq_r2r(mm4
, mm1
); /* wsptr[3,0],[3,1],[3,4],[3,5] */
2006 punpckhdq_r2r(mm5
, mm4
); /* wsptr[3,6],[3,7],[3,2],[3,3] */
2009 paddw_r2r(mm4
, mm1
); /* wsptr[xxx],[3,z11],[xxx],[3,z13] */
2010 psubw_r2r(mm4
, mm7
); /* wsptr[xxx],[3,z12],[xxx],[3,z10] */
2013 punpcklwd_r2r(mm1
, mm6
); /* wsptr[xxx],[xxx],[2,z11],[3,z11] */
2015 punpckhwd_r2r(mm1
, mm5
); /* wsptr[xxx],[xxx],[2,z13],[3,z13] */
2018 punpcklwd_r2r(mm7
, mm2
); /* wsptr[xxx],[xxx],[2,z12],[3,z12] */
2020 punpckhwd_r2r(mm7
, mm4
); /* wsptr[xxx],[xxx],[2,z10],[3,z10] */
2022 punpckhdq_r2r(mm6
, mm4
); /* wsptr[2,z10],[3,z10],[2,z11],[3,z11] */
2024 punpckhdq_r2r(mm5
, mm2
); /* wsptr[2,z12],[3,z12],[2,z13],[3,z13] */
2027 punpckldq_r2r(mm4
, mm0
); /* wsptr[0,z10],[1,z10],[2,z10],[3,z10] */
2029 punpckhdq_r2r(mm4
, mm5
); /* wsptr[0,z11],[1,z11],[2,z11],[3,z11] */
2032 punpckhdq_r2r(mm2
, mm4
); /* wsptr[0,z13],[1,z13],[2,z13],[3,z13] */
2035 punpckldq_r2r(mm2
, mm3
); /* wsptr[0,z12],[1,z12],[2,z12],[3,z12] */
2036 /* tmp7 = z11 + z13; (phase 5) */
2037 /* tmp8 = z11 - z13; (phase 5) */
2038 psubw_r2r(mm4
, mm1
); /* tmp8 */
2040 paddw_r2r(mm4
, mm5
); /* tmp7 */
2041 /* tmp21 = MULTIPLY(tmp8, FIX_1_414213562); 2*c4 */
2046 pmulhw_m2r(fix_141
, mm1
); /* tmp21 */
2047 /* tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) -- 2*(c2-c6) */
2048 /* + MULTIPLY(z10, - FIX_1_847759065); 2*c2 */
2052 pmulhw_m2r(fix_n184
, mm7
);
2055 movq_m2r(*(wsptr
), mm2
); /* tmp0,final1 */
2057 pmulhw_m2r(fix_108n184
, mm6
);
2058 /* tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) -- -2*(c2+c6) */
2059 /* + MULTIPLY(z12, FIX_1_847759065); 2*c2 */
2060 movq_r2r(mm2
, mm4
); /* final1 */
2062 pmulhw_m2r(fix_184n261
, mm0
);
2063 paddw_r2r(mm5
, mm2
); /* tmp0+tmp7,final1 */
2065 pmulhw_m2r(fix_184
, mm3
);
2066 psubw_r2r(mm5
, mm4
); /* tmp0-tmp7,final1 */
2068 /* tmp6 = tmp22 - tmp7; (phase 2) */
2069 psraw_i2r(3, mm2
); /* outptr[0,0],[1,0],[2,0],[3,0],final1 */
2071 paddw_r2r(mm6
, mm7
); /* tmp20 */
2072 psraw_i2r(3, mm4
); /* outptr[0,7],[1,7],[2,7],[3,7],final1 */
2074 paddw_r2r(mm0
, mm3
); /* tmp22 */
2076 /* tmp5 = tmp21 - tmp6; */
2077 psubw_r2r(mm5
, mm3
); /* tmp6 */
2079 /* tmp4 = tmp20 + tmp5; */
2080 movq_m2r(*(wsptr
+1), mm0
); /* tmp1,final2 */
2081 psubw_r2r(mm3
, mm1
); /* tmp5 */
2083 movq_r2r(mm0
, mm6
); /* final2 */
2084 paddw_r2r(mm3
, mm0
); /* tmp1+tmp6,final2 */
2086 /* Final output stage: scale down by a factor of 8 and range-limit */
2088 /* outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) */
2089 /* & RANGE_MASK]; */
2090 /* outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) */
2091 /* & RANGE_MASK]; final1 */
2094 /* outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) */
2095 /* & RANGE_MASK]; */
2096 /* outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) */
2097 /* & RANGE_MASK]; final2 */
2098 psubw_r2r(mm3
, mm6
); /* tmp1-tmp6,final2 */
2099 psraw_i2r(3, mm0
); /* outptr[0,1],[1,1],[2,1],[3,1] */
2101 psraw_i2r(3, mm6
); /* outptr[0,6],[1,6],[2,6],[3,6] */
2103 packuswb_r2r(mm4
, mm0
); /* out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7] */
2105 movq_m2r(*(wsptr
+2), mm5
); /* tmp2,final3 */
2106 packuswb_r2r(mm6
, mm2
); /* out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6] */
2108 /* outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) */
2109 /* & RANGE_MASK]; */
2110 /* outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) */
2111 /* & RANGE_MASK]; final3 */
2112 paddw_r2r(mm1
, mm7
); /* tmp4 */
2115 paddw_r2r(mm1
, mm5
); /* tmp2+tmp5 */
2116 psubw_r2r(mm1
, mm3
); /* tmp2-tmp5 */
2118 psraw_i2r(3, mm5
); /* outptr[0,2],[1,2],[2,2],[3,2] */
2120 movq_m2r(*(wsptr
+3), mm4
); /* tmp3,final4 */
2121 psraw_i2r(3, mm3
); /* outptr[0,5],[1,5],[2,5],[3,5] */
2125 /* outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) */
2126 /* & RANGE_MASK]; */
2127 /* outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) */
2128 /* & RANGE_MASK]; final4 */
2130 paddw_r2r(mm7
, mm4
); /* tmp3+tmp4 */
2132 psubw_r2r(mm7
, mm6
); /* tmp3-tmp4 */
2133 psraw_i2r(3, mm4
); /* outptr[0,4],[1,4],[2,4],[3,4] */
2135 psraw_i2r(3, mm6
); /* outptr[0,3],[1,3],[2,3],[3,3] */
2138 movq_r2m(mm4, *dummy);
2139 fprintf(stderr, "3-4 %016llx\n", dummy);
2140 movq_r2m(mm4, *dummy);
2141 fprintf(stderr, "3+4 %016llx\n", dummy);
2145 packuswb_r2r(mm4
, mm5
); /* out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4] */
2147 packuswb_r2r(mm3
, mm6
); /* out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5] */
2151 punpcklbw_r2r(mm0
, mm2
); /* out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1] */
2153 punpckhbw_r2r(mm0
, mm4
); /* out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7] */
2156 punpcklbw_r2r(mm6
, mm5
); /* out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3] */
2158 punpckhbw_r2r(mm6
, mm7
); /* out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5] */
2160 punpcklwd_r2r(mm5
, mm2
); /* out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3] */
2163 punpckhwd_r2r(mm5
, mm1
); /* out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3] */
2166 punpcklwd_r2r(mm4
, mm6
); /* out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7] */
2168 punpckldq_r2r(mm6
, mm2
); /* out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7] */
2172 punpckhwd_r2r(mm4
, mm7
); /* out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7] */
2175 movq_r2m(mm2
, *(dataptr
));
2177 punpckhdq_r2r(mm6
, mm0
); /* out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7] */
2180 movq_r2m(mm0
, *(dataptr
));
2182 punpckldq_r2r(mm7
, mm1
); /* out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7] */
2184 punpckhdq_r2r(mm7
, mm3
); /* out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7] */
2187 movq_r2m(mm1
, *(dataptr
));
2190 movq_r2m(mm3
, *(dataptr
));
2193 __s32 tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
2194 __s32 tmp10
, tmp11
, tmp12
, tmp13
;
2195 __s32 z5
, z10
, z11
, z12
, z13
;
2201 __s32 workspace
[64];
2205 for (ctr
= 8; ctr
> 0; ctr
--) {
2207 if ((inptr
[8] | inptr
[16] | inptr
[24] |
2208 inptr
[32] | inptr
[40] | inptr
[48] | inptr
[56]) == 0) {
2229 tmp10
= tmp0
+ tmp2
;
2230 tmp11
= tmp0
- tmp2
;
2232 tmp13
= tmp1
+ tmp3
;
2233 tmp12
= MULTIPLY(tmp1
- tmp3
, FIX_1_414213562
) - tmp13
;
2235 tmp0
= tmp10
+ tmp13
;
2236 tmp3
= tmp10
- tmp13
;
2237 tmp1
= tmp11
+ tmp12
;
2238 tmp2
= tmp11
- tmp12
;
2251 tmp11
= MULTIPLY(z11
- z13
, FIX_1_414213562
);
2253 z5
= MULTIPLY(z10
+ z12
, FIX_1_847759065
);
2254 tmp10
= MULTIPLY(z12
, FIX_1_082392200
) - z5
;
2255 tmp12
= MULTIPLY(z10
, - FIX_2_613125930
) + z5
;
2257 tmp6
= tmp12
- tmp7
;
2258 tmp5
= tmp11
- tmp6
;
2259 tmp4
= tmp10
+ tmp5
;
2261 wsptr
[0] = (__s32
) (tmp0
+ tmp7
);
2262 wsptr
[56] = (__s32
) (tmp0
- tmp7
);
2263 wsptr
[8] = (__s32
) (tmp1
+ tmp6
);
2264 wsptr
[48] = (__s32
) (tmp1
- tmp6
);
2265 wsptr
[16] = (__s32
) (tmp2
+ tmp5
);
2266 wsptr
[40] = (__s32
) (tmp2
- tmp5
);
2267 wsptr
[32] = (__s32
) (tmp3
+ tmp4
);
2268 wsptr
[24] = (__s32
) (tmp3
- tmp4
);
2275 for (ctr
= 0; ctr
< 8; ctr
++) {
2276 outptr
= &(odata
[ctr
*rskip
]);
2278 tmp10
= wsptr
[0] + wsptr
[4];
2279 tmp11
= wsptr
[0] - wsptr
[4];
2281 tmp13
= wsptr
[2] + wsptr
[6];
2282 tmp12
= MULTIPLY(wsptr
[2] - wsptr
[6], FIX_1_414213562
) - tmp13
;
2284 tmp0
= tmp10
+ tmp13
;
2285 tmp3
= tmp10
- tmp13
;
2286 tmp1
= tmp11
+ tmp12
;
2287 tmp2
= tmp11
- tmp12
;
2289 z13
= wsptr
[5] + wsptr
[3];
2290 z10
= wsptr
[5] - wsptr
[3];
2291 z11
= wsptr
[1] + wsptr
[7];
2292 z12
= wsptr
[1] - wsptr
[7];
2295 tmp11
= MULTIPLY(z11
- z13
, FIX_1_414213562
);
2297 z5
= MULTIPLY(z10
+ z12
, FIX_1_847759065
);
2298 tmp10
= MULTIPLY(z12
, FIX_1_082392200
) - z5
;
2299 tmp12
= MULTIPLY(z10
, - FIX_2_613125930
) + z5
;
2301 tmp6
= tmp12
- tmp7
;
2302 tmp5
= tmp11
- tmp6
;
2303 tmp4
= tmp10
+ tmp5
;
2305 outptr
[0] = RL(DESCALE(tmp0
+ tmp7
));
2306 outptr
[7] = RL(DESCALE(tmp0
- tmp7
));
2307 outptr
[1] = RL(DESCALE(tmp1
+ tmp6
));
2308 outptr
[6] = RL(DESCALE(tmp1
- tmp6
));
2309 outptr
[2] = RL(DESCALE(tmp2
+ tmp5
));
2310 outptr
[5] = RL(DESCALE(tmp2
- tmp5
));
2311 outptr
[4] = RL(DESCALE(tmp3
+ tmp4
));
2312 outptr
[3] = RL(DESCALE(tmp3
- tmp4
));
2322 This file contains most of the initialisation and control functions
2324 (C) Justin Schoeman 1998
2332 Initialise all the cache-aliged data blocks
2336 void RTjpeg_init_data(void)
2340 dptr
=(unsigned long)&(RTjpeg_alldata
[0]);
2343 dptr
=dptr
<<5; /* cache align data */
2345 RTjpeg_block
=(__s16
*)dptr
;
2346 dptr
+=sizeof(__s16
)*64;
2347 RTjpeg_lqt
=(__s32
*)dptr
;
2348 dptr
+=sizeof(__s32
)*64;
2349 RTjpeg_cqt
=(__s32
*)dptr
;
2350 dptr
+=sizeof(__s32
)*64;
2351 RTjpeg_liqt
=(__u32
*)dptr
;
2352 dptr
+=sizeof(__u32
)*64;
2353 RTjpeg_ciqt
=(__u32
*)dptr
;
2360 Re-set quality factor
2362 Input: buf -> pointer to 128 ints for quant values store to pass back to
2364 Q -> quality factor (192=best, 32=worst)
2367 void RTjpeg_init_Q(__u8 Q
)
2372 qual
=(__u64
)Q
<<(32-7); /* 32 bit FP, 255=2, 0=0 */
2376 RTjpeg_lqt
[i
]=(__s32
)((qual
/((__u64
)RTjpeg_lum_quant_tbl
[i
]<<16))>>3);
2377 if(RTjpeg_lqt
[i
]==0)RTjpeg_lqt
[i
]=1;
2378 RTjpeg_cqt
[i
]=(__s32
)((qual
/((__u64
)RTjpeg_chrom_quant_tbl
[i
]<<16))>>3);
2379 if(RTjpeg_cqt
[i
]==0)RTjpeg_cqt
[i
]=1;
2380 RTjpeg_liqt
[i
]=(1<<16)/(RTjpeg_lqt
[i
]<<3);
2381 RTjpeg_ciqt
[i
]=(1<<16)/(RTjpeg_cqt
[i
]<<3);
2382 RTjpeg_lqt
[i
]=((1<<16)/RTjpeg_liqt
[i
])>>3;
2383 RTjpeg_cqt
[i
]=((1<<16)/RTjpeg_ciqt
[i
])>>3;
2387 while(RTjpeg_liqt
[RTjpeg_ZZ
[++RTjpeg_lb8
]]<=8);
2390 while(RTjpeg_ciqt
[RTjpeg_ZZ
[++RTjpeg_cb8
]]<=8);
2395 RTjpeg_quant_init();
2402 Initialise compression.
2404 Input: buf -> pointer to 128 ints for quant values store to pass back to
2406 width -> width of image
2407 height -> height of image
2408 Q -> quality factor (192=best, 32=worst)
2412 void RTjpeg_init_compress(__u32
*buf
, int width
, int height
, __u8 Q
)
2420 RTjpeg_height
=height
;
2421 RTjpeg_Ywidth
= RTjpeg_width
>>3;
2422 RTjpeg_Ysize
=width
* height
;
2423 RTjpeg_Cwidth
= RTjpeg_width
>>4;
2424 RTjpeg_Csize
= (width
>>1) * height
;
2426 qual
=(__u64
)Q
<<(32-7); /* 32 bit FP, 255=2, 0=0 */
2430 RTjpeg_lqt
[i
]=(__s32
)((qual
/((__u64
)RTjpeg_lum_quant_tbl
[i
]<<16))>>3);
2431 if(RTjpeg_lqt
[i
]==0)RTjpeg_lqt
[i
]=1;
2432 RTjpeg_cqt
[i
]=(__s32
)((qual
/((__u64
)RTjpeg_chrom_quant_tbl
[i
]<<16))>>3);
2433 if(RTjpeg_cqt
[i
]==0)RTjpeg_cqt
[i
]=1;
2434 RTjpeg_liqt
[i
]=(1<<16)/(RTjpeg_lqt
[i
]<<3);
2435 RTjpeg_ciqt
[i
]=(1<<16)/(RTjpeg_cqt
[i
]<<3);
2436 RTjpeg_lqt
[i
]=((1<<16)/RTjpeg_liqt
[i
])>>3;
2437 RTjpeg_cqt
[i
]=((1<<16)/RTjpeg_ciqt
[i
])>>3;
2441 while(RTjpeg_liqt
[RTjpeg_ZZ
[++RTjpeg_lb8
]]<=8);
2444 while(RTjpeg_ciqt
[RTjpeg_ZZ
[++RTjpeg_cb8
]]<=8);
2448 RTjpeg_quant_init();
2451 buf
[i
]=RTjpeg_liqt
[i
];
2453 buf
[64+i
]=RTjpeg_ciqt
[i
];
2456 void RTjpeg_init_decompress(__u32
*buf
, int width
, int height
)
2463 RTjpeg_height
=height
;
2464 RTjpeg_Ywidth
= RTjpeg_width
>>3;
2465 RTjpeg_Ysize
=width
* height
;
2466 RTjpeg_Cwidth
= RTjpeg_width
>>4;
2467 RTjpeg_Csize
= (width
>>1) * height
;
2471 RTjpeg_liqt
[i
]=buf
[i
];
2472 RTjpeg_ciqt
[i
]=buf
[i
+64];
2476 while(RTjpeg_liqt
[RTjpeg_ZZ
[++RTjpeg_lb8
]]<=8);
2479 while(RTjpeg_ciqt
[RTjpeg_ZZ
[++RTjpeg_cb8
]]<=8);
2484 /* RTjpeg_color_init(); */
2487 int RTjpeg_compressYUV420(__s8
*sp
, unsigned char *bp
)
2490 register __s8
* bp1
= bp
+ (RTjpeg_width
<<3);
2491 register __s8
* bp2
= bp
+ RTjpeg_Ysize
;
2492 register __s8
* bp3
= bp2
+ (RTjpeg_Csize
>>1);
2493 register int i
, j
, k
;
2500 for(i
=RTjpeg_height
>>1; i
; i
-=8)
2502 for(j
=0, k
=0; j
<RTjpeg_width
; j
+=16, k
+=8)
2504 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2505 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2506 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2508 RTjpeg_dctY(bp
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2509 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2510 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2512 RTjpeg_dctY(bp1
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2513 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2514 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2516 RTjpeg_dctY(bp1
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2517 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2518 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2520 RTjpeg_dctY(bp2
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2521 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2522 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2524 RTjpeg_dctY(bp3
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2525 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2526 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2529 bp
+=RTjpeg_width
<<4;
2530 bp1
+=RTjpeg_width
<<4;
2531 bp2
+=RTjpeg_width
<<2;
2532 bp3
+=RTjpeg_width
<<2;
2541 int RTjpeg_compressYUV422(__s8
*sp
, unsigned char *bp
)
2544 register __s8
* bp2
= bp
+ RTjpeg_Ysize
;
2545 register __s8
* bp3
= bp2
+ RTjpeg_Csize
;
2546 register int i
, j
, k
;
2553 for(i
=RTjpeg_height
; i
; i
-=8)
2555 for(j
=0, k
=0; j
<RTjpeg_width
; j
+=16, k
+=8)
2557 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2558 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2559 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2561 RTjpeg_dctY(bp
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2562 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2563 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2565 RTjpeg_dctY(bp2
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2566 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2567 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2569 RTjpeg_dctY(bp3
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2570 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2571 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2574 bp
+=RTjpeg_width
<<3;
2575 bp2
+=RTjpeg_width
<<2;
2576 bp3
+=RTjpeg_width
<<2;
2585 int RTjpeg_compress8(__s8
*sp
, unsigned char *bp
)
2596 for(i
=0; i
<RTjpeg_height
; i
+=8)
2598 for(j
=0; j
<RTjpeg_width
; j
+=8)
2600 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_width
);
2601 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2602 sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2613 void RTjpeg_decompressYUV422(__s8
*sp
, __u8
*bp
)
2615 register __s8
* bp2
= bp
+ RTjpeg_Ysize
;
2616 register __s8
* bp3
= bp2
+ (RTjpeg_Csize
);
2624 for(i
=RTjpeg_height
; i
; i
-=8)
2626 for(k
=0, j
=0; j
<RTjpeg_width
; j
+=16, k
+=8) {
2630 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2631 RTjpeg_idct(bp
+j
, RTjpeg_block
, RTjpeg_width
);
2636 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2637 RTjpeg_idct(bp
+j
+8, RTjpeg_block
, RTjpeg_width
);
2642 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_cb8
, RTjpeg_ciqt
);
2643 RTjpeg_idct(bp2
+k
, RTjpeg_block
, RTjpeg_width
>>1);
2648 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_cb8
, RTjpeg_ciqt
);
2649 RTjpeg_idct(bp3
+k
, RTjpeg_block
, RTjpeg_width
>>1);
2652 bp
+=RTjpeg_width
<<3;
2653 bp2
+=RTjpeg_width
<<2;
2654 bp3
+=RTjpeg_width
<<2;
2661 void RTjpeg_decompressYUV420(__s8
*sp
, __u8
*bp
)
2663 register __s8
* bp1
= bp
+ (RTjpeg_width
<<3);
2664 register __s8
* bp2
= bp
+ RTjpeg_Ysize
;
2665 register __s8
* bp3
= bp2
+ (RTjpeg_Csize
>>1);
2673 for(i
=RTjpeg_height
>>1; i
; i
-=8)
2675 for(k
=0, j
=0; j
<RTjpeg_width
; j
+=16, k
+=8) {
2679 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2680 RTjpeg_idct(bp
+j
, RTjpeg_block
, RTjpeg_width
);
2685 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2686 RTjpeg_idct(bp
+j
+8, RTjpeg_block
, RTjpeg_width
);
2691 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2692 RTjpeg_idct(bp1
+j
, RTjpeg_block
, RTjpeg_width
);
2697 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2698 RTjpeg_idct(bp1
+j
+8, RTjpeg_block
, RTjpeg_width
);
2703 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_cb8
, RTjpeg_ciqt
);
2704 RTjpeg_idct(bp2
+k
, RTjpeg_block
, RTjpeg_width
>>1);
2709 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_cb8
, RTjpeg_ciqt
);
2710 RTjpeg_idct(bp3
+k
, RTjpeg_block
, RTjpeg_width
>>1);
2713 bp
+=RTjpeg_width
<<4;
2714 bp1
+=RTjpeg_width
<<4;
2715 bp2
+=RTjpeg_width
<<2;
2716 bp3
+=RTjpeg_width
<<2;
2723 void RTjpeg_decompress8(__s8
*sp
, __u8
*bp
)
2732 for(i
=0; i
<RTjpeg_height
; i
+=8)
2734 for(j
=0; j
<RTjpeg_width
; j
+=8)
2738 sp
+=RTjpeg_s2b(RTjpeg_block
, sp
, RTjpeg_lb8
, RTjpeg_liqt
);
2739 RTjpeg_idct(bp
+j
, RTjpeg_block
, RTjpeg_width
);
2741 bp
+=RTjpeg_width
<<3;
2748 Initialise additional data structures for motion compensation
2752 void RTjpeg_init_mcompress(void)
2758 RTjpeg_old
=malloc((4*RTjpeg_width
*RTjpeg_height
)+32);
2759 tmp
=(unsigned long)RTjpeg_old
;
2762 RTjpeg_old
=(__s16
*)(tmp
<<5);
2766 fprintf(stderr
, "RTjpeg: Could not allocate memory\n");
2769 bzero(RTjpeg_old
, ((4*RTjpeg_width
*RTjpeg_height
)));
2774 int RTjpeg_bcomp(__s16
*old
, mmx_t
*mask
)
2777 mmx_t
*mold
=(mmx_t
*)old
;
2778 mmx_t
*mblock
=(mmx_t
*)RTjpeg_block
;
2780 static mmx_t neg
=(mmx_t
)(unsigned long long)0xffffffffffffffffULL
;
2782 movq_m2r(*mask
, mm7
);
2788 movq_m2r(*(mblock
++), mm0
);
2789 movq_m2r(*(mblock
++), mm2
);
2790 movq_m2r(*(mold
++), mm1
);
2791 movq_m2r(*(mold
++), mm3
);
2792 psubsw_r2r(mm1
, mm0
);
2793 psubsw_r2r(mm3
, mm2
);
2796 pcmpgtw_r2r(mm7
, mm0
);
2797 pcmpgtw_r2r(mm7
, mm2
);
2800 pcmpgtw_r2r(mm7
, mm1
);
2801 pcmpgtw_r2r(mm7
, mm3
);
2807 movq_r2m(mm5
, result
);
2812 for(i
=0; i
<16; i
++)((__u64
*)old
)[i
]=((__u64
*)RTjpeg_block
)[i
];
2820 int RTjpeg_bcomp(__s16
*old
, __u16
*mask
)
2825 if(abs(old
[i
]-RTjpeg_block
[i
])>*mask
)
2828 for(i
=0; i
<16; i
++)((__u64
*)old
)[i
]=((__u64
*)RTjpeg_block
)[i
];
2835 void RTjpeg_set_test(int i
)
2840 int RTjpeg_mcompressYUV420(__s8
*sp
, unsigned char *bp
, __u16 lmask
, __u16 cmask
)
2844 register __s8
* bp1
= bp
+ (RTjpeg_width
<<3);
2845 register __s8
* bp2
= bp
+ RTjpeg_Ysize
;
2846 register __s8
* bp3
= bp2
+ (RTjpeg_Csize
>>1);
2847 register int i
, j
, k
;
2851 RTjpeg_lmask
=(mmx_t
)(((__u64
)lmask
<<48)|((__u64
)lmask
<<32)|((__u64
)lmask
<<16)|lmask
);
2852 RTjpeg_cmask
=(mmx_t
)(((__u64
)cmask
<<48)|((__u64
)cmask
<<32)|((__u64
)cmask
<<16)|cmask
);
2861 for(i
=RTjpeg_height
>>1; i
; i
-=8)
2863 for(j
=0, k
=0; j
<RTjpeg_width
; j
+=16, k
+=8)
2865 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2866 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2867 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2869 *((__u8
*)sp
++)=255;
2871 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2874 RTjpeg_dctY(bp
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2875 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2876 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2878 *((__u8
*)sp
++)=255;
2880 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2883 RTjpeg_dctY(bp1
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2884 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2885 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2887 *((__u8
*)sp
++)=255;
2889 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2892 RTjpeg_dctY(bp1
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2893 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2894 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2896 *((__u8
*)sp
++)=255;
2898 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2901 RTjpeg_dctY(bp2
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2902 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2903 if(RTjpeg_bcomp(block
, &RTjpeg_cmask
))
2905 *((__u8
*)sp
++)=255;
2907 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2910 RTjpeg_dctY(bp3
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2911 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2912 if(RTjpeg_bcomp(block
, &RTjpeg_cmask
))
2914 *((__u8
*)sp
++)=255;
2916 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2919 bp
+=RTjpeg_width
<<4;
2920 bp1
+=RTjpeg_width
<<4;
2921 bp2
+=RTjpeg_width
<<2;
2922 bp3
+=RTjpeg_width
<<2;
2932 int RTjpeg_mcompressYUV422(__s8
*sp
, unsigned char *bp
, __u16 lmask
, __u16 cmask
)
2936 register __s8
* bp2
;
2937 register __s8
* bp3
;
2938 register int i
, j
, k
;
2942 RTjpeg_lmask
=(mmx_t
)(((__u64
)lmask
<<48)|((__u64
)lmask
<<32)|((__u64
)lmask
<<16)|lmask
);
2943 RTjpeg_cmask
=(mmx_t
)(((__u64
)cmask
<<48)|((__u64
)cmask
<<32)|((__u64
)cmask
<<16)|cmask
);
2949 bp
= bp
- RTjpeg_width
*0;
2950 bp2
= bp
+ RTjpeg_Ysize
-RTjpeg_width
*0;
2951 bp3
= bp2
+ RTjpeg_Csize
;
2956 for(i
=RTjpeg_height
; i
; i
-=8)
2958 for(j
=0, k
=0; j
<RTjpeg_width
; j
+=16, k
+=8)
2960 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_Ywidth
);
2961 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2962 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2964 *((__u8
*)sp
++)=255;
2966 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2969 RTjpeg_dctY(bp
+j
+8, RTjpeg_block
, RTjpeg_Ywidth
);
2970 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
2971 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
2973 *((__u8
*)sp
++)=255;
2975 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
2978 RTjpeg_dctY(bp2
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2979 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2980 if(RTjpeg_bcomp(block
, &RTjpeg_cmask
))
2982 *((__u8
*)sp
++)=255;
2984 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2987 RTjpeg_dctY(bp3
+k
, RTjpeg_block
, RTjpeg_Cwidth
);
2988 RTjpeg_quant(RTjpeg_block
, RTjpeg_cqt
);
2989 if(RTjpeg_bcomp(block
, &RTjpeg_cmask
))
2991 *((__u8
*)sp
++)=255;
2993 else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_cb8
);
2997 bp
+=RTjpeg_width
<<3;
2998 bp2
+=RTjpeg_width
<<2;
2999 bp3
+=RTjpeg_width
<<2;
3001 printf ("%d\n", block
- RTjpeg_old
);
3008 int RTjpeg_mcompress8(__s8
*sp
, unsigned char *bp
, __u16 lmask
)
3016 RTjpeg_lmask
=(mmx_t
)(((__u64
)lmask
<<48)|((__u64
)lmask
<<32)|((__u64
)lmask
<<16)|lmask
);
3025 for(i
=0; i
<RTjpeg_height
; i
+=8)
3027 for(j
=0; j
<RTjpeg_width
; j
+=8)
3029 RTjpeg_dctY(bp
+j
, RTjpeg_block
, RTjpeg_width
);
3030 RTjpeg_quant(RTjpeg_block
, RTjpeg_lqt
);
3031 if(RTjpeg_bcomp(block
, &RTjpeg_lmask
))
3033 *((__u8
*)sp
++)=255;
3034 /* printf("* %d ", sp[-1]); */
3035 } else sp
+=RTjpeg_b2s(RTjpeg_block
, sp
, RTjpeg_lb8
);
3038 bp
+=RTjpeg_width
<<3;
3046 void RTjpeg_color_init(void)
3056 void RTjpeg_yuv422rgb(__u8
*buf
, __u8
*rgb
, int stride
)
3060 __s32 y
, crR
, crG
, cbG
, cbB
;
3061 __u8
*bufcr
, *bufcb
, *bufy
, *bufoute
;
3066 bufcb
=&buf
[RTjpeg_width
*RTjpeg_height
];
3067 bufcr
=&buf
[RTjpeg_width
*RTjpeg_height
+(RTjpeg_width
*RTjpeg_height
)/2];
3071 for(i
=0; i
<(RTjpeg_height
); i
++)
3073 for(j
=0; j
<RTjpeg_width
; j
+=2)
3075 crR
=(*bufcr
-128)*KcrR
;
3076 crG
=(*(bufcr
++)-128)*KcrG
;
3077 cbG
=(*bufcb
-128)*KcbG
;
3078 cbB
=(*(bufcb
++)-128)*KcbB
;
3083 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3084 tmp
=(y
-crG
-cbG
)>>16;
3085 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3087 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3089 y
=(bufy
[j
+1]-16)*Ky
;
3092 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3093 tmp
=(y
-crG
-cbG
)>>16;
3094 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3096 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3104 void RTjpeg_yuv420rgb(__u8
*buf
, __u8
*rgb
, int stride
)
3108 __s32 y
, crR
, crG
, cbG
, cbB
;
3109 __u8
*bufcr
, *bufcb
, *bufy
, *bufoute
, *bufouto
;
3113 oskip
=RTjpeg_width
*3;
3115 oskip
=2*stride
-RTjpeg_width
*3;
3119 bufcb
=&buf
[RTjpeg_width
*RTjpeg_height
];
3120 bufcr
=&buf
[RTjpeg_width
*RTjpeg_height
+(RTjpeg_width
*RTjpeg_height
)/4];
3123 bufouto
=rgb
+RTjpeg_width
*3;
3125 for(i
=0; i
<(RTjpeg_height
>>1); i
++)
3127 for(j
=0; j
<RTjpeg_width
; j
+=2)
3129 crR
=(*bufcr
-128)*KcrR
;
3130 crG
=(*(bufcr
++)-128)*KcrG
;
3131 cbG
=(*bufcb
-128)*KcbG
;
3132 cbB
=(*(bufcb
++)-128)*KcbB
;
3137 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3138 tmp
=(y
-crG
-cbG
)>>16;
3139 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3141 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3143 y
=(bufy
[j
+1]-16)*Ky
;
3146 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3147 tmp
=(y
-crG
-cbG
)>>16;
3148 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3150 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3152 y
=(bufy
[j
+yskip
]-16)*Ky
;
3155 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3156 tmp
=(y
-crG
-cbG
)>>16;
3157 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3159 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3161 y
=(bufy
[j
+1+yskip
]-16)*Ky
;
3164 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3165 tmp
=(y
-crG
-cbG
)>>16;
3166 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3168 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3178 void RTjpeg_yuvrgb32(__u8
*buf
, __u8
*rgb
, int stride
)
3182 __s32 y
, crR
, crG
, cbG
, cbB
;
3183 __u8
*bufcr
, *bufcb
, *bufy
, *bufoute
, *bufouto
;
3187 oskip
=RTjpeg_width
*4;
3189 oskip
= 2*stride
-RTjpeg_width
*4;
3192 bufcb
=&buf
[RTjpeg_width
*RTjpeg_height
];
3193 bufcr
=&buf
[RTjpeg_width
*RTjpeg_height
+(RTjpeg_width
*RTjpeg_height
)/2];
3196 bufouto
=rgb
+RTjpeg_width
*4;
3198 for(i
=0; i
<(RTjpeg_height
>>1); i
++)
3200 for(j
=0; j
<RTjpeg_width
; j
+=2)
3202 crR
=(*bufcr
-128)*KcrR
;
3203 crG
=(*(bufcr
++)-128)*KcrG
;
3204 cbG
=(*bufcb
-128)*KcbG
;
3205 cbB
=(*(bufcb
++)-128)*KcbB
;
3210 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3211 tmp
=(y
-crG
-cbG
)>>16;
3212 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3214 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3217 y
=(bufy
[j
+1]-16)*Ky
;
3220 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3221 tmp
=(y
-crG
-cbG
)>>16;
3222 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3224 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3227 y
=(bufy
[j
+yskip
]-16)*Ky
;
3230 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3231 tmp
=(y
-crG
-cbG
)>>16;
3232 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3234 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3237 y
=(bufy
[j
+1+yskip
]-16)*Ky
;
3240 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3241 tmp
=(y
-crG
-cbG
)>>16;
3242 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3244 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3254 void RTjpeg_yuvrgb24(__u8
*buf
, __u8
*rgb
, int stride
)
3258 __s32 y
, crR
, crG
, cbG
, cbB
;
3259 __u8
*bufcr
, *bufcb
, *bufy
, *bufoute
, *bufouto
;
3263 oskip
=RTjpeg_width
*3;
3265 oskip
=2*stride
- RTjpeg_width
*3;
3269 bufcb
=&buf
[RTjpeg_width
*RTjpeg_height
];
3270 bufcr
=&buf
[RTjpeg_width
*RTjpeg_height
+(RTjpeg_width
*RTjpeg_height
)/4];
3273 bufouto
=rgb
+RTjpeg_width
*3;
3275 for(i
=0; i
<(RTjpeg_height
>>1); i
++)
3277 for(j
=0; j
<RTjpeg_width
; j
+=2)
3279 crR
=(*bufcr
-128)*KcrR
;
3280 crG
=(*(bufcr
++)-128)*KcrG
;
3281 cbG
=(*bufcb
-128)*KcbG
;
3282 cbB
=(*(bufcb
++)-128)*KcbB
;
3287 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3288 tmp
=(y
-crG
-cbG
)>>16;
3289 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3291 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3293 y
=(bufy
[j
+1]-16)*Ky
;
3296 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3297 tmp
=(y
-crG
-cbG
)>>16;
3298 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3300 *(bufoute
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3302 y
=(bufy
[j
+yskip
]-16)*Ky
;
3305 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3306 tmp
=(y
-crG
-cbG
)>>16;
3307 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3309 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3311 y
=(bufy
[j
+1+yskip
]-16)*Ky
;
3314 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3315 tmp
=(y
-crG
-cbG
)>>16;
3316 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3318 *(bufouto
++)=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3327 void RTjpeg_yuvrgb16(__u8
*buf
, __u8
*rgb
, int stride
)
3331 __s32 y
, crR
, crG
, cbG
, cbB
;
3332 __u8
*bufcr
, *bufcb
, *bufy
, *bufoute
, *bufouto
;
3334 unsigned char r
, g
, b
;
3337 oskip
=RTjpeg_width
*2;
3339 oskip
=2*stride
-RTjpeg_width
*2;
3343 bufcb
=&buf
[RTjpeg_width
*RTjpeg_height
];
3344 bufcr
=&buf
[RTjpeg_width
*RTjpeg_height
+(RTjpeg_width
*RTjpeg_height
)/4];
3347 bufouto
=rgb
+RTjpeg_width
*2;
3349 for(i
=0; i
<(RTjpeg_height
>>1); i
++)
3351 for(j
=0; j
<RTjpeg_width
; j
+=2)
3353 crR
=(*bufcr
-128)*KcrR
;
3354 crG
=(*(bufcr
++)-128)*KcrG
;
3355 cbG
=(*bufcb
-128)*KcbG
;
3356 cbB
=(*(bufcb
++)-128)*KcbB
;
3361 b
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3362 tmp
=(y
-crG
-cbG
)>>16;
3363 g
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3365 r
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3366 tmp
=(int)((int)b
>> 3);
3367 tmp
|=(int)(((int)g
>> 2) << 5);
3368 tmp
|=(int)(((int)r
>> 3) << 11);
3369 *(bufoute
++)=tmp
&0xff;
3370 *(bufoute
++)=tmp
>>8;
3373 y
=(bufy
[j
+1]-16)*Ky
;
3376 b
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3377 tmp
=(y
-crG
-cbG
)>>16;
3378 g
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3380 r
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3381 tmp
=(int)((int)b
>> 3);
3382 tmp
|=(int)(((int)g
>> 2) << 5);
3383 tmp
|=(int)(((int)r
>> 3) << 11);
3384 *(bufoute
++)=tmp
&0xff;
3385 *(bufoute
++)=tmp
>>8;
3387 y
=(bufy
[j
+yskip
]-16)*Ky
;
3390 b
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3391 tmp
=(y
-crG
-cbG
)>>16;
3392 g
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3394 r
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3395 tmp
=(int)((int)b
>> 3);
3396 tmp
|=(int)(((int)g
>> 2) << 5);
3397 tmp
|=(int)(((int)r
>> 3) << 11);
3398 *(bufouto
++)=tmp
&0xff;
3399 *(bufouto
++)=tmp
>>8;
3401 y
=(bufy
[j
+1+yskip
]-16)*Ky
;
3404 b
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3405 tmp
=(y
-crG
-cbG
)>>16;
3406 g
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3408 r
=(tmp
>255)?255:((tmp
<0)?0:tmp
);
3409 tmp
=(int)((int)b
>> 3);
3410 tmp
|=(int)(((int)g
>> 2) << 5);
3411 tmp
|=(int)(((int)r
>> 3) << 11);
3412 *(bufouto
++)=tmp
&0xff;
3413 *(bufouto
++)=tmp
>>8;
3424 void RTjpeg_yuvrgb8(__u8
*buf
, __u8
*rgb
, int stride
)
3426 bcopy(buf
, rgb
, RTjpeg_width
*RTjpeg_height
);