3 #include <math.h> // floorf, ceilf
4 #include <string.h> // memcpy
6 #include "bsp_common.h"
10 #define assert(x) ASSERT(x)
// Generic min/max helpers.
// Both arguments and the whole expansion are parenthesized so the macros can
// be used safely inside larger expressions. Each argument may be evaluated
// twice, so do not pass expressions with side effects.
#define tjei_min(a, b) (((a) < (b)) ? (a) : (b))
#define tjei_max(a, b) (((a) < (b)) ? (b) : (a))
16 #define TJEI_FORCE_INLINE __forceinline
17 // #define TJEI_FORCE_INLINE __declspec(noinline) // For profiling
19 #define TJEI_FORCE_INLINE static // TODO: equivalent for gcc & clang
22 // Only use zero for debugging and/or inspection.
23 #define TJE_USE_FAST_DCT 1
28 // ============================================================
31 // The spec defines default, reasonably good quantization matrices and huffman
32 // specification tables.
35 // Instead of hard-coding the final huffman table, we only hard-code the table
36 // spec suggested by the specification, and then derive the full table from
37 // there. This is only for didactic purposes but it might be useful if there
38 // ever is the case that we need to swap huffman tables from various sources.
39 // ============================================================
42 // K.1 - suggested luminance QT
43 static const uint8_t tjei_default_qt_luma_from_spec
[] =
45 16,11,10,16, 24, 40, 51, 61,
46 12,12,14,19, 26, 58, 60, 55,
47 14,13,16,24, 40, 57, 69, 56,
48 14,17,22,29, 51, 87, 80, 62,
49 18,22,37,56, 68,109,103, 77,
50 24,35,55,64, 81,104,113, 92,
51 49,64,78,87,103,121,120,101,
52 72,92,95,98,112,100,103, 99,
57 static const uint8_t tjei_default_qt_chroma_from_spec
[] =
59 // K.1 - suggested chrominance QT
60 17,18,24,47,99,99,99,99,
61 18,21,26,66,99,99,99,99,
62 24,26,56,99,99,99,99,99,
63 47,66,99,99,99,99,99,99,
64 99,99,99,99,99,99,99,99,
65 99,99,99,99,99,99,99,99,
66 99,99,99,99,99,99,99,99,
67 99,99,99,99,99,99,99,99,
71 static const uint8_t tjei_default_qt_chroma_from_paper
[] =
73 // Example QT from JPEG paper
74 16, 12, 14, 14, 18, 24, 49, 72,
75 11, 10, 16, 24, 40, 51, 61, 12,
76 13, 17, 22, 35, 64, 92, 14, 16,
77 22, 37, 55, 78, 95, 19, 24, 29,
78 56, 64, 87, 98, 26, 40, 51, 68,
79 81, 103, 112, 58, 57, 87, 109, 104,
80 121,100, 60, 69, 80, 103, 113, 120,
81 103, 55, 56, 62, 77, 92, 101, 99,
84 // == Procedure to 'deflate' the huffman tree: JPEG spec, C.2
86 // Number of 16 bit values for every code length. (K.3.3.1)
87 static const uint8_t tjei_default_ht_luma_dc_len
[16] =
89 0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0
92 static const uint8_t tjei_default_ht_luma_dc
[12] =
94 0,1,2,3,4,5,6,7,8,9,10,11
97 // Number of 16 bit values for every code length. (K.3.3.1)
98 static const uint8_t tjei_default_ht_chroma_dc_len
[16] =
100 0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0
103 static const uint8_t tjei_default_ht_chroma_dc
[12] =
105 0,1,2,3,4,5,6,7,8,9,10,11
108 // Same as above, but AC coefficients.
109 static const uint8_t tjei_default_ht_luma_ac_len
[16] =
111 0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d
113 static const uint8_t tjei_default_ht_luma_ac
[] =
115 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
116 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0,
117 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28,
118 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
119 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
120 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
121 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
122 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5,
123 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2,
124 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
128 static const uint8_t tjei_default_ht_chroma_ac_len
[16] =
130 0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77
132 static const uint8_t tjei_default_ht_chroma_ac
[] =
134 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
135 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0,
136 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26,
137 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
138 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
139 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
140 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5,
141 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3,
142 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA,
143 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
// Per-frequency scale factors used when pre-processing the quantization
// tables for the fast (AA&N) DCT:
//   aan_scales[0] = 1
//   aan_scales[k] = cos(k*PI/16) * sqrt(2)   for k = 1..7
// The table is only ever read, so it is const like the other lookup tables.
static const float aan_scales[] = {
    1.0f, 1.387039845f, 1.306562965f, 1.175875602f,
    1.0f, 0.785694958f, 0.541196100f, 0.275899379f
};
151 // ============================================================
153 // ============================================================
156 static const uint8_t tjei_zig_zag
[64] =
158 0, 1, 5, 6, 14, 15, 27, 28,
159 2, 4, 7, 13, 16, 26, 29, 42,
160 3, 8, 12, 17, 25, 30, 41, 43,
161 9, 11, 18, 24, 31, 40, 44, 53,
162 10, 19, 23, 32, 39, 45, 52, 54,
163 20, 22, 33, 38, 46, 51, 55, 60,
164 21, 34, 37, 47, 50, 56, 59, 61,
165 35, 36, 48, 49, 57, 58, 62, 63,
167 #define tjei_be_word BSP_Swap16
169 // ============================================================
170 // The following structs exist only for code clarity, debugability, and
171 // readability. They are used when writing to disk, but it is useful to have
172 // 1-packed-structs to document how the format works, and to inspect memory
174 // ============================================================
176 static const uint8_t tjeik_jfif_id
[] = "JFIF";
177 static const uint8_t tjeik_com_str
[] = "Created by JPEG Encoder";
179 // TODO: Get rid of packed structs!
201 char com_str
[sizeof(tjeik_com_str
) - 1];
207 tje_write_func
* func
;
213 uint8_t ehuffsize
[4][257];
214 uint16_t ehuffcode
[4][256];
215 uint8_t const * ht_bits
[4];
216 uint8_t const * ht_vals
[4];
218 // Quantization tables.
220 uint8_t qt_chroma
[64];
222 // fwrite by default. User-defined when using tje_encode_with_func.
223 TJEWriteContext write_context
;
225 // Buffered output. Big performance win when using the usual stdlib implementations.
226 size_t output_buffer_count
;
227 uint8_t output_buffer
[TJEI_BUFFER_SIZE
];
230 // Helper struct for TJEFrameHeader (below).
233 uint8_t component_id
;
234 uint8_t sampling_factors
; // most significant 4 bits: horizontal. 4 LSB: vertical (A.1.1)
235 uint8_t qt
; // Quantization table selector.
241 uint16_t len
; // 8 + 3 * frame.num_components
242 uint8_t precision
; // Sample precision (bits per sample).
245 uint8_t num_components
; // For this implementation, will be equal to 3.
246 TJEComponentSpec component_spec
[3];
251 uint8_t component_id
; // Just as with TJEComponentSpec
252 uint8_t dc_ac
; // (dc|ac)
253 } TJEFrameComponentSpec
;
259 uint8_t num_components
; // 3.
260 TJEFrameComponentSpec component_spec
[3];
270 static void tjei_write(TJEState
* state
, const void* data
, size_t num_bytes
, size_t num_elements
)
272 size_t to_write
= num_bytes
* num_elements
;
274 // Cap to the buffer available size and copy memory.
275 size_t capped_count
= tjei_min(to_write
, TJEI_BUFFER_SIZE
- 1 - state
->output_buffer_count
);
277 memcpy(state
->output_buffer
+ state
->output_buffer_count
, data
, capped_count
);
278 state
->output_buffer_count
+= capped_count
;
280 assert (state
->output_buffer_count
<= TJEI_BUFFER_SIZE
- 1);
282 if ( state
->output_buffer_count
== TJEI_BUFFER_SIZE
- 1 ) {
283 state
->write_context
.func(state
->write_context
.context
, state
->output_buffer
, (int)state
->output_buffer_count
);
284 state
->output_buffer_count
= 0;
287 // Recursively calling ourselves with the rest of the buffer.
288 if (capped_count
< to_write
) {
289 tjei_write(state
, (uint8_t*)data
+capped_count
, to_write
- capped_count
, 1);
293 static void tjei_write_DQT(TJEState
* state
, const uint8_t* matrix
, uint8_t id
)
295 uint16_t DQT
= tjei_be_word(0xffdb);
296 tjei_write(state
, &DQT
, sizeof(uint16_t), 1);
297 uint16_t len
= tjei_be_word(0x0043); // 2(len) + 1(id) + 64(matrix) = 67 = 0x43
298 tjei_write(state
, &len
, sizeof(uint16_t), 1);
300 uint8_t precision_and_id
= id
; // 0x0000 8 bits | 0x00id
301 tjei_write(state
, &precision_and_id
, sizeof(uint8_t), 1);
303 tjei_write(state
, matrix
, 64*sizeof(uint8_t), 1);
310 } TJEHuffmanTableClass
;
312 static void tjei_write_DHT(TJEState
* state
,
313 uint8_t const * matrix_len
,
314 uint8_t const * matrix_val
,
315 TJEHuffmanTableClass ht_class
,
319 for ( int i
= 0; i
< 16; ++i
) {
320 num_values
+= matrix_len
[i
];
322 assert(num_values
<= 0xffff);
324 uint16_t DHT
= tjei_be_word(0xffc4);
325 // 2(len) + 1(Tc|th) + 16 (num lengths) + ?? (num values)
326 uint16_t len
= tjei_be_word(2 + 1 + 16 + (uint16_t)num_values
);
328 uint8_t tc_th
= (uint8_t)((((uint8_t)ht_class
) << 4) | id
);
330 tjei_write(state
, &DHT
, sizeof(uint16_t), 1);
331 tjei_write(state
, &len
, sizeof(uint16_t), 1);
332 tjei_write(state
, &tc_th
, sizeof(uint8_t), 1);
333 tjei_write(state
, matrix_len
, sizeof(uint8_t), 16);
334 tjei_write(state
, matrix_val
, sizeof(uint8_t), (size_t)num_values
);
336 // ============================================================
337 // Huffman deflation code.
338 // ============================================================
// Expands the BITS specification (JPEG C.3) into a flat list of code sizes.
//
//   huffsize : output; receives one code length per code, in increasing
//              length order, followed by a 0 terminator. Must have room for
//              257 entries (up to 256 codes plus the terminator).
//   bits     : 16 counts; bits[i] is the number of codes of length i + 1.
//
// Returns huffsize. At most 256 code lengths are written, so a malformed
// `bits` whose counts sum to more than 256 cannot overrun the output.
static uint8_t* tjei_huff_get_code_lengths(uint8_t huffsize[/*257*/], uint8_t const * bits)
{
    enum { max_codes = 256 };
    int k = 0;

    for (int length = 1; length <= 16; ++length) {
        int codes_of_length = bits[length - 1];
        while (codes_of_length-- > 0 && k < max_codes) {
            huffsize[k++] = (uint8_t)length;
        }
    }
    // Zero terminator marks the end of the list for the code generator.
    huffsize[k] = 0;
    return huffsize;
}
353 // Fills out the prefixes for each code.
354 static uint16_t* tjei_huff_get_codes(uint16_t codes
[], uint8_t* huffsize
, int64_t count
)
358 uint8_t sz
= huffsize
[0];
363 } while (huffsize
[k
] == sz
);
364 if (huffsize
[k
] == 0) {
368 code
= (uint16_t)(code
<< 1);
370 } while( huffsize
[k
] != sz
);
// Builds symbol-indexed lookup tables from the code-ordered huffman tables.
//
//   out_ehuffsize : output; out_ehuffsize[symbol] receives the code length.
//   out_ehuffcode : output; out_ehuffcode[symbol] receives the code.
//   huffval       : symbol value for each code, `count` entries.
//   huffsize      : code length for each code, `count` entries.
//   huffcode      : code for each code index, `count` entries.
//   count         : number of codes. Nothing is read or written when it is
//                   zero or negative.
//
// Entries of the output tables whose symbol does not appear in huffval are
// left untouched.
static void tjei_huff_get_extended(uint8_t* out_ehuffsize,
                                   uint16_t* out_ehuffcode,
                                   uint8_t const * huffval,
                                   uint8_t* huffsize,
                                   uint16_t* huffcode, int64_t count)
{
    for (int64_t k = 0; k < count; ++k) {
        uint8_t val = huffval[k];
        out_ehuffcode[val] = huffcode[k];
        out_ehuffsize[val] = huffsize[k];
    }
}
388 // ============================================================
391 // out[1] : number of bits
393 TJEI_FORCE_INLINE
void tjei_calculate_variable_length_int(int value
, uint16_t out
[2])
401 while( abs_val
>>= 1 ) {
404 out
[0] = (uint16_t)(value
& ((1 << out
[1]) - 1));
407 // Write bits to file.
408 TJEI_FORCE_INLINE
void tjei_write_bits(TJEState
* state
,
409 uint32_t* bitbuffer
, uint32_t* location
,
410 uint16_t num_bits
, uint16_t bits
)
413 // [ ] <-- bit buffer
416 // This call pushes to the bitbuffer and saves the location. Data is pushed
417 // from most significant to less significant.
418 // When we can write a full byte, we write a byte and shift.
421 uint32_t nloc
= *location
+ num_bits
;
422 *bitbuffer
|= (uint32_t)(bits
<< (32 - nloc
));
424 while ( *location
>= 8 ) {
425 // Grab the most significant byte.
426 uint8_t c
= (uint8_t)((*bitbuffer
) >> 24);
428 tjei_write(state
, &c
, 1, 1);
430 // Special case: tell JPEG this is not a marker.
432 tjei_write(state
, &z
, 1, 1);
440 // DCT implementation by Thomas G. Lane.
441 // Obtained through NVIDIA
442 // http://developer.download.nvidia.com/SDK/9.5/Samples/vidimaging_samples.html#gpgpu_dct
445 // This implementation is based on Arai, Agui, and Nakajima's algorithm for
446 // scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
447 // Japanese, but the algorithm is described in the Pennebaker & Mitchell
448 // JPEG textbook (see REFERENCES section in file README). The following code
449 // is based directly on figure 4-8 in P&M.
451 static void tjei_fdct (float * data
)
453 float tmp0
, tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
;
454 float tmp10
, tmp11
, tmp12
, tmp13
;
455 float z1
, z2
, z3
, z4
, z5
, z11
, z13
;
459 /* Pass 1: process rows. */
462 for ( ctr
= 7; ctr
>= 0; ctr
-- ) {
463 tmp0
= dataptr
[0] + dataptr
[7];
464 tmp7
= dataptr
[0] - dataptr
[7];
465 tmp1
= dataptr
[1] + dataptr
[6];
466 tmp6
= dataptr
[1] - dataptr
[6];
467 tmp2
= dataptr
[2] + dataptr
[5];
468 tmp5
= dataptr
[2] - dataptr
[5];
469 tmp3
= dataptr
[3] + dataptr
[4];
470 tmp4
= dataptr
[3] - dataptr
[4];
474 tmp10
= tmp0
+ tmp3
; /* phase 2 */
479 dataptr
[0] = tmp10
+ tmp11
; /* phase 3 */
480 dataptr
[4] = tmp10
- tmp11
;
482 z1
= (tmp12
+ tmp13
) * ((float) 0.707106781); /* c4 */
483 dataptr
[2] = tmp13
+ z1
; /* phase 5 */
484 dataptr
[6] = tmp13
- z1
;
488 tmp10
= tmp4
+ tmp5
; /* phase 2 */
492 /* The rotator is modified from fig 4-8 to avoid extra negations. */
493 z5
= (tmp10
- tmp12
) * ((float) 0.382683433); /* c6 */
494 z2
= ((float) 0.541196100) * tmp10
+ z5
; /* c2-c6 */
495 z4
= ((float) 1.306562965) * tmp12
+ z5
; /* c2+c6 */
496 z3
= tmp11
* ((float) 0.707106781); /* c4 */
498 z11
= tmp7
+ z3
; /* phase 5 */
501 dataptr
[5] = z13
+ z2
; /* phase 6 */
502 dataptr
[3] = z13
- z2
;
503 dataptr
[1] = z11
+ z4
;
504 dataptr
[7] = z11
- z4
;
506 dataptr
+= 8; /* advance pointer to next row */
509 /* Pass 2: process columns. */
512 for ( ctr
= 8-1; ctr
>= 0; ctr
-- ) {
513 tmp0
= dataptr
[8*0] + dataptr
[8*7];
514 tmp7
= dataptr
[8*0] - dataptr
[8*7];
515 tmp1
= dataptr
[8*1] + dataptr
[8*6];
516 tmp6
= dataptr
[8*1] - dataptr
[8*6];
517 tmp2
= dataptr
[8*2] + dataptr
[8*5];
518 tmp5
= dataptr
[8*2] - dataptr
[8*5];
519 tmp3
= dataptr
[8*3] + dataptr
[8*4];
520 tmp4
= dataptr
[8*3] - dataptr
[8*4];
524 tmp10
= tmp0
+ tmp3
; /* phase 2 */
529 dataptr
[8*0] = tmp10
+ tmp11
; /* phase 3 */
530 dataptr
[8*4] = tmp10
- tmp11
;
532 z1
= (tmp12
+ tmp13
) * ((float) 0.707106781); /* c4 */
533 dataptr
[8*2] = tmp13
+ z1
; /* phase 5 */
534 dataptr
[8*6] = tmp13
- z1
;
538 tmp10
= tmp4
+ tmp5
; /* phase 2 */
542 /* The rotator is modified from fig 4-8 to avoid extra negations. */
543 z5
= (tmp10
- tmp12
) * ((float) 0.382683433); /* c6 */
544 z2
= ((float) 0.541196100) * tmp10
+ z5
; /* c2-c6 */
545 z4
= ((float) 1.306562965) * tmp12
+ z5
; /* c2+c6 */
546 z3
= tmp11
* ((float) 0.707106781); /* c4 */
548 z11
= tmp7
+ z3
; /* phase 5 */
551 dataptr
[8*5] = z13
+ z2
; /* phase 6 */
552 dataptr
[8*3] = z13
- z2
;
553 dataptr
[8*1] = z11
+ z4
;
554 dataptr
[8*7] = z11
- z4
;
556 dataptr
++; /* advance pointer to next column */
559 #if !TJE_USE_FAST_DCT
560 static float slow_fdct(int u
, int v
, float* data
)
562 #define kPI 3.14159265f
564 float cu
= (u
== 0) ? 0.70710678118654f
: 1;
565 float cv
= (v
== 0) ? 0.70710678118654f
: 1;
566 for ( int y
= 0; y
< 8; ++y
) {
567 for ( int x
= 0; x
< 8; ++x
) {
568 res
+= (data
[y
* 8 + x
]) *
569 cosf(((2.0f
* x
+ 1.0f
) * u
* kPI
) / 16.0f
) *
570 cosf(((2.0f
* y
+ 1.0f
) * v
* kPI
) / 16.0f
);
573 res
*= 0.25f
* cu
* cv
;
579 #define ABS(x) ((x) < 0 ? -(x) : (x))
581 static void tjei_encode_and_write_MCU(TJEState
* state
,
584 float* qt
, // Pre-processed quantization matrix.
588 uint8_t* huff_dc_len
, uint16_t* huff_dc_code
, // Huffman tables
589 uint8_t* huff_ac_len
, uint16_t* huff_ac_code
,
590 int* pred
, // Previous DC coefficient
591 uint32_t* bitbuffer
, // Bitstack.
594 int du
[64]; // Data unit in zig-zag order
597 memcpy(dct_mcu
, mcu
, 64 * sizeof(float));
601 for ( int i
= 0; i
< 64; ++i
) {
602 float fval
= dct_mcu
[i
];
605 fval
= (fval
> 0) ? floorf(fval
+ 0.5f
) : ceilf(fval
- 0.5f
);
607 fval
= floorf(fval
+ 1024 + 0.5f
);
611 du
[tjei_zig_zag
[i
]] = val
;
614 for ( int v
= 0; v
< 8; ++v
) {
615 for ( int u
= 0; u
< 8; ++u
) {
616 dct_mcu
[v
* 8 + u
] = slow_fdct(u
, v
, mcu
);
619 for ( int i
= 0; i
< 64; ++i
) {
620 float fval
= dct_mcu
[i
] / (qt
[i
]);
621 int val
= (int)((fval
> 0) ? floorf(fval
+ 0.5f
) : ceilf(fval
- 0.5f
));
622 du
[tjei_zig_zag
[i
]] = val
;
628 // Encode DC coefficient.
629 int diff
= du
[0] - *pred
;
632 tjei_calculate_variable_length_int(diff
, vli
);
633 // Write number of bits with Huffman coding
634 tjei_write_bits(state
, bitbuffer
, location
, huff_dc_len
[vli
[1]], huff_dc_code
[vli
[1]]);
636 tjei_write_bits(state
, bitbuffer
, location
, vli
[1], vli
[0]);
638 tjei_write_bits(state
, bitbuffer
, location
, huff_dc_len
[0], huff_dc_code
[0]);
641 // ==== Encode AC coefficients ====
643 int last_non_zero_i
= 0;
644 // Find the last non-zero element.
645 for ( int i
= 63; i
> 0; --i
) {
652 for ( int i
= 1; i
<= last_non_zero_i
; ++i
) {
653 // If zero, increase count. If >=15, encode (FF,00)
655 while ( du
[i
] == 0 ) {
658 if (zero_count
== 16) {
659 // encode (ff,00) == 0xf0
660 tjei_write_bits(state
, bitbuffer
, location
, huff_ac_len
[0xf0], huff_ac_code
[0xf0]);
664 tjei_calculate_variable_length_int(du
[i
], vli
);
666 assert(zero_count
< 0x10);
667 assert(vli
[1] <= 10);
669 uint16_t sym1
= (uint16_t)((uint16_t)zero_count
<< 4) | vli
[1];
671 assert(huff_ac_len
[sym1
] != 0);
673 // Write symbol 1 --- (RUNLENGTH, SIZE)
674 tjei_write_bits(state
, bitbuffer
, location
, huff_ac_len
[sym1
], huff_ac_code
[sym1
]);
675 // Write symbol 2 --- (AMPLITUDE)
676 tjei_write_bits(state
, bitbuffer
, location
, vli
[1], vli
[0]);
679 if (last_non_zero_i
!= 63) {
680 // write EOB HUFF(00,00)
681 tjei_write_bits(state
, bitbuffer
, location
, huff_ac_len
[0], huff_ac_code
[0]);
694 struct TJEProcessedQT
701 // Set up huffman tables in state.
702 static void tjei_huff_expand(TJEState
* state
)
706 state
->ht_bits
[TJEI_LUMA_DC
] = tjei_default_ht_luma_dc_len
;
707 state
->ht_bits
[TJEI_LUMA_AC
] = tjei_default_ht_luma_ac_len
;
708 state
->ht_bits
[TJEI_CHROMA_DC
] = tjei_default_ht_chroma_dc_len
;
709 state
->ht_bits
[TJEI_CHROMA_AC
] = tjei_default_ht_chroma_ac_len
;
711 state
->ht_vals
[TJEI_LUMA_DC
] = tjei_default_ht_luma_dc
;
712 state
->ht_vals
[TJEI_LUMA_AC
] = tjei_default_ht_luma_ac
;
713 state
->ht_vals
[TJEI_CHROMA_DC
] = tjei_default_ht_chroma_dc
;
714 state
->ht_vals
[TJEI_CHROMA_AC
] = tjei_default_ht_chroma_ac
;
716 // How many codes in total for each of LUMA_(DC|AC) and CHROMA_(DC|AC)
717 int32_t spec_tables_len
[4] = { 0 };
719 for ( int i
= 0; i
< 4; ++i
) {
720 for ( int k
= 0; k
< 16; ++k
) {
721 spec_tables_len
[i
] += state
->ht_bits
[i
][k
];
725 // Fill out the extended tables..
726 uint8_t huffsize
[4][257];
727 uint16_t huffcode
[4][256];
728 for ( int i
= 0; i
< 4; ++i
) {
729 assert (256 >= spec_tables_len
[i
]);
730 tjei_huff_get_code_lengths(huffsize
[i
], state
->ht_bits
[i
]);
731 tjei_huff_get_codes(huffcode
[i
], huffsize
[i
], spec_tables_len
[i
]);
733 for ( int i
= 0; i
< 4; ++i
) {
734 int64_t count
= spec_tables_len
[i
];
735 tjei_huff_get_extended(state
->ehuffsize
[i
],
739 &huffcode
[i
][0], count
);
743 //static int tjei_encode_main(TJEState* state,
744 // const unsigned char* src_data,
747 // const int src_num_components)
749 // if (src_num_components != 3 && src_num_components != 4) {
753 // if (width > 0xffff || height > 0xffff) {
757 //#if TJE_USE_FAST_DCT
758 // struct TJEProcessedQT pqt;
759 // // Again, taken from classic japanese implementation.
761 // /* For float AA&N IDCT method, divisors are equal to quantization
762 // * coefficients scaled by scalefactor[row]*scalefactor[col], where
763 // * scalefactor[0] = 1
764 // * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
765 // * We apply a further scale factor of 8.
766 // * What's actually stored is 1/divisor so that the inner loop can
767 // * use a multiplication rather than a division.
771 // // build (de)quantization tables
772 // for(int y=0; y<8; y++) {
773 // for(int x=0; x<8; x++) {
775 // pqt.luma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_luma[tjei_zig_zag[i]]);
776 // pqt.chroma[y*8+x] = 1.0f / (8 * aan_scales[x] * aan_scales[y] * state->qt_chroma[tjei_zig_zag[i]]);
782 // TJEJPEGHeader header;
784 // header.SOI = tjei_be_word(0xffd8); // Sequential DCT
785 // header.APP0 = tjei_be_word(0xffe0);
787 // uint16_t jfif_len = sizeof(TJEJPEGHeader) - 4 /*SOI & APP0 markers*/;
788 // header.jfif_len = tjei_be_word(jfif_len);
789 // memcpy(header.jfif_id, (void*)tjeik_jfif_id, 5);
790 // header.version = tjei_be_word(0x0102);
791 // header.units = 0x01; // Dots-per-inch
792 // header.x_density = tjei_be_word(0x0060); // 96 DPI
793 // header.y_density = tjei_be_word(0x0060); // 96 DPI
794 // header.x_thumb = 0;
795 // header.y_thumb = 0;
796 // tjei_write(state, &header, sizeof(TJEJPEGHeader), 1);
798 // { // Write comment
799 // TJEJPEGComment com;
800 // uint16_t com_len = 2 + sizeof(tjeik_com_str) - 1;
802 // com.com = tjei_be_word(0xfffe);
803 // com.com_len = tjei_be_word(com_len);
804 // memcpy(com.com_str, (void*)tjeik_com_str, sizeof(tjeik_com_str)-1);
805 // tjei_write(state, &com, sizeof(TJEJPEGComment), 1);
808 // // Write quantization tables.
809 // tjei_write_DQT(state, state->qt_luma, 0x00);
810 // tjei_write_DQT(state, state->qt_chroma, 0x01);
812 // { // Write the frame marker.
813 // TJEFrameHeader header;
814 // header.SOF = tjei_be_word(0xffc0);
815 // header.len = tjei_be_word(8 + 3 * 3);
816 // header.precision = 8;
817 // assert(width <= 0xffff);
818 // assert(height <= 0xffff);
819 // header.width = tjei_be_word((uint16_t)width);
820 // header.height = tjei_be_word((uint16_t)height);
821 // header.num_components = 3;
822 // uint8_t tables[3] = {
823 // 0, // Luma component gets luma table (see tjei_write_DQT call above.)
824 // 1, // Chroma component gets chroma table
825 // 1, // Chroma component gets chroma table
827 // for (int i = 0; i < 3; ++i) {
828 // TJEComponentSpec spec;
829 // spec.component_id = (uint8_t)(i + 1); // No particular reason. Just 1, 2, 3.
830 // spec.sampling_factors = (uint8_t)0x11;
831 // spec.qt = tables[i];
833 // header.component_spec[i] = spec;
836 // tjei_write(state, &header, sizeof(TJEFrameHeader), 1);
839 // tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_DC], state->ht_vals[TJEI_LUMA_DC], TJEI_DC, 0);
840 // tjei_write_DHT(state, state->ht_bits[TJEI_LUMA_AC], state->ht_vals[TJEI_LUMA_AC], TJEI_AC, 0);
841 // tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_DC], state->ht_vals[TJEI_CHROMA_DC], TJEI_DC, 1);
842 // tjei_write_DHT(state, state->ht_bits[TJEI_CHROMA_AC], state->ht_vals[TJEI_CHROMA_AC], TJEI_AC, 1);
844 // // Write start of scan
846 // TJEScanHeader header;
847 // header.SOS = tjei_be_word(0xffda);
848 // header.len = tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec) * 3)));
849 // header.num_components = 3;
851 // uint8_t tables[3] = {
856 // for (int i = 0; i < 3; ++i) {
857 // TJEFrameComponentSpec cs;
858 // // Must be equal to component_id from frame header above.
859 // cs.component_id = (uint8_t)(i + 1);
860 // cs.dc_ac = (uint8_t)tables[i];
862 // header.component_spec[i] = cs;
867 // tjei_write(state, &header, sizeof(TJEScanHeader), 1);
870 // // Write compressed data.
882 // uint32_t bitbuffer = 0;
883 // uint32_t location = 0;
886 // for ( int y = 0; y < height; y += 8 ) {
887 // for ( int x = 0; x < width; x += 8 ) {
888 // // Block loop: ====
889 // for ( int off_y = 0; off_y < 8; ++off_y ) {
890 // for ( int off_x = 0; off_x < 8; ++off_x ) {
891 // int block_index = (off_y * 8 + off_x);
893 // int src_index = (((y + off_y) * width) + (x + off_x)) * src_num_components;
895 // int col = x + off_x;
896 // int row = y + off_y;
898 // if(row >= height) {
899 // src_index -= (width * (row - height + 1)) * src_num_components;
901 // if(col >= width) {
902 // src_index -= (col - width + 1) * src_num_components;
904 // assert(src_index < width * height * src_num_components);
906 // uint8_t r = src_data[src_index + 0];
907 // uint8_t g = src_data[src_index + 1];
908 // uint8_t b = src_data[src_index + 2];
910 // float luma = 0.299f * r + 0.587f * g + 0.114f * b - 128;
911 // float cb = -0.1687f * r - 0.3313f * g + 0.5f * b;
912 // float cr = 0.5f * r - 0.4187f * g - 0.0813f * b;
914 // du_y[block_index] = luma;
915 // du_b[block_index] = cb;
916 // du_r[block_index] = cr;
920 // tjei_encode_and_write_MCU(state, du_y,
921 //#if TJE_USE_FAST_DCT
926 // state->ehuffsize[TJEI_LUMA_DC], state->ehuffcode[TJEI_LUMA_DC],
927 // state->ehuffsize[TJEI_LUMA_AC], state->ehuffcode[TJEI_LUMA_AC],
928 // &pred_y, &bitbuffer, &location);
929 // tjei_encode_and_write_MCU(state, du_b,
930 //#if TJE_USE_FAST_DCT
935 // state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
936 // state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
937 // &pred_b, &bitbuffer, &location);
938 // tjei_encode_and_write_MCU(state, du_r,
939 //#if TJE_USE_FAST_DCT
944 // state->ehuffsize[TJEI_CHROMA_DC], state->ehuffcode[TJEI_CHROMA_DC],
945 // state->ehuffsize[TJEI_CHROMA_AC], state->ehuffcode[TJEI_CHROMA_AC],
946 // &pred_r, &bitbuffer, &location);
952 // // Finish the image.
954 // if (location > 0 && location < 8) {
955 // tjei_write_bits(state, &bitbuffer, &location, (uint16_t)(8 - location), 0);
958 // uint16_t EOI = tjei_be_word(0xffd9);
959 // tjei_write(state, &EOI, sizeof(uint16_t), 1);
961 // if (state->output_buffer_count) {
962 // state->write_context.func(state->write_context.context, state->output_buffer, (int)state->output_buffer_count);
963 // state->output_buffer_count = 0;
970 //int tje_encode_with_func(tje_write_func* func,
972 // const int quality,
975 // const int num_components,
976 // const unsigned char* src_data)
978 // if (quality < 1 || quality > 3) {
979 // tje_log("[ERROR] -- Valid 'quality' values are 1 (lowest), 2, or 3 (highest)");
983 // TJEState state = { 0 };
987 // TJEWriteContext wc = { 0 };
989 // wc.context = context;
992 // state.write_context = wc;
995 // tjei_huff_expand(&state);
997 // int result = tjei_encode_main(&state, src_data, width, height, num_components);
1001 // ============================================================
1005 TJEState encode_state
;
1006 #if TJE_USE_FAST_DCT
1007 struct TJEProcessedQT pqt
;
1011 uint32_t num_components
;
1012 uint32_t cur_height
;
1022 void *jpeg_encode_init(tje_write_func
* func
, void* context
, uint8_t quality
, uint32_t width
, uint32_t height
, uint8_t src_num_components
)
1024 if (quality
< 1 || quality
> 3) {
1025 tje_log("Valid 'quality' %d values are 1 (lowest), 2, or 3 (highest)", quality
);
1028 if (src_num_components
!= 3 && src_num_components
!= 4) {
1032 if (width
> 0xffff || height
> 0xffff) {
1036 TJE_ContextStruct
*ctx
= calloc(1, sizeof(TJE_ContextStruct
));
1037 uint8_t qt_factor
= 1;
1040 for ( int i
= 0; i
< 64; ++i
) {
1041 ctx
->encode_state
.qt_luma
[i
] = 1;
1042 ctx
->encode_state
.qt_chroma
[i
] = 1;
1047 // don't break. fall through.
1049 for ( int i
= 0; i
< 64; ++i
) {
1050 ctx
->encode_state
.qt_luma
[i
] = tjei_default_qt_luma_from_spec
[i
] / qt_factor
;
1051 if (ctx
->encode_state
.qt_luma
[i
] == 0) {
1052 ctx
->encode_state
.qt_luma
[i
] = 1;
1054 ctx
->encode_state
.qt_chroma
[i
] = tjei_default_qt_chroma_from_paper
[i
] / qt_factor
;
1055 if (ctx
->encode_state
.qt_chroma
[i
] == 0) {
1056 ctx
->encode_state
.qt_chroma
[i
] = 1;
1061 assert(!"invalid code path");
1065 ctx
->encode_state
.write_context
.func
= func
;
1066 ctx
->encode_state
.write_context
.context
= context
;
1068 ctx
->height
= height
;
1069 ctx
->num_components
= src_num_components
;
1070 tjei_huff_expand(&ctx
->encode_state
);
1071 TJEState
* state
= &ctx
->encode_state
;
1072 #if TJE_USE_FAST_DCT
1073 // Again, taken from classic japanese implementation.
1075 /* For float AA&N IDCT method, divisors are equal to quantization
1076 * coefficients scaled by scalefactor[row]*scalefactor[col], where
1077 * scalefactor[0] = 1
1078 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7
1079 * We apply a further scale factor of 8.
1080 * What's actually stored is 1/divisor so that the inner loop can
1081 * use a multiplication rather than a division.
1085 // build (de)quantization tables
1086 for(int y
=0; y
<8; y
++) {
1087 for(int x
=0; x
<8; x
++) {
1089 ctx
->pqt
.luma
[y
*8+x
] = 1.0f
/ (8 * aan_scales
[x
] * aan_scales
[y
] * state
->qt_luma
[tjei_zig_zag
[i
]]);
1090 ctx
->pqt
.chroma
[y
*8+x
] = 1.0f
/ (8 * aan_scales
[x
] * aan_scales
[y
] * state
->qt_chroma
[tjei_zig_zag
[i
]]);
1096 TJEJPEGHeader header
;
1098 header
.SOI
= tjei_be_word(0xffd8); // Sequential DCT
1099 header
.APP0
= tjei_be_word(0xffe0);
1101 uint16_t jfif_len
= sizeof(TJEJPEGHeader
) - 4 /*SOI & APP0 markers*/;
1102 header
.jfif_len
= tjei_be_word(jfif_len
);
1103 memcpy(header
.jfif_id
, (void*)tjeik_jfif_id
, 5);
1104 header
.version
= tjei_be_word(0x0102);
1105 header
.units
= 0x01; // Dots-per-inch
1106 header
.x_density
= tjei_be_word(0x0060); // 96 DPI
1107 header
.y_density
= tjei_be_word(0x0060); // 96 DPI
1110 tjei_write(state
, &header
, sizeof(TJEJPEGHeader
), 1);
1114 uint16_t com_len
= 2 + sizeof(tjeik_com_str
) - 1;
1116 com
.com
= tjei_be_word(0xfffe);
1117 com
.com_len
= tjei_be_word(com_len
);
1118 memcpy(com
.com_str
, (void*)tjeik_com_str
, sizeof(tjeik_com_str
)-1);
1119 tjei_write(state
, &com
, sizeof(TJEJPEGComment
), 1);
1122 // Write quantization tables.
1123 tjei_write_DQT(state
, state
->qt_luma
, 0x00);
1124 tjei_write_DQT(state
, state
->qt_chroma
, 0x01);
1126 { // Write the frame marker.
1127 TJEFrameHeader header
;
1128 header
.SOF
= tjei_be_word(0xffc0);
1129 header
.len
= tjei_be_word(8 + 3 * 3);
1130 header
.precision
= 8;
1131 assert(width
<= 0xffff);
1132 assert(height
<= 0xffff);
1133 header
.width
= tjei_be_word((uint16_t)width
);
1134 header
.height
= tjei_be_word((uint16_t)height
);
1135 header
.num_components
= 3;
1136 uint8_t tables
[3] = {
1137 0, // Luma component gets luma table (see tjei_write_DQT call above.)
1138 1, // Chroma component gets chroma table
1139 1, // Chroma component gets chroma table
1141 for (int i
= 0; i
< 3; ++i
) {
1142 TJEComponentSpec spec
;
1143 spec
.component_id
= (uint8_t)(i
+ 1); // No particular reason. Just 1, 2, 3.
1144 spec
.sampling_factors
= (uint8_t)0x11;
1145 spec
.qt
= tables
[i
];
1147 header
.component_spec
[i
] = spec
;
1150 tjei_write(state
, &header
, sizeof(TJEFrameHeader
), 1);
1153 tjei_write_DHT(state
, state
->ht_bits
[TJEI_LUMA_DC
], state
->ht_vals
[TJEI_LUMA_DC
], TJEI_DC
, 0);
1154 tjei_write_DHT(state
, state
->ht_bits
[TJEI_LUMA_AC
], state
->ht_vals
[TJEI_LUMA_AC
], TJEI_AC
, 0);
1155 tjei_write_DHT(state
, state
->ht_bits
[TJEI_CHROMA_DC
], state
->ht_vals
[TJEI_CHROMA_DC
], TJEI_DC
, 1);
1156 tjei_write_DHT(state
, state
->ht_bits
[TJEI_CHROMA_AC
], state
->ht_vals
[TJEI_CHROMA_AC
], TJEI_AC
, 1);
1158 // Write start of scan
1160 TJEScanHeader header
;
1161 header
.SOS
= tjei_be_word(0xffda);
1162 header
.len
= tjei_be_word((uint16_t)(6 + (sizeof(TJEFrameComponentSpec
) * 3)));
1163 header
.num_components
= 3;
1165 uint8_t tables
[3] = {
1170 for (int i
= 0; i
< 3; ++i
) {
1171 TJEFrameComponentSpec cs
;
1172 // Must be equal to component_id from frame header above.
1173 cs
.component_id
= (uint8_t)(i
+ 1);
1174 cs
.dc_ac
= (uint8_t)tables
[i
];
1176 header
.component_spec
[i
] = cs
;
1181 tjei_write(state
, &header
, sizeof(TJEScanHeader
), 1);
1187 void jpeg_encode_run(void *ctx
, uint8_t *src_data
)
1192 TJE_ContextStruct
*handle
= (TJE_ContextStruct
*)ctx
;
1193 TJEState
* state
= &handle
->encode_state
;
1194 uint32_t width
= handle
->width
;
1195 uint32_t height
= handle
->height
;
1196 uint32_t src_num_components
= handle
->num_components
;
1197 uint32_t block_index
, src_index
, col
, row
;
1199 for ( uint32_t x
= 0; x
< width
; x
+= 8 ) {
1201 for ( uint32_t off_y
= 0; off_y
< 8; ++off_y
) {
1202 for ( uint32_t off_x
= 0; off_x
< 8; ++off_x
) {
1203 block_index
= (off_y
* 8 + off_x
);
1204 src_index
= (((0 + off_y
) * width
) + (x
+ off_x
)) * src_num_components
;
1207 // r = src_data[src_index + 0];
1208 // g = src_data[src_index + 1];
1209 // b = src_data[src_index + 2];
1210 // du_y[block_index] = 0.299f * r + 0.587f * g + 0.114f * b - 128;
1211 // du_b[block_index] = -0.1687f * r - 0.3313f * g + 0.5f * b;
1212 // du_r[block_index] = 0.5f * r - 0.4187f * g - 0.0813f * b;
1216 // du_y[block_index] = src_data[src_index + 0];
1217 // du_b[block_index] = src_data[src_index + 1];
1218 // du_r[block_index] = src_data[src_index + 2];
1219 // du_y[block_index] -= 128;
1220 // du_b[block_index] -= 128;
1221 // du_r[block_index] -= 128;
1223 du_y
[block_index
] = src_data
[src_index
+ 0];
1224 du_b
[block_index
] = src_data
[src_index
+ 1];
1225 du_r
[block_index
] = src_data
[src_index
+ 2];
1226 du_y
[block_index
] -= 128;
1227 du_b
[block_index
] -= 128;
1228 du_r
[block_index
] -= 128;
1232 tjei_encode_and_write_MCU(state
, du_y
,
1233 #if TJE_USE_FAST_DCT
1238 state
->ehuffsize
[TJEI_LUMA_DC
], state
->ehuffcode
[TJEI_LUMA_DC
],
1239 state
->ehuffsize
[TJEI_LUMA_AC
], state
->ehuffcode
[TJEI_LUMA_AC
],
1240 &handle
->pred_y
, &handle
->bitbuffer
, &handle
->location
);
1241 tjei_encode_and_write_MCU(state
, du_b
,
1242 #if TJE_USE_FAST_DCT
1247 state
->ehuffsize
[TJEI_CHROMA_DC
], state
->ehuffcode
[TJEI_CHROMA_DC
],
1248 state
->ehuffsize
[TJEI_CHROMA_AC
], state
->ehuffcode
[TJEI_CHROMA_AC
],
1249 &handle
->pred_b
, &handle
->bitbuffer
, &handle
->location
);
1250 tjei_encode_and_write_MCU(state
, du_r
,
1251 #if TJE_USE_FAST_DCT
1256 state
->ehuffsize
[TJEI_CHROMA_DC
], state
->ehuffcode
[TJEI_CHROMA_DC
],
1257 state
->ehuffsize
[TJEI_CHROMA_AC
], state
->ehuffcode
[TJEI_CHROMA_AC
],
1258 &handle
->pred_r
, &handle
->bitbuffer
, &handle
->location
);
1262 handle
->cur_height
+= 8;
1265 void jpeg_encode_end(void *ctx
)
1267 uint16_t EOI
= tjei_be_word(0xffd9);
1268 TJE_ContextStruct
*handle
= (TJE_ContextStruct
*)ctx
;
1269 TJEState
* state
= &handle
->encode_state
;
1270 tjei_write(state
, &EOI
, sizeof(uint16_t), 1);
1272 if (state
->output_buffer_count
) {
1273 state
->write_context
.func(state
->write_context
.context
, state
->output_buffer
, (int)state
->output_buffer_count
);
1274 state
->output_buffer_count
= 0;