/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* crcsync/crccache apache server module
 *
 * This module is designed to run as a proxy server on the remote end of a slow
 * internet link. This module uses a crc32 running hash algorithm to reduce
 * data transfer in cached but modified downstream files.
 *
 * CRC algorithm uses the crcsync library created by Rusty Russell
 *
 * Authors: Toby Collett (2009), Alex Wulms (2009)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <apr_file_io.h>
#include <apr_strings.h>
#include <apr_base64.h>

#include "ap_provider.h"

#include "util_filter.h"
#include "util_script.h"
#include "util_charset.h"

#include "ap_wrapper.h"

#include "mod_crccache_server.h"

#include <crcsync/crcsync.h>

#include <openssl/evp.h>
51 module AP_MODULE_DECLARE_DATA crccache_server_module
;
// Possible states for the output compression
typedef enum {
	COMPRESSION_BUFFER_EMPTY,        // nothing handed to deflate since the last reset
	COMPRESSION_FIRST_DATA_RECEIVED, // literal data handed to deflate, no output emitted yet
	COMPRESSION_FIRST_BLOCK_WRITTEN, // section header and first compressed chunk emitted
	COMPRESSION_ENDED                // deflateEnd() has been called by the cleanup handler
} compression_state_t;
61 static void *crccache_server_create_config(apr_pool_t
*p
, server_rec
*s
) {
62 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
64 conf
->decoder_modules
= NULL
;
65 conf
->decoder_modules_cnt
= 0;
/* Progress of the per-request filter context. */
typedef enum {
	GS_INIT,          /* context created by the header parser hook */
	GS_HEADERS_SAVED, /* state crccache_out_filter expects before it starts encoding */
	GS_ENCODING       /* crccache_out_filter has accepted the response and is encoding it */
} global_state_t;
71 typedef struct crccache_ctx_t
{
72 global_state_t global_state
;
73 char *old_content_encoding
;
75 unsigned char *buffer
;
76 size_t buffer_digest_getpos
;
77 size_t buffer_read_getpos
;
80 long crc_read_block_result
;
81 size_t crc_read_block_ndigested
;
82 apr_bucket_brigade
*bb
;
85 size_t tail_block_size
;
87 struct crc_context
*crcctx
;
90 size_t tx_uncompressed_length
;
91 compression_state_t compression_state
;
92 z_stream
*compression_stream
;
94 int debug_skip_writing
; // ____
99 * Only enable CRCCache Server when requested through the config file
100 * so that the user can switch CRCCache server on in a specific virtual server
102 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
104 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
105 &crccache_server_module
);
106 conf
->enabled
= flag
;
110 static const char *set_crccache_decoder_module(cmd_parms
*parms
, void *in_struct_ptr
, const char *arg
)
112 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
113 &crccache_server_module
);
114 struct decoder_modules_t
*decoder_module
= malloc(sizeof(*decoder_module
));
115 if (decoder_module
== NULL
)
117 return "Out of memory exception while allocating decoder_module structure";
122 char *data
= strdup(arg
);
125 return "Out of memory exception while parsing DecoderModule parameter";
128 tok
= apr_strtok(data
, ": ", &last
);
131 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
134 decoder_module
->name
= strdup(tok
);
135 if (decoder_module
->name
== NULL
)
137 return "Out of memory exception while storing name in decoder_module structure";
140 tok
= apr_strtok(NULL
, ": ", &last
);
143 return "DecoderModule value must be of format: filtername:encoding[,encoding]*";
146 for (tok
= apr_strtok(tok
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
))
148 struct encodings_t
*encoding
= malloc(sizeof(*encoding
));
149 if (encoding
== NULL
)
151 return "Out of memory exception while allocating encoding structure";
154 encoding
->encoding
= strdup(tok
);
155 if (encoding
->encoding
== NULL
)
157 return "Out of memory exception while storing encoding value in encoding structure";
160 // Insert new encoding to the head of the encodings list
161 encoding
->next
= decoder_module
->encodings
;
162 decoder_module
->encodings
= encoding
;
165 // Insert (new) decoder module to the head of the decoder_modules list
166 decoder_module
->next
= conf
->decoder_modules
;
167 conf
->decoder_modules
= decoder_module
;
168 conf
->decoder_modules_cnt
++;
173 static const command_rec crccache_server_cmds
[] =
175 AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
, "Enable the CRCCache server in this virtual server"),
176 AP_INIT_TAKE1("DecoderModule", set_crccache_decoder_module
, NULL
, RSRC_CONF
, "DecoderModules to decode content-types (e.g. INFLATE:gzip,x-gzip)"),
180 static ap_filter_rec_t
*crccache_out_filter_handle
;
181 static ap_filter_rec_t
*crccache_out_save_headers_filter_handle
;
184 int decode_if_block_header(const char * header
, int * version
, size_t * file_size
, char ** hashes
)
188 *hashes
= NULL
; // this will be allocated below, make sure we free it
191 size_t headerlen
= strlen(header
);
192 for (ii
= 0; ii
< headerlen
;++ii
)
194 if (header
[ii
] == ',' || ii
== headerlen
-1)
196 sscanf(&header
[start
]," v=%d",version
);
197 sscanf(&header
[start
]," h=%as",hashes
);
198 sscanf(&header
[start
]," fs=%zu",file_size
);
205 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no hashes reported in header");
210 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE Unsupported header version, %d",*version
);
217 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no file size reported in header");
225 static int crccache_server_header_parser_handler(request_rec
*r
) {
226 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
227 &crccache_server_module
);
230 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Checking for headers");
232 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
238 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
240 // failed to decode if block header so just process request normally
243 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found (hashes: %s)",hashes
);
247 crccache_ctx
*ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
248 ctx
->global_state
= GS_INIT
;
249 ctx
->old_content_encoding
= NULL
;
250 ctx
->old_etag
= NULL
;
252 // Add the filter to save the headers, so that they can be restored after an optional INFLATE or other decoder module
253 ap_add_output_filter_handle(crccache_out_save_headers_filter_handle
,
254 ctx
, r
, r
->connection
);
256 char *accept_encoding
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
));
257 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Incoming Accept-Encoding header: %s", accept_encoding
== NULL
? "NULL" : accept_encoding
);
258 if (accept_encoding
!= NULL
)
260 struct decoder_modules_t
*required_dms
[conf
->decoder_modules_cnt
];
261 unsigned required_dms_size
= 0;
264 struct decoder_modules_t
*dm
;
265 struct encodings_t
*enc
;
267 // Build the list of filter modules to handle the requested encodings and
268 // remove all non-supported encodings from the header
269 apr_table_unset(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
270 for (tok
= apr_strtok(accept_encoding
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
)) {
271 for (dm
= conf
->decoder_modules
; dm
!= NULL
; dm
= dm
->next
) {
272 for (enc
= dm
->encodings
; enc
!= NULL
; enc
= enc
->next
) {
273 if (strcmp(tok
, enc
->encoding
) == 0)
275 // This module supports the requested encoding
276 // Add it to the list if it is not already present
277 for (cnt
= 0; cnt
!= required_dms_size
; cnt
++)
279 if (required_dms
[cnt
] == dm
)
280 break; // module is already inserted in list
282 if (cnt
== required_dms_size
)
284 required_dms
[required_dms_size
++] = dm
;
286 apr_table_mergen(r
->headers_in
, ACCEPT_ENCODING_HEADER
, tok
);
291 // Enable the requested filter modules
292 for (cnt
= 0; cnt
!= required_dms_size
; cnt
++) {
293 dm
= required_dms
[cnt
];
294 ap_filter_t
*filter
= ap_add_output_filter(dm
->name
, NULL
, r
, r
->connection
);
295 if (filter
== NULL
) {
296 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable %s filter", dm
->name
);
297 // Remove the encodings handled by this filter from the list of accepted encodings
298 accept_encoding
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
));
299 apr_table_unset(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
300 for (tok
= apr_strtok(accept_encoding
, ", ", &last
); tok
!= NULL
; tok
= apr_strtok(NULL
, ", ", &last
)) {
301 for (enc
= dm
->encodings
; enc
!= NULL
; enc
= enc
->next
) {
302 if (strcmp(tok
, enc
->encoding
)==0) {
303 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Removing encoding %s", tok
);
308 // Did not find the tok encoding in the list. It can be merged back into the header
309 apr_table_mergen(r
->headers_in
, ACCEPT_ENCODING_HEADER
, tok
);
315 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled %s filter", dm
->name
);
318 const char *updated_accept_encoding
= apr_table_get(r
->headers_in
, ACCEPT_ENCODING_HEADER
);
319 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Modified Accept-Encoding header: %s", updated_accept_encoding
== NULL
? "NULL" : updated_accept_encoding
);
321 // Add the crccache filter itself, after the decoder modules
322 ap_add_output_filter_handle(crccache_out_filter_handle
,
323 ctx
, r
, r
->connection
);
327 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER
);
330 /* // All is okay, so set response header to IM Used
331 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
333 r->status_line="226 IM Used";
339 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
341 request_rec *r = f->r;
343 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
345 // All is okay, so set response header to IM Used
347 r->status_line="HTTP/1.1 226 IM Used";
351 static void crccache_check_etag(request_rec
*r
, crccache_ctx
*ctx
, const char *transform
) {
352 const char *etag
= ctx
->old_etag
;
354 apr_table_set(r
->headers_out
, ETAG_HEADER
,
359 ctx
->old_content_encoding
== NULL
? "identity" : ctx
->old_content_encoding
,
363 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Changed ETag header to %s", apr_table_get(r
->headers_out
, ETAG_HEADER
));
367 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
369 unsigned char compress_buf
[30000];
370 request_rec
*r
= f
->r
;
371 crccache_ctx
*ctx
= f
->ctx
;
372 z_stream
*strm
= ctx
->compression_stream
;
374 if (ctx
->debug_skip_writing
)
379 strm
->avail_out
= sizeof(compress_buf
);
380 strm
->next_out
= compress_buf
;
381 uInt avail_in_pre_deflate
= strm
->avail_in
;
382 int zRC
= deflate(strm
, flush
);
383 if (zRC
== Z_STREAM_ERROR
)
385 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
388 int have
= sizeof(compress_buf
) - strm
->avail_out
;
389 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
390 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
391 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
394 // output buffer contains some data to be written
395 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
396 unsigned bucket_size
= have
;
397 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
399 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
401 ctx
->tx_length
+= bucket_size
;
402 char * buf
= apr_palloc(r
->pool
, bucket_size
);
404 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
406 buf
[0] = ENCODING_COMPRESSED
;
407 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
408 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
412 memcpy(buf
, compress_buf
, have
);
414 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
415 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
418 while (strm
->avail_out
== 0);
419 if (strm
->avail_in
!= 0)
421 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
429 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
431 crccache_ctx
*ctx
= f
->ctx
;
432 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
434 if (ctx
->debug_skip_writing
)
437 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
439 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
440 deflateReset(ctx
->compression_stream
);
441 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
442 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
450 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
452 crccache_ctx
*ctx
= f
->ctx
;
454 if (ctx
->debug_skip_writing
)
458 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
460 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
462 ctx
->compression_stream
->avail_in
= count
;
463 ctx
->compression_stream
->next_in
= buffer
;
464 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
465 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
466 ctx
->tx_uncompressed_length
+= count
;
473 static apr_status_t
write_hash(ap_filter_t
*f
, unsigned char *buffer
, long count
)
475 request_rec
*r
= f
->r
;
476 crccache_ctx
*ctx
= f
->ctx
;
479 rslt
= flush_compress_buffer(f
);
480 if (rslt
!= APR_SUCCESS
)
485 if (ctx
->debug_skip_writing
)
488 unsigned bucket_size
= count
+ 1;
489 ctx
->tx_length
+= bucket_size
;
490 ctx
->tx_uncompressed_length
+= bucket_size
;
491 char * buf
= apr_palloc(r
->pool
, bucket_size
);
493 buf
[0] = ENCODING_HASH
;
494 memcpy(&buf
[1],buffer
,count
);
495 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE HASH");
496 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
497 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
503 * Write a block reference
505 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
507 request_rec
*r
= f
->r
;
508 crccache_ctx
*ctx
= f
->ctx
;
511 rslt
= flush_compress_buffer(f
);
512 if (rslt
!= APR_SUCCESS
)
517 if (ctx
->debug_skip_writing
)
520 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
521 ctx
->tx_length
+= bucket_size
;
522 ctx
->tx_uncompressed_length
+= bucket_size
;
523 char * buf
= apr_palloc(r
->pool
, bucket_size
);
525 buf
[0] = ENCODING_BLOCK
;
526 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
527 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
528 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
529 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
534 * Process one block of data: try to match it against the CRC, append
535 * the result to the ouput ring and remember the result (e.g. was
536 * it a block-match or was a literal processed)
538 static apr_status_t
process_block(ap_filter_t
*f
)
540 request_rec
*r
= f
->r
;
541 crccache_ctx
*ctx
= f
->ctx
;
542 apr_status_t rslt
= APR_SUCCESS
;
544 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
545 if (ctx
->crcctx
== NULL
)
547 // This should never happen
548 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
553 size_t ndigested
= crc_read_block(
556 ctx
->buffer
+ctx
->buffer_digest_getpos
,
557 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
559 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
560 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
563 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'tail_blocksize' bytes at the end of the buffer,
564 // it will have to be moved to the beginning of the moving window so that it can be written upon the next call to
565 // crc_read_block or crc_read_flush)
566 // rd_block_rslt > 0: send literal
567 // rd_block_rslt < 0: send block
568 if (rd_block_rslt
> 0)
570 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
571 ctx
->buffer_read_getpos
+= rd_block_rslt
;
573 else if (rd_block_rslt
< 0)
575 rslt
= write_block_reference(f
, rd_block_rslt
);
576 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
577 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
580 // Update the context with the results
581 ctx
->crc_read_block_result
= rd_block_rslt
;
582 ctx
->crc_read_block_ndigested
= ndigested
;
583 ctx
->buffer_digest_getpos
+= ndigested
;
588 * Flush one block of data: get it from the crccontext, append
589 * the result to the ouput ring and remember the result (e.g. was
590 * it a block-match or was a literal processed)
592 static apr_status_t
flush_block(ap_filter_t
*f
)
594 request_rec
*r
= f
->r
;
595 crccache_ctx
*ctx
= f
->ctx
;
596 apr_status_t rslt
= APR_SUCCESS
;
598 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
599 if (ctx
->crcctx
== NULL
)
601 // This should never happen
602 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
605 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
606 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
608 // rd_flush_rslt = 0: do nothing
609 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
610 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
611 if (rd_flush_rslt
> 0)
613 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
614 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
616 else if (rd_flush_rslt
< 0)
618 rslt
= write_block_reference(f
, rd_flush_rslt
);
619 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
620 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
623 // Update the context with the results
624 ctx
->crc_read_block_result
= rd_flush_rslt
;
625 ctx
->crc_read_block_ndigested
= 0;
630 * Clean-up memory used by helper libraries, that don't know about apr_palloc
631 * and that (probably) use classical malloc/free
633 static apr_status_t
deflate_ctx_cleanup(void *data
)
635 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
639 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
641 deflateEnd(ctx
->compression_stream
);
642 ctx
->compression_state
= COMPRESSION_ENDED
;
644 if (ctx
->crcctx
!= NULL
)
646 crc_context_free(ctx
->crcctx
);
653 * End of stream has been reached:
654 * Process any data still in the buffer and flush all internal
655 * structures of crcsync and of zlib
656 * Furthermore, add a strong hash
658 static apr_status_t
process_eos(ap_filter_t
*f
)
660 crccache_ctx
*ctx
= f
->ctx
;
663 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
666 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
668 // There is still data in the buffer. Process it.
669 rslt
= process_block(f
);
670 if (rslt
!= APR_SUCCESS
)
678 // Flush remaining block in the crcctx
679 rslt
= flush_block(f
);
680 if (rslt
!= APR_SUCCESS
)
685 while (ctx
->crc_read_block_result
!= 0);
687 // Flush anything that is remaining in the compress buffer
688 rslt
= flush_compress_buffer(f
);
689 if (rslt
!= APR_SUCCESS
)
695 unsigned char md_value
[EVP_MAX_MD_SIZE
];
696 EVP_DigestFinal_ex(&ctx
->mdctx
, md_value
, &md_len
);
697 EVP_MD_CTX_cleanup(&ctx
->mdctx
);
698 write_hash(f
, md_value
, md_len
);
700 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
701 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
707 * Process a data bucket; append data into a moving window buffer
708 * and encode it with crcsync algorithm when window contains enough
709 * data for crcsync to find potential matches
711 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
713 request_rec
*r
= f
->r
;
714 crccache_ctx
*ctx
= f
->ctx
;
721 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
722 ctx
->orig_length
+= len
;
723 // update our sha1 hash
724 EVP_DigestUpdate(&ctx
->mdctx
, data
, len
);
725 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
727 // append data to the buffer and encode buffer content using the crc_read_block magic
728 size_t bucket_used_count
= 0;
729 size_t bucket_data_left
;
730 while(bucket_used_count
< len
)
732 /* Append as much data as possible into the buffer */
733 bucket_data_left
= len
- bucket_used_count
;
734 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
735 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
736 bucket_used_count
+= copy_size
;
737 bucket_data_left
-= copy_size
;
738 ctx
->buffer_putpos
+= copy_size
;
739 /* flush the buffer if it is appropriate */
740 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
742 // Buffer is filled to the end. Flush as much as possible
743 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
744 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
745 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
746 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->tail_block_size
)
748 // We can still scan at least 1 tail block + 1 byte forward: try to flush next part
749 rslt
= process_block(f
);
750 if (rslt
!= APR_SUCCESS
)
754 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
755 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
756 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
759 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
761 // Copy the remaining part of the buffer to the start of the buffer,
762 // so that it can be filled again as new data arrive
763 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
764 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
765 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
766 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
768 // Reset getpos to the beginning of the buffer and putpos accordingly
769 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
770 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
771 ctx
->buffer_read_getpos
= 0;
773 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->tail_block_size
)
775 // Previous block matched exactly. Let's hope the next block as well
776 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
777 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (tail_block_size: %zu)",
778 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->tail_block_size
);
779 rslt
= process_block(f
);
780 if (rslt
!= APR_SUCCESS
)
786 return APR_SUCCESS
; // Yahoo, all went well
793 * Deliver cached content (headers and body) up the stack.
795 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
797 request_rec
*r
= f
->r
;
798 crccache_ctx
*ctx
= f
->ctx
;
800 int return_code
= APR_SUCCESS
;
802 /* Do nothing if asked to filter nothing. */
803 if (APR_BRIGADE_EMPTY(bb
)) {
804 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
805 return ap_pass_brigade(f
->next
, bb
);
808 /* If state is not yet GS_ENCODING content, we need to ensure that it is okay to send
809 * the encoded content. If the state is GS_ENCODING, that means we've done
810 * this before and we liked it.
811 * This could be not so nice if we always fail. But, if we succeed,
812 * we're in better shape.
814 if (ctx
->global_state
!= GS_ENCODING
)
816 const char *encoding
;
818 /* only work on main request/no subrequests */
819 if (r
->main
!= NULL
) {
820 ap_remove_output_filter(f
);
821 return ap_pass_brigade(f
->next
, bb
);
824 /* We can't operate on Content-Ranges */
825 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
826 ap_remove_output_filter(f
);
827 return ap_pass_brigade(f
->next
, bb
);
830 if (ctx
->global_state
!= GS_HEADERS_SAVED
)
832 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_HEADERS_SAVED
);
836 /* Indicate to caches that they may only re-use this response for a request
837 * with the same BLOCK_HEADER value as the current request
838 * Indicate to clients that the server supports crcsync, even if checks
839 * further down prevent this specific response from being crc-encoded
841 apr_table_mergen(r
->headers_out
, VARY_HEADER
, BLOCK_HEADER
);
843 /* If Content-Encoding is present and differs from "identity", we can't handle it */
844 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
845 if (encoding
&& strcasecmp(encoding
, "identity")) {
846 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
847 "Not encoding with crccache. It is already encoded with: %s", encoding
);
848 ap_remove_output_filter(f
);
849 return ap_pass_brigade(f
->next
, bb
);
852 /* For a 304 or 204 response there is no entity included in
853 * the response and hence nothing to crc-encode. */
854 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
856 ap_remove_output_filter(f
);
857 return ap_pass_brigade(f
->next
, bb
);
860 /* All Ok. We're cool with filtering this. */
861 ctx
->global_state
= GS_ENCODING
;
862 ctx
->debug_skip_writing
= 0;
863 ctx
->orig_length
= 0;
865 ctx
->tx_uncompressed_length
= 0;
866 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
868 /* Parse the input headers */
870 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
874 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
876 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to decode if-block header");
877 ap_remove_output_filter(f
);
878 return ap_pass_brigade(f
->next
, bb
);
881 ctx
->block_count
= apr_base64_decode_len(hashes
)/(HASH_SIZE
/8);
882 // this may over allocate by a couple of bytes but no big deal
883 ctx
->hashes
= apr_palloc(r
->pool
, apr_base64_decode_len(hashes
));
884 apr_base64_decode((char *)ctx
->hashes
, hashes
);
888 ctx
->block_size
= file_size
/ctx
->block_count
;
889 ctx
->tail_block_size
= ctx
->block_size
+ file_size
% ctx
->block_count
;
890 size_t block_count_including_final_block
= ctx
->block_count
;// + (ctx->tail_block_size != 0);
891 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
892 "If-block header decoded, version %d: %d hashes of %d and one of %d", version
, ctx
->block_count
-1,(int)ctx
->block_size
,(int)ctx
->tail_block_size
);
894 // swap to network byte order
896 for (i
= 0; i
< block_count_including_final_block
;++i
)
898 htobe64(ctx
->hashes
[i
]);
901 // Data come in at chunks that are potentially smaller then block_size or tail_block_size
902 // Accumulate those chunks into a buffer.
903 // The buffer must be at least block_size+tail_block_size so that crc_read_block(...) can find a matching block, regardless
904 // of the data alignment compared to the original page.
905 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
906 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
907 // Though, the larger the buffer, the bigger the memory demand.
908 // A size of 3*block_size+tail_block_size+1 (20% of original file size) seems to be a good balance
910 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
911 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
912 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
913 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
914 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
916 ctx
->buffer_size
= ctx
->block_size
*3 + ctx
->tail_block_size
+ 1;
917 ctx
->buffer_digest_getpos
= 0;
918 ctx
->buffer_read_getpos
= 0;
919 ctx
->buffer_putpos
= 0;
920 ctx
->crc_read_block_result
= 0;
921 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
923 /* Setup deflate for compressing non-matched literal data */
924 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
925 // TODO: should I pass some apr_palloc based function to prevent memory leaks
926 //in case of unexpected errors?
928 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
929 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
930 ctx
->compression_stream
->zalloc
= Z_NULL
;
931 ctx
->compression_stream
->zfree
= Z_NULL
;
932 ctx
->compression_stream
->opaque
= Z_NULL
;
933 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
936 // Can't initialize the compression engine for compressing literal data
937 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
938 free(ctx
->compression_stream
);
939 ctx
->compression_stream
= NULL
;
940 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
941 "unable to init Zlib: "
942 "deflateInit returned %d: URL %s",
944 ap_remove_output_filter(f
);
945 return ap_pass_brigade(f
->next
, bb
);
948 // initialise the context for our sha1 digest of the unencoded response
949 EVP_MD_CTX_init(&ctx
->mdctx
);
950 const EVP_MD
*md
= EVP_sha1();
951 EVP_DigestInit_ex(&ctx
->mdctx
, md
, NULL
);
953 // now initialise the crcsync context that will do the real work
954 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
956 // Register a cleanup function to cleanup internal libz and crcsync resources
957 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
958 apr_pool_cleanup_null
);
960 // All checks and initializations are OK
961 // Modify headers that are impacted by this transformation
962 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
963 apr_table_unset(r
->headers_out
, "Content-Length");
964 apr_table_unset(r
->headers_out
, "Content-MD5");
965 crccache_check_etag(r
, ctx
, CRCCACHE_ENCODING
);
967 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE Server end of context setup");
970 if (ctx
->global_state
!= GS_ENCODING
)
972 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_ENCODING
);
976 while (!APR_BRIGADE_EMPTY(bb
))
982 e
= APR_BRIGADE_FIRST(bb
);
984 if (APR_BUCKET_IS_EOS(e
))
986 // Process end of stream: flush data buffers, compression buffers, etc.
987 // and calculate a strong hash.
988 rslt
= process_eos(f
);
990 /* Remove EOS from the old list, and insert into the new. */
991 APR_BUCKET_REMOVE(e
);
992 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
994 /* This filter is done once it has served up its content */
995 ap_remove_output_filter(f
);
997 if (rslt
!= APR_SUCCESS
)
999 return rslt
; // A problem occurred. Abort the processing
1002 /* Okay, we've seen the EOS.
1003 * Time to pass it along down the chain.
1005 return ap_pass_brigade(f
->next
, ctx
->bb
);
1008 if (APR_BUCKET_IS_FLUSH(e
))
1010 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
1013 /* Remove flush bucket from old brigade and insert into the new. */
1014 APR_BUCKET_REMOVE(e
);
1015 // TODO: optimize; do not insert two consecutive flushes when no intermediate
1016 // output block was written
1017 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1018 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
1019 if (rv
!= APR_SUCCESS
) {
1025 if (APR_BUCKET_IS_METADATA(e
)) {
1027 * Remove meta data bucket from old brigade and insert into the
1030 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
1032 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1033 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
1035 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1036 "CRCCACHE-ENCODE Metadata, read %zu",len
);
1037 APR_BUCKET_REMOVE(e
);
1038 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
// Bucket is none of the above types. Assume it is a data bucket
1043 // which means it can be encoded with the crcsync algorithm
1044 rslt
= process_data_bucket(f
, e
);
1046 APR_BUCKET_REMOVE(e
);
1047 if (rslt
!= APR_SUCCESS
)
1049 break; // A problem occurred. Abort the processing
1053 apr_brigade_cleanup(bb
);
/*
 * CRCCACHE_OUT_SAVE_HEADERS filter
 *
 * Saves the response headers into the request context.
 */
1064 static apr_status_t
crccache_out_save_headers_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
1065 request_rec
*r
= f
->r
;
1066 crccache_ctx
*ctx
= f
->ctx
;
1068 /* Do nothing if asked to filter nothing. */
1069 if (APR_BRIGADE_EMPTY(bb
)) {
1070 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE (save headers) bucket brigade is empty -> nothing todo");
1071 return ap_pass_brigade(f
->next
, bb
);
1074 if (ctx
->global_state
!= GS_INIT
)
1076 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE (save headers) unexpected ctx-state: %d, expected: %d", ctx
->global_state
, GS_INIT
);
1077 return APR_EGENERAL
;
1080 /* only work on main request/no subrequests */
1081 if (r
->main
!= NULL
) {
1082 ap_remove_output_filter(f
);
1083 return ap_pass_brigade(f
->next
, bb
);
1086 /* We can't operate on Content-Ranges */
1087 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
1088 ap_remove_output_filter(f
);
1089 return ap_pass_brigade(f
->next
, bb
);
1092 /* Save content-encoding and etag header for later usage by the crcsync
1095 const char *encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
1096 if (encoding
!= NULL
)
1098 ctx
->old_content_encoding
= apr_pstrdup(r
->pool
, encoding
);
1099 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
1100 "Saved old content-encoding: %s", encoding
);
1102 const char *etag
= apr_table_get(r
->headers_out
, ETAG_HEADER
);
1105 ctx
->old_etag
= apr_pstrdup(r
->pool
, etag
);
1106 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
1107 "Saved old etag: %s", etag
);
1109 ctx
->global_state
= GS_HEADERS_SAVED
;
1111 /* Done saving headers. Nothing left to do */
1112 ap_remove_output_filter(f
);
1113 return ap_pass_brigade(f
->next
, bb
);
/*
 * Hook-registration function of the module (it is the entry labelled
 * "register hooks" in the crccache_server_module descriptor).
 *
 * Logs a registration notice, installs crccache_server_header_parser_handler
 * through ap_hook_header_parser() and registers the module's output filters.
 * The handles returned for "CRCCACHE_OUT_SAVE_HEADERS" and "CRCCACHE_OUT" are
 * stored in crccache_out_save_headers_filter_handle and
 * crccache_out_filter_handle.
 *
 * p: pool passed in by the caller; it is not referenced in the body.
 */
1117 static void crccache_server_register_hook(apr_pool_t
*p
) {
1118 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
1119 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
1121 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
1124 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
1125 NULL, AP_FTYPE_PROTOCOL);
1127 crccache_out_save_headers_filter_handle
= ap_register_output_filter("CRCCACHE_OUT_SAVE_HEADERS",
1128 crccache_out_save_headers_filter
, NULL
, AP_FTYPE_RESOURCE
-1); // make sure to handle it *before* INFLATE filter (or other decode modules)
1130 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
1131 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
/*
 * Module descriptor for crccache_server_module.
 *
 * Only three slots are filled in: the per-server config constructor
 * (crccache_server_create_config), the command table (crccache_server_cmds)
 * and the hook-registration function (crccache_server_register_hook). The
 * per-directory config slots and both merge slots are NULL.
 */
1134 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
1135 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
1136 NULL
, /* merge per-directory config structures */
1137 crccache_server_create_config
, /* create per-server config structure */
1138 NULL
, /* merge per-server config structures */
1139 crccache_server_cmds
, /* command apr_table_t */
1140 crccache_server_register_hook
/* register hooks */