1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 /* crcsync/crccache apache server module
19 * This module is designed to run as a proxy server on the remote end of a slow
20 * internet link. This module uses a crc32 running hash algorithm to reduce
21 * data transfer in cached but modified downstream files.
23 * CRC algorithm uses the crcsync library created by Rusty Russell
25 * Authors: Toby Collett (2009), Alex Wulms (2009)
31 #include <apr_file_io.h>
32 #include <apr_strings.h>
33 #include <apr_base64.h>
35 #include "ap_provider.h"
37 #include "util_filter.h"
38 #include "util_script.h"
39 #include "util_charset.h"
42 #include "ap_wrapper.h"
45 #include "mod_crccache_server.h"
47 #include <crcsync/crcsync.h>
49 #include <openssl/evp.h>
// Forward declaration of the module record, so that functions earlier in the
// file can pass its address to ap_get_module_config; the initialised
// definition appears at the end of the file.
51 module AP_MODULE_DECLARE_DATA crccache_server_module
;
53 // Possible states for the output compression
// NOTE(review): the opening of the enum declaration is not visible in this
// excerpt, and COMPRESSION_ENDED (used by deflate_ctx_cleanup) is not among
// the visible enumerators.
// State chosen during filter setup, and restored by flush_compress_buffer
// after it has finished and reset the deflate stream.
55 COMPRESSION_BUFFER_EMPTY
,
// Entered by write_literal when literal data arrives while the state is
// COMPRESSION_BUFFER_EMPTY.
56 COMPRESSION_FIRST_DATA_RECEIVED
,
// Entered by write_compress_buffer once the first output bucket, the one
// that carries the ENCODING_COMPRESSED prefix, has been built.
57 COMPRESSION_FIRST_BLOCK_WRITTEN
,
59 } compression_state_t
;
61 //#define MIN(X,Y) (X<Y?X:Y)
// Creates the per-server configuration record; it is listed in the module
// definition as the per-server config constructor.
//   p: pool the record is allocated from
//   s: server being configured (not used in the visible lines)
// apr_pcalloc hands back zero-filled memory.
// NOTE(review): the rest of the body, including the return statement, is not
// visible in this excerpt.
63 static void *crccache_server_create_config(apr_pool_t
*p
, server_rec
*s
) {
64 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
// Per-request state of the CRCCACHE_OUT output filter; crccache_out_filter
// allocates it from the request pool and stores it in f->ctx.
// NOTE(review): other members that the code in this file reads and writes
// (buffer_putpos, buffer_size, block_size, block_count, tx_length,
// orig_length, hashes, mdctx) are not visible in this excerpt.
68 typedef struct crccache_ctx_t
{
// Moving-window buffer that incoming bucket data is copied into.
69 unsigned char *buffer
;
// Offset in buffer of the next byte to hand to crc_read_block.
70 size_t buffer_digest_getpos
;
// Offset in buffer of the next byte not yet emitted as a literal or as a
// block reference.
71 size_t buffer_read_getpos
;
// Result of the most recent crc_read_block or crc_read_flush call.
74 long crc_read_block_result
;
// Byte count digested by the most recent crc_read_block call; flush_block
// sets it to 0.
75 size_t crc_read_block_ndigested
;
// Brigade that collects the encoded output before it is passed to the next
// filter.
76 apr_bucket_brigade
*bb
;
// Size of the last block: block_size plus the remainder of dividing the file
// size by the block count.
79 size_t tail_block_size
;
// crcsync matching context, created with crc_context_new and released in
// deflate_ctx_cleanup.
81 struct crc_context
*crcctx
;
// Running size of the encoded output with literal data counted before
// compression.
84 size_t tx_uncompressed_length
;
// Where the literal-data compressor stands; see compression_state_t.
85 compression_state_t compression_state
;
// zlib stream used to deflate literal data.
86 z_stream
*compression_stream
;
// Debug switch that the write helpers test before doing any work; set to 0
// during filter setup.
88 int debug_skip_writing
; // ____
93 * Only enable CRCCache Server when requested through the config file
94 * so that the user can switch CRCCache server on in a specific virtual server
// Handler for the CRCcacheServer flag directive: stores the flag in the
// enabled member of the per-server config of the server being configured.
//   parms: directive context; parms->server selects the config record
//   dummy: not used in the visible lines
//   flag:  value parsed from the directive
// NOTE(review): the return statement is not visible in this excerpt.
96 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
98 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
99 &crccache_server_module
);
100 conf
->enabled
= flag
;
// Configuration directive table of this module; the module definition at the
// end of the file points at it.
104 static const command_rec crccache_server_cmds
[] =
// CRCcacheServer is a flag directive handled by set_crccache_server.
// RSRC_CONF restricts it to server-level configuration.
106 AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
, "Enable the CRCCache server in this virtual server"),
// Handle of the registered CRCCACHE_OUT output filter. It is assigned in
// crccache_server_register_hook and used by the header parser hook to add the
// filter to a request.
110 static ap_filter_rec_t
*crccache_out_filter_handle
;
// Splits the value of the block header into its parts.
//   header:    header text made of comma-separated elements; it is passed to
//              strlen, so it must not be NULL
//   version:   receives the number scanned from a v= element
//   file_size: receives the number scanned from an fs= element
//   hashes:    receives the string scanned from an h= element; set to NULL
//              before scanning starts
// Both callers in this file treat a negative return value as failure.
// NOTE(review): several lines of the body (local declarations, the update of
// start, the conditions guarding the three error messages and the return
// statements) are not visible in this excerpt.
112 int decode_if_block_header(const char * header
, int * version
, size_t * file_size
, char ** hashes
)
116 *hashes
= NULL
; // this will be allocated below, make sure we free it
119 size_t headerlen
= strlen(header
);
120 for (ii
= 0; ii
< headerlen
;++ii
)
// An element ends at a comma or at the last character of the header.
122 if (header
[ii
] == ',' || ii
== headerlen
-1)
// Try every known key on the element that begins at start; a pattern whose
// literal prefix does not match leaves its target untouched.
124 sscanf(&header
[start
]," v=%d",version
);
// NOTE(review): %as is the glibc allocating-string extension, which is why
// the result has to be freed. C99 reads %a as a floating-point conversion and
// POSIX spells the allocating form %ms - confirm the build relies on the
// glibc behaviour.
125 sscanf(&header
[start
]," h=%as",hashes
);
126 sscanf(&header
[start
]," fs=%zu",file_size
);
133 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no hashes reported in header");
138 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE Unsupported header version, %d",*version
);
145 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, NULL
, "CRCCACHE-ENCODE no file size reported in header");
// Header parser hook (registered with ap_hook_header_parser). It looks for the
// block header in the request and, when the header decodes, adds the INFLATE
// output filter followed by the CRCCACHE_OUT filter to the request.
//   r: request being processed
// NOTE(review): the local declarations, the branches that test conf and
// header, and the return statements are not visible in this excerpt.
153 static int crccache_server_header_parser_handler(request_rec
*r
) {
154 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
155 &crccache_server_module
);
158 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Checking for headers");
160 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
// NOTE(review): decode_if_block_header calls strlen on header; presumably a
// NULL check sits in the lines not visible here - confirm.
166 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
168 // failed to decode if block header so just process request normally
171 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes
);
174 // TODO: save etag and content-encoding headers before INFLATE modifies them, for later reuse in this module itself
175 // (to construct etag-crcsync-<original-encoding> header) (is it possible to pass info from here to the main module?)
176 // Insert mod_deflate's INFLATE filter in the chain to unzip content
177 // so that there is clear text available for the delta algorithm
178 ap_filter_t
*inflate_filter
= ap_add_output_filter("INFLATE", NULL
, r
, r
->connection
);
// A missing INFLATE filter is only logged as a warning; the crccache filter
// is still added below.
179 if (inflate_filter
== NULL
)
181 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
185 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
187 // And the crccache filter itself of course
188 ap_add_output_filter_handle(crccache_out_filter_handle
,
189 NULL
, r
, r
->connection
);
194 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Did not detect blockheader (%s)", BLOCK_HEADER
);
197 /* // All is okay, so set response header to IM Used
198 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
200 r->status_line="226 IM Used";
206 /*static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
208 request_rec *r = f->r;
210 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
212 // All is okay, so set response header to IM Used
214 r->status_line="HTTP/1.1 226 IM Used";
218 /* TODO: change etag as per document (e.g. append '-crcsync-<original-encoding>' to the etag header)
// Rewrites the ETag response header of r. The rewrite happens only when an
// ETag is present and it does not start with the weak-validator prefix W/
// (either letter case); the new value starts with the old ETag followed by a
// dash.
//   r:         request whose headers_out table is updated
//   transform: NOTE(review): presumably appended after the dash - the rest of
//              the apr_pstrcat argument list is not visible in this excerpt
220 static void crccache_check_etag(request_rec
*r
, const char *transform
) {
221 const char *etag
= apr_table_get(r
->headers_out
, "ETag");
222 if (etag
&& (((etag
[0] != 'W') && (etag
[0] != 'w')) || (etag
[1] != '/'))) {
223 apr_table_set(r
->headers_out
, "ETag", apr_pstrcat(r
->pool
, etag
, "-",
// Runs deflate over the input currently attached to ctx->compression_stream
// and appends every chunk of produced output to ctx->bb as a pool bucket.
//   f:     filter whose context holds the stream and the output brigade
//   flush: zlib flush mode passed straight to deflate (the callers in this
//          file use Z_NO_FLUSH and Z_FINISH)
// NOTE(review): the loop opener, the branch bodies and the return statements
// are not visible in this excerpt.
228 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
// Scratch space on the stack that deflate writes into on every pass.
230 unsigned char compress_buf
[30000];
231 request_rec
*r
= f
->r
;
232 crccache_ctx
*ctx
= f
->ctx
;
233 z_stream
*strm
= ctx
->compression_stream
;
235 if (ctx
->debug_skip_writing
)
// Hand the whole scratch buffer to deflate for this pass.
240 strm
->avail_out
= sizeof(compress_buf
);
241 strm
->next_out
= compress_buf
;
242 uInt avail_in_pre_deflate
= strm
->avail_in
;
243 int zRC
= deflate(strm
, flush
);
244 if (zRC
== Z_STREAM_ERROR
)
246 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
// Number of output bytes deflate produced in this pass.
249 int have
= sizeof(compress_buf
) - strm
->avail_out
;
250 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
251 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
252 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
255 // output buffer contains some data to be written
256 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
257 unsigned bucket_size
= have
;
// The first bucket of a compressed section gets extra room for the section
// header.
258 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
260 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
262 ctx
->tx_length
+= bucket_size
;
// The scratch buffer lives on the stack, so the data is copied into pool
// memory that the bucket can refer to.
263 char * buf
= apr_palloc(r
->pool
, bucket_size
);
265 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
267 buf
[0] = ENCODING_COMPRESSED
;
268 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
269 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
273 memcpy(buf
, compress_buf
, have
);
275 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
276 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
// A completely filled scratch buffer means deflate may have more output
// pending, so another pass follows.
279 while (strm
->avail_out
== 0);
// Once the output side had room to spare, all input should be consumed.
280 if (strm
->avail_in
!= 0)
282 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
// Closes the current compressed section, if one is open: finishes the deflate
// stream with Z_FINISH, resets the stream for reuse and marks the compression
// buffer as empty.
//   f: filter whose context holds the compression stream
// NOTE(review): the body of the debug_skip_writing branch and the return
// statement are not visible in this excerpt.
290 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
292 crccache_ctx
*ctx
= f
->ctx
;
293 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
295 if (ctx
->debug_skip_writing
)
// Nothing to do when no literal data arrived since the last flush.
298 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
300 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
301 deflateReset(ctx
->compression_stream
);
302 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
303 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
// Feeds a run of unmatched (literal) bytes to the deflate stream and lets
// write_compress_buffer emit whatever compressed output results.
//   f:      filter whose context holds the compression stream
//   buffer: first literal byte
//   count:  number of literal bytes
// NOTE(review): the declaration of rslt, the body of the debug_skip_writing
// branch and the return statement are not visible in this excerpt.
311 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
313 crccache_ctx
*ctx
= f
->ctx
;
315 if (ctx
->debug_skip_writing
)
// First literal data since the last flush: a compressed section is now open.
319 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
321 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
// deflate reads directly from the caller's buffer; nothing is copied here.
323 ctx
->compression_stream
->avail_in
= count
;
324 ctx
->compression_stream
->next_in
= buffer
;
325 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
326 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
327 ctx
->tx_uncompressed_length
+= count
;
// Appends a hash record to the output: one ENCODING_HASH marker byte followed
// by count bytes of hash data. Any open compressed section is flushed first.
//   f:      filter whose context holds the output brigade
//   buffer: hash bytes to copy
//   count:  number of hash bytes
// NOTE(review): the declaration of rslt, the bodies of the two guard branches
// and the return statement are not visible in this excerpt.
334 static apr_status_t
write_hash(ap_filter_t
*f
, unsigned char *buffer
, long count
)
336 request_rec
*r
= f
->r
;
337 crccache_ctx
*ctx
= f
->ctx
;
340 rslt
= flush_compress_buffer(f
);
341 if (rslt
!= APR_SUCCESS
)
346 if (ctx
->debug_skip_writing
)
// One extra byte for the record marker.
349 unsigned bucket_size
= count
+ 1;
// The record is not compressed, so it counts the same in both totals.
350 ctx
->tx_length
+= bucket_size
;
351 ctx
->tx_uncompressed_length
+= bucket_size
;
352 char * buf
= apr_palloc(r
->pool
, bucket_size
);
354 buf
[0] = ENCODING_HASH
;
355 memcpy(&buf
[1],buffer
,count
);
356 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE HASH");
357 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
358 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
364 * Write a block reference
// Appends a block-reference record to the output: the ENCODING_BLOCK marker
// byte followed by the zero-based block index. Any open compressed section is
// flushed first.
//   f:      filter whose context holds the output brigade
//   result: negative match result; block index k is passed as -(k+1)
// NOTE(review): the declaration of rslt, the bodies of the two guard branches
// and the return statement are not visible in this excerpt.
366 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
368 request_rec
*r
= f
->r
;
369 crccache_ctx
*ctx
= f
->ctx
;
372 rslt
= flush_compress_buffer(f
);
373 if (rslt
!= APR_SUCCESS
)
378 if (ctx
->debug_skip_writing
)
381 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
// The record is not compressed, so it counts the same in both totals.
382 ctx
->tx_length
+= bucket_size
;
383 ctx
->tx_uncompressed_length
+= bucket_size
;
384 char * buf
= apr_palloc(r
->pool
, bucket_size
);
386 buf
[0] = ENCODING_BLOCK
;
// The index is narrowed to a single byte, so only indices 0 to 255 survive
// the cast.
// NOTE(review): buf[1] is written, so ENCODING_BLOCK_HEADER_SIZE has to be at
// least 2 - confirm in the header.
387 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
388 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
389 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
390 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
395 * Process one block of data: try to match it against the CRC, append
396 * the result to the output ring and remember the result (e.g. was
397 * it a block-match or was a literal processed)
// Runs one crc_read_block step over the not yet digested part of the window
// buffer and emits what the step reports: a literal run (positive result) or
// a block reference (negative result). The result and the digested byte count
// are stored in the context and both read positions are advanced.
//   f: filter whose context holds the buffer and the crcsync context
// NOTE(review): part of the crc_read_block argument list, the declaration of
// rd_block_rslt and the return statements are not visible in this excerpt.
399 static apr_status_t
process_block(ap_filter_t
*f
)
401 request_rec
*r
= f
->r
;
402 crccache_ctx
*ctx
= f
->ctx
;
403 apr_status_t rslt
= APR_SUCCESS
;
405 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
406 if (ctx
->crcctx
== NULL
)
408 // This should never happen
409 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
// The visible arguments are the first undigested byte and the number of
// undigested bytes in the buffer.
414 size_t ndigested
= crc_read_block(
417 ctx
->buffer
+ctx
->buffer_digest_getpos
,
418 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
420 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
421 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
424 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
425 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
426 // rd_block_rslt > 0: send literal
427 // rd_block_rslt < 0: send block
428 if (rd_block_rslt
> 0)
430 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
431 ctx
->buffer_read_getpos
+= rd_block_rslt
;
433 else if (rd_block_rslt
< 0)
435 rslt
= write_block_reference(f
, rd_block_rslt
);
// A matched block covers block_size input bytes, except the last block,
// which covers tail_block_size bytes.
436 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
437 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
440 // Update the context with the results
441 ctx
->crc_read_block_result
= rd_block_rslt
;
442 ctx
->crc_read_block_ndigested
= ndigested
;
443 ctx
->buffer_digest_getpos
+= ndigested
;
448 * Flush one block of data: get it from the crccontext, append
449 * the result to the output ring and remember the result (e.g. was
450 * it a block-match or was a literal processed)
// Asks crcsync for data it has digested but not yet reported (crc_read_flush)
// and emits it like process_block does: a literal run for a positive result,
// a block reference for a negative one. The result is stored in the context
// and the digested byte count is set to 0.
//   f: filter whose context holds the buffer and the crcsync context
// NOTE(review): the body tail of the NULL check and the return statements are
// not visible in this excerpt.
452 static apr_status_t
flush_block(ap_filter_t
*f
)
454 request_rec
*r
= f
->r
;
455 crccache_ctx
*ctx
= f
->ctx
;
456 apr_status_t rslt
= APR_SUCCESS
;
458 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
459 if (ctx
->crcctx
== NULL
)
461 // This should never happen
462 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
465 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
466 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
468 // rd_flush_rslt = 0: do nothing
469 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
470 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
471 if (rd_flush_rslt
> 0)
473 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
474 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
476 else if (rd_flush_rslt
< 0)
478 rslt
= write_block_reference(f
, rd_flush_rslt
);
// Same advance rule as in process_block: the last block has its own size.
479 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
480 ctx
->buffer_read_getpos
+= (blocknum
== ctx
->block_count
-1) ? ctx
->tail_block_size
: ctx
->block_size
;
483 // Update the context with the results
484 ctx
->crc_read_block_result
= rd_flush_rslt
;
485 ctx
->crc_read_block_ndigested
= 0;
490 * Clean-up memory used by helper libraries, that don't know about apr_palloc
491 * and that (probably) use classical malloc/free
// Pool cleanup callback registered by crccache_out_filter. It ends the deflate
// stream once (tracked through COMPRESSION_ENDED) and frees the crcsync
// context when there is one.
//   data: the crccache_ctx that was passed to apr_pool_cleanup_register
// NOTE(review): the return statement is not visible in this excerpt.
493 static apr_status_t
deflate_ctx_cleanup(void *data
)
495 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
// The state check keeps deflateEnd from being called twice on the stream.
499 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
501 deflateEnd(ctx
->compression_stream
);
502 ctx
->compression_state
= COMPRESSION_ENDED
;
504 if (ctx
->crcctx
!= NULL
)
506 crc_context_free(ctx
->crcctx
);
513 * End of stream has been reached:
514 * Process any data still in the buffer and flush all internal
515 * structures of crcsync and of zlib
516 * Furthermore, add a strong hash
// End-of-stream handling: digests what is left in the window buffer, drains
// the crcsync context with flush_block until it reports 0, closes the
// compressed section, finalises the message digest and appends it with
// write_hash, then logs the size statistics.
//   f: filter whose context is being finished
// NOTE(review): the declarations of rslt and md_len, the loop opener of the
// flush loop and the return statements are not visible in this excerpt.
518 static apr_status_t
process_eos(ap_filter_t
*f
)
520 crccache_ctx
*ctx
= f
->ctx
;
523 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
526 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
528 // There is still data in the buffer. Process it.
529 rslt
= process_block(f
);
530 if (rslt
!= APR_SUCCESS
)
538 // Flush remaining block in the crcctx
539 rslt
= flush_block(f
);
540 if (rslt
!= APR_SUCCESS
)
// flush_block stores each flush result in crc_read_block_result; a result of
// 0 ends the loop.
545 while (ctx
->crc_read_block_result
!= 0);
547 // Flush anything that is remaining in the compress buffer
548 rslt
= flush_compress_buffer(f
);
549 if (rslt
!= APR_SUCCESS
)
555 unsigned char md_value
[EVP_MAX_MD_SIZE
];
556 EVP_DigestFinal_ex(&ctx
->mdctx
, md_value
, &md_len
);
557 EVP_MD_CTX_cleanup(&ctx
->mdctx
);
// NOTE(review): the status returned by write_hash is not captured here.
558 write_hash(f
, md_value
, md_len
);
// NOTE(review): the ratio divides by orig_length, which stays 0 for a
// response without data buckets; the message text also opens a parenthesis
// that it never closes.
560 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
561 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
567 * Process a data bucket; append data into a moving window buffer
568 * and encode it with crcsync algorithm when window contains enough
569 * data for crcsync to find potential matches
// Handles one data bucket: adds its bytes to the message digest and to the
// original-length counter, copies them piecewise into the window buffer and
// runs process_block whenever the buffer is full or the previous block
// matched.
//   f: filter whose context holds the window buffer
//   e: bucket to read the data from
// The last visible statement returns APR_SUCCESS.
// NOTE(review): the declarations of data, len and rslt and the bodies of the
// error branches are not visible in this excerpt.
571 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
573 request_rec
*r
= f
->r
;
574 crccache_ctx
*ctx
= f
->ctx
;
// NOTE(review): the status returned by apr_bucket_read is not captured here.
581 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
582 ctx
->orig_length
+= len
;
583 // update our sha1 hash
584 EVP_DigestUpdate(&ctx
->mdctx
, data
, len
);
585 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
587 // append data to the buffer and encode buffer content using the crc_read_block magic
588 size_t bucket_used_count
= 0;
589 size_t bucket_data_left
;
590 while(bucket_used_count
< len
)
592 /* Append as much data as possible into the buffer */
593 bucket_data_left
= len
- bucket_used_count
;
// Copy no more than the free space at the end of the window buffer.
594 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
595 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
596 bucket_used_count
+= copy_size
;
597 bucket_data_left
-= copy_size
;
598 ctx
->buffer_putpos
+= copy_size
;
599 /* flush the buffer if it is appropriate */
600 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
602 // Buffer is filled to the end. Flush as much as possible
603 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
604 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
605 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
606 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
608 // We can still scan at least 1 block + 1 byte forward: try to flush next part
609 rslt
= process_block(f
);
610 if (rslt
!= APR_SUCCESS
)
614 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
615 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
616 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
619 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
621 // Copy the remaining part of the buffer to the start of the buffer,
622 // so that it can be filled again as new data arrive
623 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
624 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
625 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
// NOTE(review): source and destination lie in the same buffer. When the
// number of bytes moved is larger than buffer_read_getpos the two ranges
// overlap, and memcpy is not defined for overlapping ranges (memmove is) -
// confirm whether that case can occur.
626 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
628 // Reset getpos to the beginning of the buffer and putpos accordingly
629 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
630 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
631 ctx
->buffer_read_getpos
= 0;
633 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
635 // Previous block matched exactly. Let's hope the next block as well
636 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
637 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
638 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
639 rslt
= process_block(f
);
640 if (rslt
!= APR_SUCCESS
)
646 return APR_SUCCESS
; // Yahoo, all went well
653 * Deliver cached content (headers and body) up the stack.
// CRCCACHE_OUT output filter. On first use for a request it decides whether
// the response can be encoded, decodes the block header, and sets up the
// window buffer, the deflate stream, the message digest and the crcsync
// context. It then walks the incoming brigade and encodes data buckets into
// ctx->bb, which is passed on at flush and end-of-stream buckets.
//   f:  this filter instance
//   bb: incoming brigade
// NOTE(review): many lines of this function (local declarations, the test for
// an existing context, branch and loop braces, several returns) are not
// visible in this excerpt.
655 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
657 request_rec
*r
= f
->r
;
658 crccache_ctx
*ctx
= f
->ctx
;
660 int return_code
= APR_SUCCESS
;
662 /* Do nothing if asked to filter nothing. */
663 if (APR_BRIGADE_EMPTY(bb
)) {
664 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
665 return ap_pass_brigade(f
->next
, bb
);
668 /* If we don't have a context, we need to ensure that it is okay to send
669 * the deflated content. If we have a context, that means we've done
670 * this before and we liked it.
671 * This could be not so nice if we always fail. But, if we succeed,
672 * we're in better shape.
676 const char *encoding
;
678 /* only work on main request/no subrequests */
679 if (r
->main
!= NULL
) {
680 ap_remove_output_filter(f
);
681 return ap_pass_brigade(f
->next
, bb
);
684 /* We can't operate on Content-Ranges */
685 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
686 ap_remove_output_filter(f
);
687 return ap_pass_brigade(f
->next
, bb
);
690 /* Let's see what our current Content-Encoding is.
691 * If it's already encoded by crccache: don't compress again.
692 * (We could, but let's not.)
694 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
695 if (encoding
&& strcasecmp(CRCCACHE_ENCODING
,encoding
) == 0)
697 /* Even if we don't accept this request based on it not having
698 * the Accept-Encoding, we need to note that we were looking
699 * for this header and downstream proxies should be aware of that.
701 apr_table_mergen(r
->headers_out
, "Vary", "A-IM");
702 ap_remove_output_filter(f
);
703 return ap_pass_brigade(f
->next
, bb
);
706 /* For a 304 or 204 response there is no entity included in
707 * the response and hence nothing to deflate. */
708 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
710 ap_remove_output_filter(f
);
711 return ap_pass_brigade(f
->next
, bb
);
714 /* All Ok. We're cool with filtering this. */
// The context is zero-filled by apr_pcalloc; the explicit resets below only
// restate that.
715 ctx
= f
->ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
716 ctx
->debug_skip_writing
= 0;
717 ctx
->orig_length
= 0;
719 ctx
->tx_uncompressed_length
= 0;
720 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
722 /* If Content-Encoding present and differs from "identity", we can't handle it */
723 if (encoding
&& strcasecmp(encoding
, "identity")) {
724 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
725 "Not encoding with crccache. It is already encoded with: %s", encoding
);
726 ap_remove_output_filter(f
);
727 return ap_pass_brigade(f
->next
, bb
);
730 /* Parse the input headers */
732 header
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
// NOTE(review): decode_if_block_header calls strlen on header; presumably a
// NULL check sits in the lines not visible here - confirm.
736 if (decode_if_block_header(header
,&version
,&file_size
,&hashes
) < 0)
738 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to decode if-block header");
739 ap_remove_output_filter(f
);
740 return ap_pass_brigade(f
->next
, bb
);
// NOTE(review): block_count comes from the decode-length estimate, not from
// the byte count that apr_base64_decode returns - confirm that the estimate
// cannot round the count up.
743 ctx
->block_count
= apr_base64_decode_len(hashes
)/(HASH_SIZE
/8);
744 // this may over allocate by a couple of bytes but no big deal
745 ctx
->hashes
= apr_palloc(r
->pool
, apr_base64_decode_len(hashes
));
746 apr_base64_decode((char *)ctx
->hashes
, hashes
);
// NOTE(review): both expressions divide by block_count; a hashes value too
// short to hold one hash would make that a division by zero - confirm a guard
// exists in the lines not visible here.
750 ctx
->block_size
= file_size
/ctx
->block_count
;
751 ctx
->tail_block_size
= ctx
->block_size
+ file_size
% ctx
->block_count
;
752 size_t block_count_including_final_block
= ctx
->block_count
;// + (ctx->tail_block_size != 0);
753 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
754 "If-block header decoded, version %d: %d hashes of %d and one of %d", version
, ctx
->block_count
-1,(int)ctx
->block_size
,(int)ctx
->tail_block_size
);
756 // swap to network byte order
758 for (i
= 0; i
< block_count_including_final_block
;++i
)
// NOTE(review): htobe64 returns the converted value; as written here the
// result is not stored, so the hashes array looks unchanged - confirm.
760 htobe64(ctx
->hashes
[i
]);
763 // Data come in at chunks that are potentially smaller than block_size
764 // Accumulate those chunks into a buffer.
765 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
766 // of the data alignment compared to the original page.
767 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
768 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
769 // Though, the larger the buffer, the bigger the memory demand.
770 // A size of 4*block_size (20% of original file size) seems to be a good balance
772 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
773 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
774 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
775 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
776 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
778 ctx
->buffer_size
= ctx
->block_size
*4 + 1;
779 ctx
->buffer_digest_getpos
= 0;
780 ctx
->buffer_read_getpos
= 0;
781 ctx
->buffer_putpos
= 0;
782 ctx
->crc_read_block_result
= 0;
783 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
787 /* Setup deflate for compressing non-matched literal data */
788 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
789 // TODO: should I pass some apr_palloc based function to prevent memory leaks
790 //in case of unexpected errors?
792 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
793 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
// Z_NULL for the three hooks makes zlib use its default allocator.
794 ctx
->compression_stream
->zalloc
= Z_NULL
;
795 ctx
->compression_stream
->zfree
= Z_NULL
;
796 ctx
->compression_stream
->opaque
= Z_NULL
;
797 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
800 // Can't initialize the compression engine for compressing literal data
801 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
// NOTE(review): compression_stream was obtained from apr_palloc above, so
// handing it to free looks wrong for pool memory - confirm.
802 free(ctx
->compression_stream
);
803 ctx
->compression_stream
= NULL
;
804 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
805 "unable to init Zlib: "
806 "deflateInit returned %d: URL %s",
808 ap_remove_output_filter(f
);
809 return ap_pass_brigade(f
->next
, bb
);
812 // initialise the context for our sha1 digest of the unencoded response
813 EVP_MD_CTX_init(&ctx
->mdctx
);
814 const EVP_MD
*md
= EVP_sha1();
815 EVP_DigestInit_ex(&ctx
->mdctx
, md
, NULL
);
817 // now initialise the crcsync context that will do the real work
818 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
820 // Register a cleanup function to cleanup internal libz and crcsync resources
821 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
822 apr_pool_cleanup_null
);
824 // All checks and initializations are OK
825 // Modify headers that are impacted by this transformation
826 // TODO: the crccache-client could recalculate these headers once it has
827 // reconstructed the page, before handling the reconstructed page
828 // back to the client
829 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
830 apr_table_addn(r
->headers_out
, VARY_HEADER
, VARY_VALUE
);
// The body is rewritten, so length and checksum of the original no longer
// apply.
831 apr_table_unset(r
->headers_out
, "Content-Length");
832 apr_table_unset(r
->headers_out
, "Content-MD5");
833 crccache_check_etag(r
, CRCCACHE_ENCODING
);
835 // All is okay, so set response header to IM Used
836 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE Server end of context setup");
838 //r->status_line="226 IM Used";
// Main loop: take buckets off the front of the incoming brigade one at a
// time.
841 while (!APR_BRIGADE_EMPTY(bb
))
847 e
= APR_BRIGADE_FIRST(bb
);
849 if (APR_BUCKET_IS_EOS(e
))
851 // Process end of stream: flush data buffers, compression buffers, etc.
852 // and calculate a strong hash.
853 rslt
= process_eos(f
);
855 /* Remove EOS from the old list, and insert into the new. */
856 APR_BUCKET_REMOVE(e
);
857 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
859 /* This filter is done once it has served up its content */
860 ap_remove_output_filter(f
);
862 if (rslt
!= APR_SUCCESS
)
864 return rslt
; // A problem occurred. Abort the processing
867 /* Okay, we've seen the EOS.
868 * Time to pass it along down the chain.
870 return ap_pass_brigade(f
->next
, ctx
->bb
);
873 if (APR_BUCKET_IS_FLUSH(e
))
875 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
878 /* Remove flush bucket from old brigade and insert into the new. */
879 APR_BUCKET_REMOVE(e
);
880 // TODO: optimize; do not insert two consecutive flushes when no intermediate
881 // output block was written
882 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
883 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
884 if (rv
!= APR_SUCCESS
) {
890 if (APR_BUCKET_IS_METADATA(e
)) {
891 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
893 * Remove meta data bucket from old brigade and insert into the
896 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
898 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
899 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
901 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
902 "CRCCACHE-ENCODE Metadata, read %zu",len
);
903 APR_BUCKET_REMOVE(e
);
904 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
908 // Bucket is none of the above types. Assume it is a data bucket
909 // which means it can be encoded with the crcsync algorithm
910 rslt
= process_data_bucket(f
, e
);
912 APR_BUCKET_REMOVE(e
);
913 if (rslt
!= APR_SUCCESS
)
915 break; // A problem occurred. Abort the processing
919 apr_brigade_cleanup(bb
);
// Hook registration callback named in the module definition. It logs a
// start-up notice, installs the header parser hook and registers the
// CRCCACHE_OUT output filter, keeping the returned handle in
// crccache_out_filter_handle.
//   p: pool supplied by the server (not used in the visible lines)
// NOTE(review): the end of the ap_hook_header_parser call is not visible in
// this excerpt. The two lines that mention CRCCACHE_HEADER refer to a handler
// that only appears in commented-out form elsewhere in this file, so they are
// presumably part of a disabled section - confirm.
923 static void crccache_server_register_hook(apr_pool_t
*p
) {
924 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
925 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
927 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
930 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
931 NULL, AP_FTYPE_PROTOCOL);
933 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
934 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
// Module record that ties the pieces of this file together: the per-server
// config constructor, the directive table and the hook registration callback.
// The per-directory slots and the per-server merge slot are left NULL.
// NOTE(review): the closing of the initialiser is not visible in this excerpt.
937 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
938 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
939 NULL
, /* merge per-directory config structures */
940 crccache_server_create_config
, /* create per-server config structure */
941 NULL
, /* merge per-server config structures */
942 crccache_server_cmds
, /* command apr_table_t */
943 crccache_server_register_hook
/* register hooks */