/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* crcsync/crccache apache server module
 *
 * This module is designed to run as a proxy server on the remote end of a slow
 * internet link. It uses a crc32 running hash algorithm to reduce the amount
 * of data transferred for files that are cached downstream but have been
 * modified since.
 *
 * The CRC algorithm uses the crcsync library created by Rusty Russell.
 *
 * Authors: Toby Collett (2009), Alex Wulms (2009)
 */
30 #include "apr_file_io.h"
31 #include "apr_strings.h"
32 #include "mod_cache.h"
33 #include "mod_disk_cache.h"
34 #include "ap_provider.h"
35 #include "util_filter.h"
36 #include "util_script.h"
37 #include "util_charset.h"
40 #include "ap_wrapper.h"
41 #include "mod_crccache_server.h"
43 #include <crcsync/crcsync.h>
/*
 * Forward declaration of the module record. The directive handlers below
 * need its address for ap_get_module_config(); the record itself is defined
 * at the end of this file.
 */
module AP_MODULE_DECLARE_DATA crccache_server_module;
/*
 * Possible states for the output compression (the zlib stream used for
 * literal data).
 *
 * COMPRESSION_BUFFER_EMPTY is deliberately the first enumerator (value 0).
 */
typedef enum {
    /* No literal data has been handed to zlib since the last reset. */
    COMPRESSION_BUFFER_EMPTY,
    /* Literal data was handed to zlib but no compressed output was emitted yet. */
    COMPRESSION_FIRST_DATA_RECEIVED,
    /* At least one compressed block (including its section header) was emitted. */
    COMPRESSION_FIRST_BLOCK_WRITTEN,
    /* deflateEnd() has been called; the stream must not be used any more. */
    COMPRESSION_ENDED
} compression_state_t;
56 //#define MIN(X,Y) (X<Y?X:Y)
58 static void *create_config(apr_pool_t
*p
, server_rec
*s
) {
59 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
60 conf
->disk_cache_conf
= apr_pcalloc(p
, sizeof(disk_cache_conf
));
62 /* XXX: Set default values */
64 conf
->disk_cache_conf
->dirlevels
= DEFAULT_DIRLEVELS
;
65 conf
->disk_cache_conf
->dirlength
= DEFAULT_DIRLENGTH
;
66 conf
->disk_cache_conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
67 conf
->disk_cache_conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
69 conf
->disk_cache_conf
->cache_root
= NULL
;
70 conf
->disk_cache_conf
->cache_root_len
= 0;
75 typedef struct crccache_ctx_t
{
76 unsigned char *buffer
;
77 size_t buffer_digest_getpos
;
78 size_t buffer_read_getpos
;
81 long crc_read_block_result
;
82 size_t crc_read_block_ndigested
;
83 apr_bucket_brigade
*bb
;
85 size_t tail_block_size
;
86 uint64_t hashes
[FULL_BLOCK_COUNT
+1];
87 struct crc_context
*crcctx
;
90 size_t tx_uncompressed_length
;
91 compression_state_t compression_state
;
92 z_stream
*compression_stream
;
93 int debug_skip_writing
; // ____
98 * mod_disk_cache configuration directives handlers.
100 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
102 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
103 &crccache_server_module
);
104 conf
->disk_cache_conf
->cache_root
= arg
;
105 conf
->disk_cache_conf
->cache_root_len
= strlen(arg
);
106 /* TODO: canonicalize cache_root and strip off any trailing slashes */
112 * Only enable CRCCache Server when requested through the config file
113 * so that the user can switch CRCCache server on in a specific virtual server
115 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
117 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
118 &crccache_server_module
);
119 conf
->enabled
= flag
;
125 * Consider eliminating the next two directives in favor of
126 * Ian's prime number hash...
127 * key = hash_fn( r->uri)
128 * filename = "/key % prime1 /key %prime2/key %prime3"
130 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
132 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
133 &crccache_server_module
);
136 return "CacheDirLevelsServer value must be an integer greater than 0";
137 if (val
* conf
->disk_cache_conf
->dirlength
> CACHEFILE_LEN
)
138 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
139 conf
->disk_cache_conf
->dirlevels
= val
;
142 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
144 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
145 &crccache_server_module
);
148 return "CacheDirLengthServer value must be an integer greater than 0";
149 if (val
* conf
->disk_cache_conf
->dirlevels
> CACHEFILE_LEN
)
150 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
152 conf
->disk_cache_conf
->dirlength
= val
;
156 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
158 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
159 &crccache_server_module
);
161 if (apr_strtoff(&conf
->disk_cache_conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->minfs
163 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
168 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
170 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
171 &crccache_server_module
);
172 if (apr_strtoff(&conf
->disk_cache_conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->maxfs
174 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
/*
 * Configuration directives understood by this module. All of them are
 * declared RSRC_CONF. The table is terminated by an empty entry.
 */
static const command_rec disk_cache_cmds[] = {
    AP_INIT_TAKE1("CacheRootServer", set_cache_root, NULL, RSRC_CONF,
            "The directory to store cache files"),
    AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels, NULL, RSRC_CONF,
            "The number of levels of subdirectories in the cache"),
    AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength, NULL, RSRC_CONF,
            "The number of characters in subdirectory names"),
    AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs, NULL, RSRC_CONF,
            "The minimum file size to cache a document"),
    AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs, NULL, RSRC_CONF,
            "The maximum file size to cache a document"),
    /* On/off switch for the CRCCache server itself */
    AP_INIT_FLAG("CRCcacheServer", set_crccache_server, NULL, RSRC_CONF,
            "Enable the CRCCache server in this virtual server"),
    { NULL }
};
/*
 * Handle of the registered "CRCCACHE_OUT" output filter. It is assigned in
 * disk_cache_register_hook() and used by the header parser to insert the
 * filter into a request's output chain.
 */
static ap_filter_rec_t *crccache_out_filter_handle;
189 static int crccache_server_header_parser_handler(request_rec
*r
) {
190 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
191 &crccache_server_module
);
194 const char * hashes
, *file_size_header
;
195 hashes
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
196 file_size_header
= apr_table_get(r
->headers_in
, FILE_SIZE_HEADER
);
197 if (hashes
&& file_size_header
)
200 int ret
= sscanf(file_size_header
,"%zu",&file_size
);
203 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
, "CRCCACHE-ENCODE Failed to convert file size header to size_t, %s",file_size_header
);
207 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCCACHE-ENCODE Block Hashes header found so enabling protocol: %s",hashes
);
208 // Insert mod_deflate's INFLATE filter in the chain to unzip content
209 // so that there is clear text available for the delta algorithm
210 ap_filter_t
*inflate_filter
= ap_add_output_filter("INFLATE", NULL
, r
, r
->connection
);
211 if (inflate_filter
== NULL
)
213 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Could not enable INFLATE filter. Will be unable to handle deflated encoded content");
217 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Successfully enabled INFLATE filter to handle deflated content");
219 // And the crccache filter itself ofcourse
220 ap_add_output_filter_handle(crccache_out_filter_handle
,
221 NULL
, r
, r
->connection
);
224 /* // All is okay, so set response header to IM Used
225 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE Setting 226 header");
227 r->status_line="226 IM Used";
/* Disabled alternative: a separate filter that would only set the return status.
static int crccache_server_header_filter_handler(ap_filter_t *f, apr_bucket_brigade *b) {
	request_rec *r = f->r;
	ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE Setting return status code");
	// All is okay, so set response header to IM Used
	r->status_line="HTTP/1.1 226 IM Used";
}
*/
245 /* PR 39727: we're screwing up our clients if we leave a strong ETag
246 * header while transforming content. Henrik Nordstrom suggests
249 * Pending a more thorough review of our Etag handling, let's just
250 * implement his suggestion. It fixes the bug, or at least turns it
251 * from a showstopper to an inefficiency. And it breaks nothing that
252 * wasn't already broken.
253 * TODO: the crccache_client should undo this once the reconstructed page has been saved in the cache
255 static void crccache_check_etag(request_rec
*r
, const char *transform
) {
256 const char *etag
= apr_table_get(r
->headers_out
, "ETag");
257 if (etag
&& (((etag
[0] != 'W') && (etag
[0] != 'w')) || (etag
[1] != '/'))) {
258 apr_table_set(r
->headers_out
, "ETag", apr_pstrcat(r
->pool
, etag
, "-",
263 static apr_status_t
write_compress_buffer(ap_filter_t
*f
, int flush
)
265 unsigned char compress_buf
[30000];
266 request_rec
*r
= f
->r
;
267 crccache_ctx
*ctx
= f
->ctx
;
268 z_stream
*strm
= ctx
->compression_stream
;
270 if (ctx
->debug_skip_writing
)
275 strm
->avail_out
= sizeof(compress_buf
);
276 strm
->next_out
= compress_buf
;
277 uInt avail_in_pre_deflate
= strm
->avail_in
;
278 int zRC
= deflate(strm
, flush
);
279 if (zRC
== Z_STREAM_ERROR
)
281 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate error: %d", zRC
);
284 int have
= sizeof(compress_buf
) - strm
->avail_out
;
285 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
286 "CRCCACHE-ENCODE deflate rslt %d, flush %d, consumed %d, produced %d",
287 zRC
, flush
, avail_in_pre_deflate
- strm
->avail_in
, have
);
290 // output buffer contains some data to be written
291 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, compress_buf, have);
292 unsigned bucket_size
= have
;
293 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
295 bucket_size
+= ENCODING_COMPRESSED_HEADER_SIZE
;
297 ctx
->tx_length
+= bucket_size
;
298 char * buf
= apr_palloc(r
->pool
, bucket_size
);
300 if (ctx
->compression_state
!= COMPRESSION_FIRST_BLOCK_WRITTEN
)
302 buf
[0] = ENCODING_COMPRESSED
;
303 memcpy(buf
+ ENCODING_COMPRESSED_HEADER_SIZE
, compress_buf
, have
);
304 ctx
->compression_state
= COMPRESSION_FIRST_BLOCK_WRITTEN
;
308 memcpy(buf
, compress_buf
, have
);
310 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
311 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
314 while (strm
->avail_out
== 0);
315 if (strm
->avail_in
!= 0)
317 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
,"CRCCACHE-ENCODE deflate still has %d input bytes available", strm
->avail_in
);
325 static apr_status_t
flush_compress_buffer(ap_filter_t
*f
)
327 crccache_ctx
*ctx
= f
->ctx
;
328 apr_status_t rslt
= APR_SUCCESS
; // assume all will be fine
330 if (ctx
->debug_skip_writing
)
333 if (ctx
->compression_state
!= COMPRESSION_BUFFER_EMPTY
)
335 rslt
= write_compress_buffer(f
, Z_FINISH
); // take the real status
336 deflateReset(ctx
->compression_stream
);
337 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
338 // ____ ctx->debug_skip_writing = 1; // skip writing after handling first compressed block
346 static apr_status_t
write_literal(ap_filter_t
*f
, unsigned char *buffer
, long count
)
348 crccache_ctx
*ctx
= f
->ctx
;
350 if (ctx
->debug_skip_writing
)
354 if (ctx
->compression_state
== COMPRESSION_BUFFER_EMPTY
)
356 ctx
->compression_state
= COMPRESSION_FIRST_DATA_RECEIVED
;
358 ctx
->compression_stream
->avail_in
= count
;
359 ctx
->compression_stream
->next_in
= buffer
;
360 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, f->r->server, buffer, count);
361 rslt
= write_compress_buffer(f
, Z_NO_FLUSH
);
362 ctx
->tx_uncompressed_length
+= count
;
367 * Write a block reference
369 static apr_status_t
write_block_reference(ap_filter_t
*f
, long result
)
371 request_rec
*r
= f
->r
;
372 crccache_ctx
*ctx
= f
->ctx
;
375 rslt
= flush_compress_buffer(f
);
376 if (rslt
!= APR_SUCCESS
)
381 if (ctx
->debug_skip_writing
)
384 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
385 ctx
->tx_length
+= bucket_size
;
386 ctx
->tx_uncompressed_length
+= bucket_size
;
387 char * buf
= apr_palloc(r
->pool
, bucket_size
);
389 buf
[0] = ENCODING_BLOCK
;
390 buf
[1] = (unsigned char) ((-result
)-1); // invert and get back to zero based
391 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
392 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
393 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
398 * Process one block of data: try to match it against the CRC, append
399 * the result to the ouput ring and remember the result (e.g. was
400 * it a block-match or was a literal processed)
402 static apr_status_t
process_block(ap_filter_t
*f
)
404 request_rec
*r
= f
->r
;
405 crccache_ctx
*ctx
= f
->ctx
;
406 apr_status_t rslt
= APR_SUCCESS
;
408 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_block");
409 if (ctx
->crcctx
== NULL
)
411 // This should never happen
412 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
417 size_t ndigested
= crc_read_block(
420 ctx
->buffer
+ctx
->buffer_digest_getpos
,
421 ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
423 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
424 "CRCCACHE-ENCODE crc_read_block ndigested: %zu, result %ld", ndigested
, rd_block_rslt
);
427 // rd_block_rslt = 0: do nothing (it is a 'literal' block of exactly 'blocksize' bytes at the end of the buffer, it will have to be moved
428 // to the beginning of the moving window so that it can be written upon the next call to crc_read_block or crc_read_flush)
429 // rd_block_rslt > 0: send literal
430 // rd_block_rslt < 0: send block
431 if (rd_block_rslt
> 0)
433 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_block_rslt
);
434 ctx
->buffer_read_getpos
+= rd_block_rslt
;
436 else if (rd_block_rslt
< 0)
438 rslt
= write_block_reference(f
, rd_block_rslt
);
439 unsigned char blocknum
= (unsigned char) ((-rd_block_rslt
)-1);
440 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
443 // Update the context with the results
444 ctx
->crc_read_block_result
= rd_block_rslt
;
445 ctx
->crc_read_block_ndigested
= ndigested
;
446 ctx
->buffer_digest_getpos
+= ndigested
;
451 * Flush one block of data: get it from the crccontext, append
452 * the result to the ouput ring and remember the result (e.g. was
453 * it a block-match or was a literal processed)
455 static apr_status_t
flush_block(ap_filter_t
*f
)
457 request_rec
*r
= f
->r
;
458 crccache_ctx
*ctx
= f
->ctx
;
459 apr_status_t rslt
= APR_SUCCESS
;
461 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE invoking crc_read_flush");
462 if (ctx
->crcctx
== NULL
)
464 // This should never happen
465 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crcctx = null");
468 long rd_flush_rslt
= crc_read_flush(ctx
->crcctx
);
469 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE crc_read_flush result %ld", rd_flush_rslt
);
471 // rd_flush_rslt = 0: do nothing
472 // rd_flush_rslt > 0: send literal that was already digested but not yet returned by read-block
473 // rd_flush_rslt < 0: send block that was already digested but not yet returned by read-block
474 if (rd_flush_rslt
> 0)
476 rslt
= write_literal(f
, ctx
->buffer
+ctx
->buffer_read_getpos
, rd_flush_rslt
);
477 ctx
->buffer_read_getpos
+= rd_flush_rslt
;
479 else if (rd_flush_rslt
< 0)
481 rslt
= write_block_reference(f
, rd_flush_rslt
);
482 unsigned char blocknum
= (unsigned char) ((-rd_flush_rslt
)-1);
483 ctx
->buffer_read_getpos
+= (blocknum
== FULL_BLOCK_COUNT
) ? ctx
->tail_block_size
: ctx
->block_size
;
486 // Update the context with the results
487 ctx
->crc_read_block_result
= rd_flush_rslt
;
488 ctx
->crc_read_block_ndigested
= 0;
493 * Clean-up memory used by helper libraries, that don't know about apr_palloc
494 * and that (probably) use classical malloc/free
496 static apr_status_t
deflate_ctx_cleanup(void *data
)
498 crccache_ctx
*ctx
= (crccache_ctx
*)data
;
502 if (ctx
->compression_state
!= COMPRESSION_ENDED
)
504 deflateEnd(ctx
->compression_stream
);
505 ctx
->compression_state
= COMPRESSION_ENDED
;
507 if (ctx
->crcctx
!= NULL
)
509 crc_context_free(ctx
->crcctx
);
516 * End of stream has been reached:
517 * Process any data still in the buffer and flush all internal
518 * structures of crcsync and of zlib
519 * Furthermore, add a strong hash
521 static apr_status_t
process_eos(ap_filter_t
*f
)
523 crccache_ctx
*ctx
= f
->ctx
;
526 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,"CRCCACHE-ENCODE EOS reached for APR bucket");
529 while (ctx
->buffer_digest_getpos
< ctx
->buffer_putpos
)
531 // There is still data in the buffer. Process it.
532 rslt
= process_block(f
);
533 if (rslt
!= APR_SUCCESS
)
541 // Flush remaining block in the crcctx
542 rslt
= flush_block(f
);
543 if (rslt
!= APR_SUCCESS
)
548 while (ctx
->crc_read_block_result
!= 0);
550 // Flush anything that is remaining in the compress buffer
551 rslt
= flush_compress_buffer(f
);
552 if (rslt
!= APR_SUCCESS
)
557 // TODO: add strong hash here
559 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, f
->r
->server
,
560 "CRCCACHE-ENCODE complete size %f%% (encoded-uncompressed=%zu encoded=%zu original=%zu",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_uncompressed_length
, ctx
->tx_length
, ctx
->orig_length
);
566 * Process a data bucket; append data into a moving window buffer
567 * and encode it with crcsync algorithm when window contains enough
568 * data for crcsync to find potential matches
570 static apr_status_t
process_data_bucket(ap_filter_t
*f
, apr_bucket
*e
)
572 request_rec
*r
= f
->r
;
573 crccache_ctx
*ctx
= f
->ctx
;
580 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
581 ctx
->orig_length
+= len
;
582 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE normal data in APR bucket, read %ld", len);
584 // append data to the buffer and encode buffer content using the crc_read_block magic
585 size_t bucket_used_count
= 0;
586 size_t bucket_data_left
;
587 while(bucket_used_count
< len
)
589 /* Append as much data as possible into the buffer */
590 bucket_data_left
= len
- bucket_used_count
;
591 size_t copy_size
= MIN(ctx
->buffer_size
-ctx
->buffer_putpos
, bucket_data_left
);
592 memcpy(ctx
->buffer
+ctx
->buffer_putpos
, data
+bucket_used_count
, copy_size
);
593 bucket_used_count
+= copy_size
;
594 bucket_data_left
-= copy_size
;
595 ctx
->buffer_putpos
+= copy_size
;
596 /* flush the buffer if it is appropriate */
597 if (ctx
->buffer_putpos
== ctx
->buffer_size
)
599 // Buffer is filled to the end. Flush as much as possible
600 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
601 "CRCCACHE-ENCODE Buffer is filled to end, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
602 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
603 while (ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
605 // We can still scan at least 1 block + 1 byte forward: try to flush next part
606 rslt
= process_block(f
);
607 if (rslt
!= APR_SUCCESS
)
611 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
612 "CRCCACHE-ENCODE Processed a block, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
613 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
616 if (ctx
->buffer_putpos
!= ctx
->buffer_read_getpos
)
618 // Copy the remaining part of the buffer to the start of the buffer,
619 // so that it can be filled again as new data arrive
620 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
621 "CRCCACHE-ENCODE Moving %zu bytes to begin of buffer",
622 ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
623 memcpy(ctx
->buffer
, ctx
->buffer
+ ctx
->buffer_read_getpos
, ctx
->buffer_putpos
- ctx
->buffer_read_getpos
);
625 // Reset getpos to the beginning of the buffer and putpos accordingly
626 ctx
->buffer_putpos
-= ctx
->buffer_read_getpos
;
627 ctx
->buffer_digest_getpos
-= ctx
->buffer_read_getpos
;
628 ctx
->buffer_read_getpos
= 0;
630 while (ctx
->crc_read_block_result
< 0 && ctx
->buffer_putpos
- ctx
->buffer_digest_getpos
> ctx
->block_size
)
632 // Previous block matched exactly. Let's hope the next block as well
633 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
634 "CRCCACHE-ENCODE Previous block matched, read_getpos: %zu, digest_getpos: %zu, putpos: %zu, putpos-digest_getpos: %zu (blocksize: %zu)",
635 ctx
->buffer_read_getpos
, ctx
->buffer_digest_getpos
, ctx
->buffer_putpos
, ctx
->buffer_putpos
-ctx
->buffer_digest_getpos
, ctx
->block_size
);
636 rslt
= process_block(f
);
637 if (rslt
!= APR_SUCCESS
)
643 return APR_SUCCESS
; // Yahoo, all went well
650 * Deliver cached content (headers and body) up the stack.
652 static apr_status_t
crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
654 request_rec
*r
= f
->r
;
655 crccache_ctx
*ctx
= f
->ctx
;
657 int return_code
= APR_SUCCESS
;
659 /* Do nothing if asked to filter nothing. */
660 if (APR_BRIGADE_EMPTY(bb
)) {
661 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE bucket brigade is empty -> nothing todo");
662 return ap_pass_brigade(f
->next
, bb
);
665 /* If we don't have a context, we need to ensure that it is okay to send
666 * the deflated content. If we have a context, that means we've done
667 * this before and we liked it.
668 * This could be not so nice if we always fail. But, if we succeed,
669 * we're in better shape.
673 const char *encoding
;
675 /* only work on main request/no subrequests */
676 if (r
->main
!= NULL
) {
677 ap_remove_output_filter(f
);
678 return ap_pass_brigade(f
->next
, bb
);
681 /* We can't operate on Content-Ranges */
682 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
683 ap_remove_output_filter(f
);
684 return ap_pass_brigade(f
->next
, bb
);
687 /* Let's see what our current Content-Encoding is.
688 * If it's already encoded by crccache: don't compress again.
689 * (We could, but let's not.)
691 encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
692 if (encoding
&& strcasecmp(CRCCACHE_ENCODING
,encoding
) == 0)
694 /* Even if we don't accept this request based on it not having
695 * the Accept-Encoding, we need to note that we were looking
696 * for this header and downstream proxies should be aware of that.
698 apr_table_mergen(r
->headers_out
, "Vary", "A-IM");
699 ap_remove_output_filter(f
);
700 return ap_pass_brigade(f
->next
, bb
);
703 /* For a 304 or 204 response there is no entity included in
704 * the response and hence nothing to deflate. */
705 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
707 ap_remove_output_filter(f
);
708 return ap_pass_brigade(f
->next
, bb
);
711 /* All Ok. We're cool with filtering this. */
712 ctx
= f
->ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
713 ctx
->debug_skip_writing
= 0;
714 ctx
->orig_length
= 0;
716 ctx
->tx_uncompressed_length
= 0;
717 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
719 /* If Content-Encoding present and differs from "identity", we can't handle it */
720 if (encoding
&& strcasecmp(encoding
, "identity")) {
721 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, r
->server
,
722 "Not encoding with crccache. It is already encoded with: %s", encoding
);
723 ap_remove_output_filter(f
);
724 return ap_pass_brigade(f
->next
, bb
);
727 /* Parse the input headers */
728 const char * hashes
, *file_size_header
;
729 hashes
= apr_table_get(r
->headers_in
, BLOCK_HEADER
);
730 file_size_header
= apr_table_get(r
->headers_in
, FILE_SIZE_HEADER
);
732 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
733 "CRCCACHE-ENCODE encoding file size header %s", file_size_header
);
736 size_t file_size
= strtoull(file_size_header
,NULL
,0);
737 if (errno
|| file_size
<= 0)
739 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to convert file size header to size_t, %s",file_size_header
);
740 ap_remove_output_filter(f
);
741 return ap_pass_brigade(f
->next
, bb
);
743 ctx
->block_size
= file_size
/FULL_BLOCK_COUNT
;
744 ctx
->tail_block_size
= file_size
% FULL_BLOCK_COUNT
;
745 size_t block_count_including_final_block
= FULL_BLOCK_COUNT
+ (ctx
->tail_block_size
!= 0);
747 // Data come in at chunks that are potentially smaller then block_size
748 // Accumulate those chunks into a buffer.
749 // The buffer must be at least 2*block_size so that crc_read_block(...) can find a matching block, regardless
750 // of the data alignment compared to the original page.
751 // The buffer is basically a moving window in the new page. So sometimes the last part of the buffer must be
752 // copied to the beginning again. The larger the buffer, the less often such a copy operation is required
753 // Though, the larger the buffer, the bigger the memory demand.
754 // A size of 4*block_size (20% of original file size) seems to be a good balance
756 // TODO: tune the buffer-size depending on the mime-type. Already compressed data (zip, gif, jpg, mpg, etc) will
757 // probably only have matching blocks if the file is totally unmodified. As soon as one byte differs in the original
758 // uncompressed data, the entire compressed data stream will be different anyway, so in such case it does not make
759 // much sense to even keep invoking the crc_read_block(...) function as soon as a difference has been found.
760 // Hence, no need to make a (potentially huge) buffer for these type of compressed (potentially huge, think about movies)
762 ctx
->buffer_size
= ctx
->block_size
*4 + 1;
763 ctx
->buffer_digest_getpos
= 0;
764 ctx
->buffer_read_getpos
= 0;
765 ctx
->buffer_putpos
= 0;
766 ctx
->crc_read_block_result
= 0;
767 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->buffer_size
);
771 for (ii
= 0; ii
< block_count_including_final_block
; ++ii
)
773 ctx
->hashes
[ii
] = decode_30bithash(&hashes
[ii
*HASH_BASE64_SIZE_TX
]);
774 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCCACHE-ENCODE decoded hash[%d] %08X",ii,ctx->hashes[ii]);
777 /* Setup deflate for compressing non-matched literal data */
778 ctx
->compression_state
= COMPRESSION_BUFFER_EMPTY
;
779 // TODO: should I pass some apr_palloc based function to prevent memory leaks
780 //in case of unexpected errors?
782 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE size of compression stream: %zd",sizeof(*(ctx
->compression_stream
)));
783 ctx
->compression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->compression_stream
)));
784 ctx
->compression_stream
->zalloc
= Z_NULL
;
785 ctx
->compression_stream
->zfree
= Z_NULL
;
786 ctx
->compression_stream
->opaque
= Z_NULL
;
787 zRC
= deflateInit(ctx
->compression_stream
, Z_DEFAULT_COMPRESSION
); // TODO: make compression level configurable
790 // Can't initialize the compression engine for compressing literal data
791 deflateEnd(ctx
->compression_stream
); // free memory used by deflate
792 free(ctx
->compression_stream
);
793 ctx
->compression_stream
= NULL
;
794 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
795 "unable to init Zlib: "
796 "deflateInit returned %d: URL %s",
798 ap_remove_output_filter(f
);
799 return ap_pass_brigade(f
->next
, bb
);
802 // now initialise the crcsync context that will do the real work
803 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, block_count_including_final_block
, ctx
->tail_block_size
);
805 // Register a cleanup function to cleanup internal libz and crcsync resources
806 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
807 apr_pool_cleanup_null
);
809 // All checks and initializations are OK
810 // Modify headers that are impacted by this transformation
811 // TODO: the crccache-client could recalculate these headers once it has
812 // reconstructed the page, before handling the reconstructed page
813 // back to the client
814 apr_table_setn(r
->headers_out
, ENCODING_HEADER
, CRCCACHE_ENCODING
);
815 apr_table_unset(r
->headers_out
, "Content-Length");
816 apr_table_unset(r
->headers_out
, "Content-MD5");
817 crccache_check_etag(r
, CRCCACHE_ENCODING
);
819 // All is okay, so set response header to IM Used
820 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "CRCCACHE-ENCODE Setting 226 header");
822 r
->status_line
="226 IM Used";
827 while (!APR_BRIGADE_EMPTY(bb
))
833 e
= APR_BRIGADE_FIRST(bb
);
835 if (APR_BUCKET_IS_EOS(e
))
837 // Process end of stream: flush data buffers, compression buffers, etc.
838 // and calculate a strong hash.
839 rslt
= process_eos(f
);
841 /* Remove EOS from the old list, and insert into the new. */
842 APR_BUCKET_REMOVE(e
);
843 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
845 /* This filter is done once it has served up its content */
846 ap_remove_output_filter(f
);
848 if (rslt
!= APR_SUCCESS
)
850 return rslt
; // A problem occurred. Abort the processing
853 /* Okay, we've seen the EOS.
854 * Time to pass it along down the chain.
856 return ap_pass_brigade(f
->next
, ctx
->bb
);
859 if (APR_BUCKET_IS_FLUSH(e
))
861 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE flush APR bucket");
864 /* Remove flush bucket from old brigade and insert into the new. */
865 APR_BUCKET_REMOVE(e
);
866 // TODO: optimize; do not insert two consecutive flushes when no intermediate
867 // output block was written
868 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
869 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
870 if (rv
!= APR_SUCCESS
) {
876 if (APR_BUCKET_IS_METADATA(e
)) {
877 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE metadata APR bucket");
879 * Remove meta data bucket from old brigade and insert into the
882 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
884 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
885 "CRCCACHE-ENCODE Metadata, read %zu, %d %d %d",len
,data
[0],data
[1],data
[2]);
887 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
888 "CRCCACHE-ENCODE Metadata, read %zu",len
);
889 APR_BUCKET_REMOVE(e
);
890 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
894 // Bucket is non of the above types. Assume it is a data bucket
895 // which means it can be encoded with the crcsync algorithm
896 rslt
= process_data_bucket(f
, e
);
898 APR_BUCKET_REMOVE(e
);
899 if (rslt
!= APR_SUCCESS
)
901 break; // A problem occurred. Abort the processing
905 apr_brigade_cleanup(bb
);
909 static void disk_cache_register_hook(apr_pool_t
*p
) {
910 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
911 "Registering crccache server module, (C) 2009, Toby Collett and Alex Wulms");
913 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
916 ap_register_output_filter("CRCCACHE_HEADER", crccache_server_header_filter_handler,
917 NULL, AP_FTYPE_PROTOCOL);
919 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
920 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
);
/*
 * The module record. Only per-server configuration is used: there is no
 * per-directory configuration and no merge function for server configs.
 */
module AP_MODULE_DECLARE_DATA crccache_server_module = {
    STANDARD20_MODULE_STUFF,
    NULL,                     /* create per-directory config structure */
    NULL,                     /* merge per-directory config structures */
    create_config,            /* create per-server config structure */
    NULL,                     /* merge per-server config structures */
    disk_cache_cmds,          /* command apr_table_t */
    disk_cache_register_hook  /* register hooks */
};