/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* crcsync/crccache apache server module
 *
 * This module is designed to run as a proxy server on the remote end of a slow
 * internet link. This module uses a crc32 running hash algorithm to reduce
 * data transfer in cached but modified downstream files.
 *
 * CRC algorithm uses the crcsync library created by Rusty Russell
 *
 * Author: Toby Collett (2009)
 */
29 #include "apr_file_io.h"
30 #include "apr_strings.h"
31 #include "mod_cache.h"
32 #include "mod_disk_cache.h"
33 #include "ap_provider.h"
34 #include "util_filter.h"
35 #include "util_script.h"
36 #include "util_charset.h"
39 #include "mod_crccache_server.h"
41 #include <crcsync/crcsync.h>
/* Buffer size constant. Defined without `static`, so it has external linkage. */
const int bufferSize = 1024;
45 module AP_MODULE_DECLARE_DATA crccache_server_module
;
47 //#define MIN(X,Y) (X<Y?X:Y)
49 static void *create_config(apr_pool_t
*p
, server_rec
*s
) {
50 crccache_server_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_server_conf
));
51 conf
->disk_cache_conf
= apr_pcalloc(p
, sizeof(disk_cache_conf
));
53 /* XXX: Set default values */
55 conf
->disk_cache_conf
->dirlevels
= DEFAULT_DIRLEVELS
;
56 conf
->disk_cache_conf
->dirlength
= DEFAULT_DIRLENGTH
;
57 conf
->disk_cache_conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
58 conf
->disk_cache_conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
60 conf
->disk_cache_conf
->cache_root
= NULL
;
61 conf
->disk_cache_conf
->cache_root_len
= 0;
// Per-request state of the CRCCACHE_OUT output filter: the filter allocates
// one instance from the request pool and stores it in f->ctx.
// NOTE(review): the filter also accesses buffer_count, block_size,
// orig_length and tx_length through this context and refers to the type as
// crccache_ctx; those member declarations and the closing of the typedef are
// not present in this text -- confirm against the full file.
66 typedef struct crccache_ctx_t
{
// staging buffer for input bytes not yet emitted; the filter allocates
// block_size*2 bytes for it
67 unsigned char *buffer
;
// brigade that collects the encoded buckets before they are passed on
69 apr_bucket_brigade
*bb
;
// block hashes decoded from the request's Block-Hashes header
71 unsigned hashes
[BLOCK_COUNT
];
// crcsync matching context created with crc_context_new()
72 struct crc_context
*crcctx
;
/*
 * mod_disk_cache configuration directive handlers.
 */
80 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
82 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
83 &crccache_server_module
);
84 conf
->disk_cache_conf
->cache_root
= arg
;
85 conf
->disk_cache_conf
->cache_root_len
= strlen(arg
);
86 /* TODO: canonicalize cache_root and strip off any trailing slashes */
/*
 * Only enable the CRCCache server when requested through the config file,
 * so that the user can switch the CRCCache server on in a specific virtual
 * server.
 */
// Handler for the "CRCcacheServer" flag directive (see disk_cache_cmds).
// parms: directive context; its server's module configuration is looked up.
// dummy: not used in the visible lines.
// flag:  on/off value of the directive.
// NOTE(review): the statement that stores `flag` into the configuration and
// the return statement are not present in this text -- confirm against the
// full file.
95 static const char *set_crccache_server(cmd_parms
*parms
, void *dummy
, int flag
)
// fetch this module's per-server configuration record
97 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
98 &crccache_server_module
);
/*
 * Consider eliminating the next two directives in favor of
 * Ian's prime number hash...
 * key = hash_fn( r->uri)
 * filename = "/key % prime1 /key %prime2/key %prime3"
 */
// Handler for the "CacheDirLevelsServer" directive (see disk_cache_cmds).
// Returns an error string when the value is rejected; on acceptance the
// value is stored as the disk cache's dirlevels.
// NOTE(review): the third parameter, the declaration and parsing of `val`,
// the condition guarding the first error return, and the final return are
// not present in this text -- confirm against the full file.
110 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
// fetch this module's per-server configuration record
112 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
113 &crccache_server_module
);
116 return "CacheDirLevelsServer value must be an integer greater than 0";
// levels * length must not exceed CACHEFILE_LEN.
// NOTE(review): if `val` is a plain int taken from the directive, a very
// large value could overflow this multiplication -- confirm.
117 if (val
* conf
->disk_cache_conf
->dirlength
> CACHEFILE_LEN
)
// NOTE(review): the message hard-codes 20 while the check uses
// CACHEFILE_LEN; verify that the two agree.
118 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
119 conf
->disk_cache_conf
->dirlevels
= val
;
// Handler for the "CacheDirLengthServer" directive (see disk_cache_cmds).
// Returns an error string when the value is rejected; on acceptance the
// value is stored as the disk cache's dirlength.
// NOTE(review): the third parameter, the declaration and parsing of `val`,
// the condition guarding the first error return, and the final return are
// not present in this text -- confirm against the full file.
122 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
// fetch this module's per-server configuration record
124 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
125 &crccache_server_module
);
128 return "CacheDirLengthServer value must be an integer greater than 0";
// length * levels must not exceed CACHEFILE_LEN.
// NOTE(review): if `val` is a plain int taken from the directive, a very
// large value could overflow this multiplication -- confirm.
129 if (val
* conf
->disk_cache_conf
->dirlevels
> CACHEFILE_LEN
)
// NOTE(review): the message hard-codes 20 while the check uses
// CACHEFILE_LEN; verify that the two agree.
130 return "CacheDirLevelsServer*CacheDirLengthServer value must not be higher than 20";
132 conf
->disk_cache_conf
->dirlength
= val
;
136 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
138 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
139 &crccache_server_module
);
141 if (apr_strtoff(&conf
->disk_cache_conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->minfs
143 return "CacheMinFileSizeServer argument must be a non-negative integer representing the min size of a file to cache in bytes.";
148 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
150 crccache_server_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
151 &crccache_server_module
);
152 if (apr_strtoff(&conf
->disk_cache_conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->disk_cache_conf
->maxfs
154 return "CacheMaxFileSizeServer argument must be a non-negative integer representing the max size of a file to cache in bytes.";
159 static const command_rec disk_cache_cmds
[] = { AP_INIT_TAKE1("CacheRootServer", set_cache_root
, NULL
, RSRC_CONF
,
160 "The directory to store cache files"), AP_INIT_TAKE1("CacheDirLevelsServer", set_cache_dirlevels
, NULL
, RSRC_CONF
,
161 "The number of levels of subdirectories in the cache"), AP_INIT_TAKE1("CacheDirLengthServer", set_cache_dirlength
, NULL
, RSRC_CONF
,
162 "The number of characters in subdirectory names"), AP_INIT_TAKE1("CacheMinFileSizeServer", set_cache_minfs
, NULL
, RSRC_CONF
,
163 "The minimum file size to cache a document"), AP_INIT_TAKE1("CacheMaxFileSizeServer", set_cache_maxfs
, NULL
, RSRC_CONF
,
164 "The maximum file size to cache a document"), AP_INIT_FLAG("CRCcacheServer", set_crccache_server
, NULL
, RSRC_CONF
,
165 "Enable the CRCCache server in this virtual server"),{ NULL
} };
167 static ap_filter_rec_t
*crccache_out_filter_handle
;
// Header-parser hook: inspects the request headers and, when the client sent
// both "Block-Hashes" and "Block-Size", attaches the CRCCACHE_OUT output
// filter to the request.
// r: the request being parsed.
// NOTE(review): several lines of this function are not present in this text
// (any check of the module's enabled state, the declaration of block_size,
// the test on `ret`, the braces and the return values) -- confirm the exact
// control flow and return codes against the full file.
169 static int crccache_server_header_parser_handler(request_rec
*r
) {
// fetch this module's per-server configuration record
170 crccache_server_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
171 &crccache_server_module
);
174 const char * hashes
, *block_size_header
;
// both headers come from the client request
175 hashes
= apr_table_get(r
->headers_in
, "Block-Hashes");
176 block_size_header
= apr_table_get(r
->headers_in
, "Block-Size");
// the protocol is only considered when both headers are present
177 if (hashes
&& block_size_header
)
// parse the block size as a decimal long; `ret` is the sscanf item count
180 int ret
= sscanf(block_size_header
,"%ld",&block_size
);
// error report for an unparsable Block-Size header
183 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
, "crccache: failed to convert block size header to int, %s",block_size_header
);
// NOTE(review): the whole client-supplied Block-Hashes value is written to
// the log at debug level.
187 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
, "CRCSYNC: Block-Hashes header found so enabling protocol: %s",hashes
);
// attach the output filter to this request with no pre-set filter context
188 ap_add_output_filter_handle(crccache_out_filter_handle
,
189 NULL
, r
, r
->connection
);
/* PR 39727: we're screwing up our clients if we leave a strong ETag
 * header while transforming content.  Henrik Nordstrom suggests
 * appending a transform-specific suffix to the ETag.
 *
 * Pending a more thorough review of our ETag handling, let's just
 * implement his suggestion.  It fixes the bug, or at least turns it
 * from a showstopper to an inefficiency.  And it breaks nothing that
 * wasn't already broken.
 */
206 static void crccache_check_etag(request_rec
*r
, const char *transform
) {
207 const char *etag
= apr_table_get(r
->headers_out
, "ETag");
208 if (etag
&& (((etag
[0] != 'W') && (etag
[0] != 'w')) || (etag
[1] != '/'))) {
209 apr_table_set(r
->headers_out
, "ETag", apr_pstrcat(r
->pool
, etag
, "-",
/*
 * Deliver cached content (headers and body) up the stack.
 */
// CRCCACHE_OUT output filter (registered in disk_cache_register_hook).
// f:  this filter instance; f->ctx holds the crccache_ctx once it is set up.
// bb: brigade of response data handed to the filter.
// Visible behaviour: an empty brigade is passed straight on. The filter
// removes itself and passes the brigade on untouched for subrequests,
// Content-Range responses, responses already carrying CRCCACHE_ENCODING and
// 304/204 responses. Otherwise it sets up a crccache_ctx, rewrites the
// response headers and re-encodes data buckets into literal and
// block-reference records that are appended to ctx->bb.
// NOTE(review): many lines of this function are not present in this text
// (braces, else branches, the guard selecting first-call setup, the
// declarations of e, rv, data, len, result, ii and data_left, and some
// statements). The comments added below describe only what the visible lines
// show -- confirm the control flow against the full file.
220 static int crccache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
222 request_rec
*r
= f
->r
;
223 crccache_ctx
*ctx
= f
->ctx
;
225 /* Do nothing if asked to filter nothing. */
226 if (APR_BRIGADE_EMPTY(bb
)) {
227 return ap_pass_brigade(f
->next
, bb
);
230 /* If we don't have a context, we need to ensure that it is okay to send
231 * the deflated content. If we have a context, that means we've done
232 * this before and we liked it.
233 * This could be not so nice if we always fail. But, if we succeed,
234 * we're in better shape.
238 const char *encoding
;
240 /* only work on main request/no subrequests */
241 if (r
->main
!= NULL
) {
242 ap_remove_output_filter(f
);
243 return ap_pass_brigade(f
->next
, bb
);
246 /* We can't operate on Content-Ranges */
247 if (apr_table_get(r
->headers_out
, "Content-Range") != NULL
) {
248 ap_remove_output_filter(f
);
249 return ap_pass_brigade(f
->next
, bb
);
252 /* Let's see what our current Content-Encoding is.
253 * If it's already encoded, don't compress again.
254 * (We could, but let's not.)
256 encoding
= apr_table_get(r
->headers_out
, "Content-Encoding");
// response is already marked with this module's encoding
257 if (encoding
&& strcasecmp(CRCCACHE_ENCODING
,encoding
) == 0)
259 /* Even if we don't accept this request based on it not having
260 * the Accept-Encoding, we need to note that we were looking
261 * for this header and downstream proxies should be aware of that.
263 apr_table_mergen(r
->headers_out
, "Vary", "Accept-Encoding");
264 ap_remove_output_filter(f
);
265 return ap_pass_brigade(f
->next
, bb
);
268 /* For a 304 or 204 response there is no entity included in
269 * the response and hence nothing to deflate. */
270 if (r
->status
== HTTP_NOT_MODIFIED
|| r
->status
==HTTP_NO_CONTENT
)
272 ap_remove_output_filter(f
);
273 return ap_pass_brigade(f
->next
, bb
);
276 /* We're cool with filtering this. */
// context is zero-filled and lives in the request pool
277 ctx
= f
->ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
278 ctx
->orig_length
= 0;
280 ctx
->bb
= apr_brigade_create(r
->pool
, f
->c
->bucket_alloc
);
282 /* If the entire Content-Encoding is "identity", we can replace it. */
283 if (!encoding
|| !strcasecmp(encoding
, "identity")) {
284 apr_table_setn(r
->headers_out
, "Content-Encoding", CRCCACHE_ENCODING
);
// presumably the else branch: add our encoding to an existing
// Content-Encoding value -- confirm
287 apr_table_mergen(r
->headers_out
, "Content-Encoding", CRCCACHE_ENCODING
);
// the body is rewritten below, so the original length and digest headers
// are dropped
289 apr_table_unset(r
->headers_out
, "Content-Length");
290 apr_table_unset(r
->headers_out
, "Content-MD5");
291 crccache_check_etag(r
, CRCCACHE_ENCODING
);
// client-supplied block hashes and block size from the request headers
293 const char * hashes
, *block_size_header
;
294 hashes
= apr_table_get(r
->headers_in
, "Block-Hashes");
295 block_size_header
= apr_table_get(r
->headers_in
, "Block-Size");
// NOTE(review): no NULL check on either header is visible here; the header
// parser only attaches this filter when both are present, but verify no
// other path can install it.
297 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
298 "crccache encoding block size %s", block_size_header
);
// base 0: the prefix of the header text selects the radix
301 ctx
->block_size
= strtoull(block_size_header
,NULL
,0);
// NOTE(review): testing errno is only meaningful if it was cleared before
// the strtoull call; no such statement is visible in this text -- confirm.
302 if (errno
|| ctx
->block_size
<= 0)
304 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,"crccache: failed to convert block size header to int, %s",block_size_header
);
305 ap_remove_output_filter(f
);
306 return ap_pass_brigade(f
->next
, bb
);
309 // allocate a buffer of twice our block size so we can store non matching parts of data as it comes in
310 ctx
->buffer_count
= 0;
// NOTE(review): block_size comes from the client and no upper bound is
// visible before this allocation -- confirm.
311 ctx
->buffer
= apr_palloc(r
->pool
, ctx
->block_size
*2);
// NOTE(review): this reads BLOCK_COUNT entries of HASH_BASE64_SIZE_TX
// characters from the client-supplied header; no check that the header is
// that long is visible, so a short header may be read past its end --
// confirm.
314 for (ii
= 0; ii
< BLOCK_COUNT
; ++ii
)
316 ctx
->hashes
[ii
] = decode_30bithash(&hashes
[ii
*HASH_BASE64_SIZE_TX
]);
317 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "cache: decoded hash[%d] %08X",ii,ctx->hashes[ii]);
320 // now initialise the crcsync context that will do the real work
321 ctx
->crcctx
= crc_context_new(ctx
->block_size
, HASH_SIZE
,ctx
->hashes
, BLOCK_COUNT
);
// main loop: consume buckets from the input brigade until it is empty
328 while (!APR_BRIGADE_EMPTY(bb
))
333 e
= APR_BRIGADE_FIRST(bb
);
// end of stream: flush what is still buffered, forward EOS, finish
335 if (APR_BUCKET_IS_EOS(e
))
337 // send one last literal if we still have unmatched data
338 if (ctx
->buffer_count
> 0)
340 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCCACHE-ENCODE final literal %ld bytes",ctx->buffer_count);
341 unsigned bucket_size
= ctx
->buffer_count
+ ENCODING_LITERAL_HEADER_SIZE
;
342 ctx
->tx_length
+= bucket_size
;
343 char * buf
= apr_palloc(r
->pool
, bucket_size
);
// literal record: tag byte, length via htonl at offset 1, payload from
// offset 5
345 buf
[0] = ENCODING_LITERAL
;
// NOTE(review): storing through a cast pointer at offset 1 is an unaligned
// access; copying the htonl result with memcpy would avoid it.
346 *(unsigned *)&buf
[1] = htonl(ctx
->buffer_count
);
// NOTE(review): offset 5 assumes ENCODING_LITERAL_HEADER_SIZE is 5 --
// confirm.
347 memcpy(&buf
[5], ctx
->buffer
,ctx
->buffer_count
);
349 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
350 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
354 // TODO: add strong hash here
// NOTE(review): the format string has an unbalanced "(" and prints
// tx_length and orig_length with %ld (their types are not visible here);
// orig_length may be 0 in the division -- confirm.
357 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
358 "CRCCACHE-ENCODE complete size %f%% (encoded=%ld original=%ld",100.0*((float)ctx
->tx_length
/(float)ctx
->orig_length
),ctx
->tx_length
, ctx
->orig_length
);
361 /* Remove EOS from the old list, and insert into the new. */
362 APR_BUCKET_REMOVE(e
);
363 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
365 /* This filter is done once it has served up its content */
366 ap_remove_output_filter(f
);
368 /* Okay, we've seen the EOS.
369 * Time to pass it along down the chain.
371 return ap_pass_brigade(f
->next
, ctx
->bb
);
// flush request: forward everything encoded so far, then the flush bucket
374 if (APR_BUCKET_IS_FLUSH(e
))
378 /* Remove flush bucket from old brigade and insert into the new. */
379 APR_BUCKET_REMOVE(e
);
380 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
381 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
382 if (rv
!= APR_SUCCESS
) {
// other metadata buckets are moved to the output brigade unchanged
388 if (APR_BUCKET_IS_METADATA(e
)) {
390 * Remove meta data bucket from old brigade and insert into the
393 // TODO: do we need to encode metadata
// NOTE(review): the result of this read is not checked in the visible
// lines, and data[0..2] is logged below; the guard around that log call is
// not visible -- confirm it only runs when len covers those bytes.
394 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
396 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
397 "CRCCACHE-ENCODE: Metadata, read %ld, %d %d %d",len
,data
[0],data
[1],data
[2]);
399 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
400 "CRCCACHE-ENCODE: Metadata, read %ld",len
);
401 APR_BUCKET_REMOVE(e
);
402 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
// data bucket: read its bytes and account for them in orig_length
// NOTE(review): the return value of this read is not checked in the
// visible lines.
407 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
408 ctx
->orig_length
+= len
;
410 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"cache: running CRCCACHE_OUT filter, read %ld bytes",len);
412 // TODO: make this a little more efficient so we need to copy less data around
// number of bytes of the current bucket consumed so far
413 size_t bucket_used_count
= 0;
415 while(bucket_used_count
< len
)
// by default match directly against the bucket's own bytes
417 const char * source_array
= data
;
418 size_t source_offset
= bucket_used_count
;
419 data_left
= len
- bucket_used_count
;
420 size_t source_length
= data_left
;
421 // if we have some data in our buffer, we need to full up the buffer until we have enough to match a block
422 if (ctx
->buffer_count
> 0 || data_left
< ctx
->block_size
)
// copy at most what still fits into the 2*block_size staging buffer
424 size_t copy_size
= MIN(ctx
->block_size
*2-ctx
->buffer_count
,data_left
);
425 memcpy(&ctx
->buffer
[ctx
->buffer_count
],&data
[bucket_used_count
],copy_size
);
426 ctx
->buffer_count
+= copy_size
;
427 bucket_used_count
+= copy_size
;
428 data_left
= len
- bucket_used_count
;
// from here on the staging buffer is the matching source
// NOTE(review): no reset of source_offset for the staging buffer is visible
// in this text -- confirm.
429 source_array
= (char *)ctx
->buffer
;
431 source_length
= ctx
->buffer_count
;
432 // not enough to match a block so stop here
433 if (ctx
->buffer_count
< ctx
->block_size
)
// hand the bytes to crcsync; `result` reports a literal run or a matched
// block, `count` the number of bytes it consumed
438 size_t count
= crc_read_block(ctx
->crcctx
, &result
,
439 &source_array
[source_offset
], source_length
);;
441 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "crccache: CRCSYNC, processed %ld, used %ld bytes, result was %ld",source_length,count,result);
443 // do different things if we match a literal or block
446 // didnt match a block, send a literal
448 // if we matched all our data as a literal
449 // update our used byte count, we can only be sure that 1+count-blocksize bytes are not in a block
450 // as the tail end of the buffer could match when more data is added to it.
451 if (count
== source_length
)
453 if (count
> (ctx
->block_size
-1))
454 count
-=(ctx
->block_size
-1);
461 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE literal %ld bytes",count
);
462 unsigned bucket_size
= count
+ ENCODING_LITERAL_HEADER_SIZE
;
463 ctx
->tx_length
+= bucket_size
;
464 char * buf
= apr_palloc(r
->pool
, bucket_size
);
// literal record: tag byte, length via htonl at offset 1, payload from
// offset 5
466 buf
[0] = ENCODING_LITERAL
;
// NOTE(review): same unaligned store through a cast pointer as in the EOS
// path above.
467 *(unsigned *)&buf
[1] = htonl(count
);
468 memcpy(&buf
[5],&source_array
[source_offset
],count
);
470 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
471 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
476 // matched send a block
// block record: tag byte followed by a one-byte block index
477 unsigned bucket_size
= ENCODING_BLOCK_HEADER_SIZE
;
478 ctx
->tx_length
+= bucket_size
;
479 char * buf
= apr_palloc(r
->pool
, bucket_size
);
481 // we used a block of data
482 count
= ctx
->block_size
;
484 buf
[0] = ENCODING_BLOCK
;
// the negated result is narrowed to a single byte here
485 buf
[1] = (unsigned char) (result
* -1 - 1); // invert and get back to zero based
486 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCCACHE-ENCODE block %d",buf
[1]);
487 apr_bucket
* b
= apr_bucket_pool_create(buf
, bucket_size
, r
->pool
, f
->c
->bucket_alloc
);
488 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
492 // something odd happened here
493 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
494 "crccache: CRCSYNC, no data, processed %ld bytes, result was %ld",count
,result
);
// bookkeeping after a record: drop the consumed bytes from the staging
// buffer, or advance within the bucket when the buffer is not in use
497 if (ctx
->buffer_count
> 0)
499 // if we have used up all of our buffer, stop using it and use the bucket directly
500 if (ctx
->buffer_count
- count
< bucket_used_count
)
502 size_t extra_data
= ctx
->buffer_count
- bucket_used_count
;
503 bucket_used_count
= count
- extra_data
;
504 ctx
->buffer_count
= 0;
508 // otherwise memmove the unused data to the start of the buffer
509 memmove(ctx
->buffer
,&ctx
->buffer
[count
],ctx
->buffer_count
- count
);
510 ctx
->buffer_count
-= count
;
511 bucket_used_count
+= count
;
516 bucket_used_count
+= count
;
// this input bucket is done: unlink it from the input brigade
520 APR_BUCKET_REMOVE(e
);
// empty the input brigade once the loop is over
524 apr_brigade_cleanup(bb
);
// Hook registration callback (the "register hooks" slot of
// crccache_server_module).
// p: pool passed in by the server; not used in the visible lines.
// Logs a registration banner, installs the header-parser hook and registers
// the CRCCACHE_OUT output filter, keeping its handle in
// crccache_out_filter_handle.
// NOTE(review): the last arguments of the ap_hook_header_parser call (the
// hook ordering constant) and the closing of the function are not present in
// this text -- confirm against the full file.
528 static void disk_cache_register_hook(apr_pool_t
*p
) {
// the banner is logged with a NULL server record
529 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
530 "Registering crccache server module, (C) 2009, Toby Collett");
532 ap_hook_header_parser(crccache_server_header_parser_handler
, NULL
, NULL
,
536 * CACHE_OUT must go into the filter chain after a possible DEFLATE
537 * filter to ensure that already compressed cache objects do not
538 * get compressed again. Incrementing filter type by 1 ensures
// the filter is registered one step after AP_FTYPE_CONTENT_SET
541 crccache_out_filter_handle
= ap_register_output_filter("CRCCACHE_OUT",
542 crccache_out_filter
, NULL
, AP_FTYPE_CONTENT_SET
+ 1);
545 module AP_MODULE_DECLARE_DATA crccache_server_module
= {
546 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
547 NULL
, /* merge per-directory config structures */
548 create_config
, /* create per-server config structure */
549 NULL
, /* merge per-server config structures */
550 disk_cache_cmds
, /* command apr_table_t */
551 disk_cache_register_hook
/* register hooks */