/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* crcsync/crccache apache client module
 *
 * This module is designed to run as a cache server on the local end of a slow
 * internet link. This module uses a crc running hash algorithm to reduce
 * data transfer in cached but modified upstream files.
 *
 * CRC algorithm uses the crcsync library created by Rusty Russell
 *
 * Author: Toby Collett (2009)
 * Contributor: Alex Wulms (2009)
 */
34 #include <apr_file_io.h>
35 #include <apr_strings.h>
36 #include <apr_base64.h>
39 #include <apr_tables.h>
40 #include "ap_provider.h"
41 #include "util_filter.h"
42 #include "util_script.h"
43 #include "util_charset.h"
45 #include <http_protocol.h>
48 #include "ap_wrapper.h"
49 #include <crcsync/crcsync.h>
53 #include "mod_crccache_client.h"
55 static ap_filter_rec_t
*crccache_decode_filter_handle
;
56 static ap_filter_rec_t
*cache_save_filter_handle
;
57 static ap_filter_rec_t
*cache_save_subreq_filter_handle
;
59 module AP_MODULE_DECLARE_DATA crccache_client_module
;
60 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key
) *cache_generate_key
;
63 static int crccache_client_post_config(apr_pool_t
*p
, apr_pool_t
*plog
,
64 apr_pool_t
*ptemp
, server_rec
*s
)
66 /* This is the means by which unusual (non-unix) os's may find alternate
67 * means to run a given command (e.g. shebang/registry parsing on Win32)
69 cache_generate_key
= APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key
);
70 if (!cache_generate_key
) {
71 cache_generate_key
= cache_generate_key_default
;
78 * Clean-up memory used by helper libraries, that don't know about apr_palloc
79 * and that (probably) use classical malloc/free
81 apr_status_t
deflate_ctx_cleanup(void *data
)
83 crccache_client_ctx
*ctx
= (crccache_client_ctx
*)data
;
87 if (ctx
->decompression_state
!= DECOMPRESSION_ENDED
)
89 inflateEnd(ctx
->decompression_stream
);
90 ctx
->decompression_state
= DECOMPRESSION_ENDED
;
/*
 * Reads headers from a buffer and returns an array of headers.
 * Returns NULL on file error
 * This routine tries to deal with too long lines and continuation lines.
 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
 * Is that okay, or should they be collapsed where possible?
 */
// recall_headers(h, r): loads the stored request/response header tables for a
// cached entity, computes CRCs over the cached body, publishes them in the
// BLOCK_HEADER request header and installs the crccache decode output filter.
// NOTE(review): this listing has lost its short lines (braces, guards,
// returns, short declarations such as those of e, data, len, i and z_RC);
// the comments below describe only the statements that are visible.
104 apr_status_t
recall_headers(cache_handle_t
*h
, request_rec
*r
) {
// The disk cache object hangs off the generic cache handle.
111 disk_cache_object_t
*dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
113 /* This case should not happen... */
115 /* XXX log message */
// Fresh header tables (initial size 20) allocated from the request pool.
119 h
->req_hdrs
= apr_table_make(r
->pool
, 20);
120 h
->resp_hdrs
= apr_table_make(r
->pool
, 20);
122 /* Call routine to read the header lines/status line */
// Both tables are read from the same header file handle, response headers first.
123 read_table(h
, r
, h
->resp_hdrs
, dobj
->hfd
);
124 read_table(h
, r
, h
->req_hdrs
, dobj
->hfd
);
// Wrap the cached body file (offset 0, dobj->file_size bytes) in a file bucket.
126 e
= apr_bucket_file_create(dobj
->fd
, 0, (apr_size_t
) dobj
->file_size
, r
->pool
,
127 r
->connection
->bucket_alloc
);
// NOTE(review): the status returned by apr_bucket_read is not used on this line.
130 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
132 // this will be rounded down, but thats okay
// The last block absorbs the remainder of len / FULL_BLOCK_COUNT.
133 size_t blocksize
= len
/FULL_BLOCK_COUNT
;
134 size_t tail_block_size
= blocksize
+ len
% FULL_BLOCK_COUNT
;
135 size_t block_count_including_final_block
= FULL_BLOCK_COUNT
;// + (tail_block_size != 0);
136 // sanity check for very small files
// NOTE(review): the condition of this sanity check is not visible here.
// NOTE(review): blocksize and tail_block_size are size_t but are printed with
// %ld below; %zu is the matching conversion.
139 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"crccache: %d blocks of %ld bytes, one block of %ld bytes",FULL_BLOCK_COUNT
-1,blocksize
,tail_block_size
);
// Build the zero-initialised per-request decode context handed to the filter.
141 crccache_client_ctx
* ctx
;
142 ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
143 ctx
->bb
= apr_brigade_create(r
->pool
, r
->connection
->bucket_alloc
);
144 ctx
->block_size
= blocksize
;
145 ctx
->tail_block_size
= tail_block_size
;
146 ctx
->state
= DECODING_NEW_SECTION
;
// The decode filter reads matched blocks back out of this bucket.
147 ctx
->cached_bucket
= e
;
149 // Setup inflate for decompressing non-matched literal data
150 ctx
->decompression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->decompression_stream
)));
// Z_NULL allocator hooks make zlib use its default allocation routines.
151 ctx
->decompression_stream
->zalloc
= Z_NULL
;
152 ctx
->decompression_stream
->zfree
= Z_NULL
;
153 ctx
->decompression_stream
->opaque
= Z_NULL
;
154 ctx
->decompression_stream
->avail_in
= 0;
155 ctx
->decompression_stream
->next_in
= Z_NULL
;
156 z_RC
= inflateInit(ctx
->decompression_stream
);
// NOTE(review): the test on z_RC and the return that presumably follows the
// warning are not visible here.
159 ap_log_error(APLOG_MARK
, APLOG_WARNING
, 0, r
->server
,
160 "Can not initialize decompression engine, return code: %d", z_RC
);
163 ctx
->decompression_state
= DECOMPRESSION_INITIALIZED
;
165 // Register a cleanup function to cleanup internal libz resources
166 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
167 apr_pool_cleanup_null
);
169 // All OK to go for the crcsync decoding: add the headers
170 // and set-up the decoding filter
172 // add one for base 64 overflow and null terminator
173 char hash_set
[HASH_HEADER_SIZE
+1];
// crcs is a variable-length array sized by block_count_including_final_block.
175 uint64_t crcs
[block_count_including_final_block
];
176 crc_of_blocks(data
, len
, blocksize
, HASH_SIZE
, true, crcs
);
177 // for (i = 0; i < FULL_BLOCK_COUNT - 1; i++) {
178 // crcs[i] = crc64_iso(0, &data[i*blocksize], blocksize);
180 // crcs[FULL_BLOCK_COUNT-1] = crc64_iso(0, &data[(FULL_BLOCK_COUNT-1)*blocksize], tail_block_size);
182 // swap to network byte order
// NOTE(review): the body of this loop is not visible here.
183 for (i
= 0; i
< block_count_including_final_block
;++i
)
// Base64-encode the raw CRC array into hash_set and force termination.
// NOTE(review): this relies on HASH_HEADER_SIZE covering the encoded length of
// FULL_BLOCK_COUNT 64-bit values; confirm against the header that defines it.
188 apr_base64_encode (hash_set
, (char *)crcs
, block_count_including_final_block
*sizeof(crcs
[0]));
189 hash_set
[HASH_HEADER_SIZE
] = '\0';
190 //apr_bucket_delete(e);
192 // TODO; bit of a safety margin here, could calculate exact size
193 const int block_header_max_size
= HASH_HEADER_SIZE
+40;
194 char block_header_txt
[block_header_max_size
];
// The header value carries a version, the cached file size and the hash set.
195 snprintf(block_header_txt
, block_header_max_size
,"v=1, fs=%zu, h=%s",len
,hash_set
);
// apr_table_set copies the value, so the stack buffer may go out of scope.
196 apr_table_set(r
->headers_in
, BLOCK_HEADER
, block_header_txt
);
197 // TODO: do we want to cache the hashes here?
199 // initialise the context for our sha1 digest of the unencoded response
200 EVP_MD_CTX_init(&ctx
->mdctx
);
201 const EVP_MD
*md
= EVP_sha1();
202 EVP_DigestInit_ex(&ctx
->mdctx
, md
, NULL
);
204 // we want to add a filter here so that we can decode the response.
205 // we need access to the original cached data when we get the response as
206 // we need that to fill in the matched blocks.
207 ap_add_output_filter_handle(crccache_decode_filter_handle
,
208 ctx
, r
, r
->connection
);
210 // TODO: why is hfd file only closed in this case?
211 apr_file_close(dobj
->hfd
);
213 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
214 "crccache_client: Recalled headers for URL %s", dobj
->name
);
/*
 * CACHE_DECODE filter
 *
 * Deliver cached content (headers and body) up the stack.
 */
// crccache_decode_filter(f, bb): output filter that turns a crcsync-encoded
// response back into the plain body. It fixes up the response headers on the
// first call, then walks the incoming brigade and, driven by ctx->state,
// emits cached blocks, literal data and inflated data into ctx->bb while
// feeding a SHA1 digest that is compared with the received hash at EOS.
// NOTE(review): this listing has lost its short lines (braces, the switch
// header, some case labels, break/return statements, short declarations such
// as e, data, len, rv, z_RC, md_len, tok, last, source_len); the comments
// below describe only the statements that are visible. Added comments use
// line comments only because several block-comment closers are missing too.
224 static int crccache_decode_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
226 request_rec
*r
= f
->r
;
227 // TODO: set up context type struct
228 crccache_client_ctx
*ctx
= f
->ctx
;
230 // if this is the first pass in decoding we should check the headers etc
231 // and fix up those headers that we modified as part of the encoding
// NOTE(review): ctx is dereferenced here, before the "No context available"
// handling further down.
232 if (ctx
->headers_checked
== 0)
234 ctx
->headers_checked
= 1;
236 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
237 "CRCSYNC returned status code (%d)", r
->status
);
239 // TODO: make this work if we have multiple encodings
240 const char * content_encoding
;
241 content_encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
// A missing encoding header, or one that fails the strcmp test below, takes
// the pass-through path: the filter removes itself and forwards bb untouched.
// NOTE(review): the remainder of this condition is not visible here.
242 if (content_encoding
== NULL
|| strcmp(CRCCACHE_ENCODING
, content_encoding
)
244 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
245 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding
?content_encoding
:"NULL");
246 ap_remove_output_filter(f
);
247 return ap_pass_brigade(f
->next
, bb
);
250 // remove the encoding header
251 apr_table_unset(r
->headers_out
, ENCODING_HEADER
);
253 // remove If-Block from the Vary header
// The Vary value is duplicated because apr_strtok modifies its input.
// NOTE(review): a guard for a missing Vary header is not visible here.
254 char * vary
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_out
, "Vary"));
257 apr_table_unset(r
->headers_out
, "Vary");
// Re-add every token except BLOCK_HEADER.
260 for (tok
= apr_strtok(vary
,", ",&last
);tok
!= NULL
;tok
= apr_strtok(NULL
,", ",&last
))
262 if (strcmp(BLOCK_HEADER
,tok
)!=0)
264 apr_table_mergen(r
->headers_out
,"Vary",tok
);
// Strip a trailing "-" CRCCACHE_ENCODING suffix from the etag, if present.
// NOTE(review): a guard for a missing etag header is not visible here; strlen
// is applied to the duplicated value directly. etaglen is an int compared
// against an unsigned strlen result.
270 char * etag
= apr_pstrdup(r
->pool
, apr_table_get(r
->headers_out
, "etag"));
273 int etaglen
= strlen(etag
);
274 if (etaglen
>strlen(CRCCACHE_ENCODING
) + 1)
276 if (strcmp("-"CRCCACHE_ENCODING
,&etag
[etaglen
-(strlen(CRCCACHE_ENCODING
) + 1)])==0)
278 etag
[etaglen
-(strlen(CRCCACHE_ENCODING
) + 1)] = '\0';
279 apr_table_setn(r
->headers_out
,"etag",etag
);
287 /* Do nothing if asked to filter nothing. */
288 if (APR_BRIGADE_EMPTY(bb
)) {
289 return ap_pass_brigade(f
->next
, bb
);
292 /* We require that we have a context already, otherwise we dont have our cached file
293 * to fill in the gaps with.
296 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
297 "No context available %s", r
->uri
);
298 ap_remove_output_filter(f
);
299 return ap_pass_brigade(f
->next
, bb
);
// Main loop: consume buckets from the front of bb until it is empty.
302 while (!APR_BRIGADE_EMPTY(bb
))
307 e
= APR_BRIGADE_FIRST(bb
);
309 if (APR_BUCKET_IS_EOS(e
)) {
311 /* Remove EOS from the old list, and insert into the new. */
312 APR_BUCKET_REMOVE(e
);
313 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
315 /* This filter is done once it has served up its content */
316 ap_remove_output_filter(f
);
318 // check strong hash here
// Finalise the running digest and compare its first 20 bytes with the hash
// bytes collected into ctx->md_value_rx.
320 unsigned char md_value
[EVP_MAX_MD_SIZE
];
321 EVP_DigestFinal_ex(&ctx
->mdctx
, md_value
, &md_len
);
322 EVP_MD_CTX_cleanup(&ctx
->mdctx
);
324 if (memcmp(md_value
, ctx
->md_value_rx
, 20) != 0)
326 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCSYNC-DECODE HASH CHECK FAILED");
// NOTE(review): what is returned after this cleanup is not visible here.
327 apr_brigade_cleanup(bb
);
332 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCSYNC-DECODE HASH CHECK PASSED");
335 /* Okay, we've seen the EOS.
336 * Time to pass it along down the chain.
338 return ap_pass_brigade(f
->next
, ctx
->bb
);
341 if (APR_BUCKET_IS_FLUSH(e
)) {
344 /* Remove flush bucket from old brigade anf insert into the new. */
345 APR_BUCKET_REMOVE(e
);
346 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
// A flush pushes everything decoded so far down the chain.
347 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
348 if (rv
!= APR_SUCCESS
) {
354 if (APR_BUCKET_IS_METADATA(e
)) {
356 * Remove meta data bucket from old brigade and insert into the
359 APR_BUCKET_REMOVE(e
);
360 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
// Data bucket: read its bytes and run them through the decoding state machine.
// NOTE(review): the status returned by apr_bucket_read is not used on this line.
365 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
366 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
368 apr_size_t consumed_bytes
= 0;
369 while (consumed_bytes
< len
)
371 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
372 // no guaruntee that our buckets line up with our encoding sections
373 // so we need a processing state machine stored in our context
// DECODING_NEW_SECTION: the byte at consumed_bytes is a section marker that
// selects the next state.
// NOTE(review): the increment of consumed_bytes past the marker is not
// visible here.
376 case DECODING_NEW_SECTION
:
378 // check if we have a compressed section or a block section
379 if (data
[consumed_bytes
] == ENCODING_COMPRESSED
)
380 ctx
->state
= DECODING_COMPRESSED
;
381 else if (data
[consumed_bytes
] == ENCODING_BLOCK
)
382 ctx
->state
= DECODING_BLOCK_HEADER
;
383 else if (data
[consumed_bytes
] == ENCODING_LITERAL
)
385 ctx
->state
= DECODING_LITERAL_SIZE
;
386 ctx
->partial_literal
= NULL
;
389 else if (data
[consumed_bytes
] == ENCODING_HASH
)
391 ctx
->state
= DECODING_HASH
;
396 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,
397 "CRCSYNC-DECODE, unknown section %d(%c)",data
[consumed_bytes
],data
[consumed_bytes
]);
398 apr_brigade_cleanup(bb
);
401 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
// DECODING_BLOCK_HEADER: one byte names a block of the cached file, which is
// copied into the output and added to the digest.
405 case DECODING_BLOCK_HEADER
:
407 unsigned char block_number
= data
[consumed_bytes
];
409 ctx
->state
= DECODING_NEW_SECTION
;
411 // TODO: Output the indicated block here
// Every block except the last has ctx->block_size bytes.
412 size_t current_block_size
= block_number
< FULL_BLOCK_COUNT
-1 ? ctx
->block_size
: ctx
->tail_block_size
;
413 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
414 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number
, current_block_size
);
416 char * buf
= apr_palloc(r
->pool
, current_block_size
);
417 const char * source_data
;
419 apr_bucket_read(ctx
->cached_bucket
, &source_data
, &source_len
, APR_BLOCK_READ
);
// NOTE(review): block_number comes from the response stream and its range is
// enforced only by this assert, which is compiled out under NDEBUG; no
// comparison of the copied range against source_len is visible here.
420 assert(block_number
< (FULL_BLOCK_COUNT
/*+ (ctx->tail_block_size != 0)*/));
421 memcpy(buf
,&source_data
[block_number
*ctx
->block_size
],current_block_size
);
422 // update our sha1 hash
423 EVP_DigestUpdate(&ctx
->mdctx
, buf
, current_block_size
);
424 apr_bucket
* b
= apr_bucket_pool_create(buf
, current_block_size
, r
->pool
, f
->c
->bucket_alloc
);
425 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
// DECODING_LITERAL_SIZE: collect a 4-byte network-order length, possibly
// split across buckets (ctx->rx_count tracks how many bytes have arrived).
428 case DECODING_LITERAL_SIZE
:
430 unsigned avail_in
= len
- consumed_bytes
;
431 // if we havent got the full int then store the data for later
432 if (avail_in
< 4 || ctx
->rx_count
!= 0)
434 if (ctx
->partial_literal
== NULL
)
436 ctx
->partial_literal
= apr_palloc(r
->pool
, 4);
438 unsigned len_to_copy
= MIN(4-ctx
->rx_count
, avail_in
);
439 memcpy(&ctx
->partial_literal
[ctx
->rx_count
], &data
[consumed_bytes
],len_to_copy
);
440 ctx
->rx_count
+= len_to_copy
;
441 consumed_bytes
+= len_to_copy
;
443 if (ctx
->rx_count
== 4)
// NOTE(review): both ntohl reads below go through a cast to unsigned*; the
// second one reads at an arbitrary offset into data.
445 ctx
->literal_size
= ntohl(*(unsigned*)ctx
->partial_literal
);
455 ctx
->literal_size
= ntohl(*(unsigned*)&data
[consumed_bytes
]);
// The literal buffer is sized by the length taken from the response stream.
// NOTE(review): no upper bound on literal_size is visible here, and neither
// is the reset of ctx->rx_count between the size and body phases.
458 ctx
->partial_literal
= apr_palloc(r
->pool
, ctx
->literal_size
);
459 ctx
->state
= DECODING_LITERAL_BODY
;
// DECODING_LITERAL_BODY: accumulate literal bytes until literal_size is
// reached, then digest them and emit them as one pool bucket.
462 case DECODING_LITERAL_BODY
:
464 unsigned avail_in
= len
- consumed_bytes
;
465 unsigned len_to_copy
= MIN(ctx
->literal_size
-ctx
->rx_count
, avail_in
);
466 memcpy(&ctx
->partial_literal
[ctx
->rx_count
], &data
[consumed_bytes
],len_to_copy
);
467 ctx
->rx_count
+= len_to_copy
;
468 consumed_bytes
+= len_to_copy
;
470 if (ctx
->rx_count
== ctx
->literal_size
)
472 EVP_DigestUpdate(&ctx
->mdctx
, ctx
->partial_literal
, ctx
->literal_size
);
473 apr_bucket
* b
= apr_bucket_pool_create((char*)ctx
->partial_literal
, ctx
->literal_size
, r
->pool
, f
->c
->bucket_alloc
);
474 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
475 ctx
->state
= DECODING_NEW_SECTION
;
// Hash section: collect 20 bytes into ctx->md_value_rx for the EOS check.
// NOTE(review): the case label for this section (presumably DECODING_HASH) is
// not visible here.
482 unsigned avail_in
= len
- consumed_bytes
;
483 // 20 bytes for an SHA1 hash
484 unsigned needed
= MIN(20-ctx
->rx_count
, avail_in
);
485 memcpy(&ctx
->md_value_rx
[ctx
->rx_count
], &data
[consumed_bytes
],needed
);
486 ctx
->rx_count
+=needed
;
487 consumed_bytes
+= needed
;
488 if (ctx
->rx_count
== 20)
490 ctx
->state
= DECODING_NEW_SECTION
;
// DECODING_COMPRESSED: inflate the rest of this bucket through a fixed stack
// buffer, emitting each filled buffer as a pool bucket.
494 case DECODING_COMPRESSED
:
496 unsigned char decompressed_data_buf
[30000];
498 z_stream
*strm
= ctx
->decompression_stream
;
499 strm
->avail_in
= len
- consumed_bytes
;
500 strm
->next_in
= (Bytef
*)(data
+ consumed_bytes
);
501 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
502 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
// NOTE(review): the opening of the do/while loop closed further down is not
// visible here.
504 strm
->avail_out
= sizeof(decompressed_data_buf
);
505 strm
->next_out
= decompressed_data_buf
;
506 uInt avail_in_pre_inflate
= strm
->avail_in
;
507 z_RC
= inflate(strm
, Z_NO_FLUSH
);
508 if (z_RC
== Z_NEED_DICT
|| z_RC
== Z_DATA_ERROR
|| z_RC
== Z_MEM_ERROR
)
510 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
, "CRCSYNC-DECODE inflate error: %d", z_RC
);
511 apr_brigade_cleanup(bb
);
// have is the number of bytes inflate wrote into the stack buffer.
514 int have
= sizeof(decompressed_data_buf
) - strm
->avail_out
;
515 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
516 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
517 z_RC
, avail_in_pre_inflate
- strm
->avail_in
, have
);
// Copy out of the stack buffer so the bucket can outlive this call.
521 char * buf
= apr_palloc(r
->pool
, have
);
522 memcpy(buf
,decompressed_data_buf
,have
);
523 EVP_DigestUpdate(&ctx
->mdctx
, buf
, have
);
524 apr_bucket
* b
= apr_bucket_pool_create(buf
, have
, r
->pool
, f
->c
->bucket_alloc
);
525 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
// Loop again while the output buffer was filled completely.
527 } while (strm
->avail_out
== 0);
// Whatever inflate left unread belongs to the next section.
528 consumed_bytes
= len
- strm
->avail_in
;
529 if (z_RC
== Z_STREAM_END
)
531 ctx
->state
= DECODING_NEW_SECTION
;
// Fallback for a state value not handled above.
538 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,
539 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx
->state
);
540 apr_brigade_cleanup(bb
);
541 return APR_EGENERAL
; // TODO: figure out how to pass the error on to the client
// The fully processed data bucket is unlinked from bb.
// NOTE(review): whether the bucket is also destroyed is not visible here.
544 APR_BUCKET_REMOVE(e
);
548 apr_brigade_cleanup(bb
);
552 static void *crccache_client_create_config(apr_pool_t
*p
, server_rec
*s
) {
553 crccache_client_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_client_conf
));
554 /* array of URL prefixes for which caching is enabled */
555 conf
->cacheenable
= apr_array_make(p
, 10, sizeof(struct cache_enable
));
556 /* array of URL prefixes for which caching is disabled */
557 conf
->cachedisable
= apr_array_make(p
, 10, sizeof(struct cache_disable
));
559 /* XXX: Set default values */
560 conf
->dirlevels
= DEFAULT_DIRLEVELS
;
561 conf
->dirlength
= DEFAULT_DIRLENGTH
;
562 conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
563 conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
565 conf
->cache_root
= NULL
;
566 conf
->cache_root_len
= 0;
/*
 * mod_disk_cache configuration directives handlers.
 */
574 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
576 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
577 &crccache_client_module
);
578 conf
->cache_root
= arg
;
579 conf
->cache_root_len
= strlen(arg
);
580 /* TODO: canonicalize cache_root and strip off any trailing slashes */
/*
 * Consider eliminating the next two directives in favor of
 * Ian's prime number hash...
 * key = hash_fn( r->uri)
 * filename = "/key % prime1 /key %prime2/key %prime3"
 */
591 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
593 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
594 &crccache_client_module
);
597 return "CacheDirLevelsClient value must be an integer greater than 0";
598 if (val
* conf
->dirlength
> CACHEFILE_LEN
)
599 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
600 conf
->dirlevels
= val
;
603 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
605 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
606 &crccache_client_module
);
609 return "CacheDirLengthClient value must be an integer greater than 0";
610 if (val
* conf
->dirlevels
> CACHEFILE_LEN
)
611 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
613 conf
->dirlength
= val
;
617 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
619 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
620 &crccache_client_module
);
622 if (apr_strtoff(&conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->minfs
624 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
629 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
631 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
632 &crccache_client_module
);
633 if (apr_strtoff(&conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->maxfs
635 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
640 static const char *add_crc_client_enable(cmd_parms
*parms
, void *dummy
,
643 crccache_client_conf
*conf
;
644 struct cache_enable
*new;
647 (crccache_client_conf
*)ap_get_module_config(parms
->server
->module_config
,
648 &crccache_client_module
);
649 new = apr_array_push(conf
->cacheenable
);
650 if (apr_uri_parse(parms
->pool
, url
, &(new->url
))) {
654 new->pathlen
= strlen(new->url
.path
);
662 static const command_rec crccache_client_cmds
[] =
664 AP_INIT_TAKE1("CRCClientEnable", add_crc_client_enable
, NULL
, RSRC_CONF
, "A cache type and partial URL prefix below which caching is enabled"),
665 AP_INIT_TAKE1("CacheRootClient", set_cache_root
, NULL
, RSRC_CONF
,"The directory to store cache files"),
666 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels
, NULL
, RSRC_CONF
, "The number of levels of subdirectories in the cache"),
667 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength
, NULL
, RSRC_CONF
, "The number of characters in subdirectory names"),
668 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs
, NULL
, RSRC_CONF
, "The minimum file size to cache a document"),
669 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs
, NULL
, RSRC_CONF
, "The maximum file size to cache a document"),
673 int ap_run_insert_filter(request_rec
*r
);
// crccache_client_url_handler(r, lookup): quick handler for GET requests. It
// prepares the per-request cache record, adds one of the CACHE_SAVE output
// filters, generates the cache key, opens the cached entity and recalls its
// headers (which in turn installs the decode filter).
// NOTE(review): this listing has lost its short lines (braces, guards,
// return statements, declarations such as auth, key and h); the comments
// below describe only the statements that are visible. Added comments use
// line comments only because several block-comment closers are missing too.
675 int crccache_client_url_handler(request_rec
*r
, int lookup
)
678 cache_request_rec
*cache
;
679 crccache_client_conf
*conf
;
681 /* Delay initialization until we know we are handling a GET */
// NOTE(review): the body of this method check is not visible here.
682 if (r
->method_number
!= M_GET
) {
686 conf
= (crccache_client_conf
*) ap_get_module_config(r
->server
->module_config
,
687 &crccache_client_module
);
// An empty cacheenable array is tested for here.
// NOTE(review): the action taken in that case is not visible here.
689 if (conf
->cacheenable
->nelts
== 0)
692 /* make space for the per request config */
693 cache
= (cache_request_rec
*) ap_get_module_config(r
->request_config
,
694 &crccache_client_module
);
// A zeroed record is allocated and attached to the request.
// NOTE(review): the guard around this allocation is not visible here.
696 cache
= apr_pcalloc(r
->pool
, sizeof(cache_request_rec
));
697 ap_set_module_config(r
->request_config
, &crccache_client_module
, cache
);
701 * Are we allowed to serve cached info at all?
704 /* find certain cache controlling headers */
// NOTE(review): how auth influences the decision is not visible here.
705 auth
= apr_table_get(r
->headers_in
, "Authorization");
707 /* First things first - does the request allow us to return
708 * cached information at all? If not, just decline the request.
715 * Add cache_save filter to cache this request. Choose
716 * the correct filter by checking if we are a subrequest
// NOTE(review): the test that selects between the two filters is not visible
// here, nor are the remaining arguments of the first log call.
720 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
722 "Adding CACHE_SAVE_SUBREQ filter for %s",
724 ap_add_output_filter_handle(cache_save_subreq_filter_handle
,
725 NULL
, r
, r
->connection
);
728 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
729 r
->server
, "Adding CACHE_SAVE filter for %s",
731 ap_add_output_filter_handle(cache_save_filter_handle
,
732 NULL
, r
, r
->connection
);
// Build the cache key with the generator selected at post_config time.
738 if (cache_generate_key(r
, r
->pool
, &key
) != APR_SUCCESS
) {
739 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
740 r
->server
, "Failed to generate key");
// NOTE(review): this handle comes from apr_palloc and is therefore not
// zero-initialised, whereas cache_save_filter uses apr_pcalloc for its handle.
743 h
= apr_palloc(r
->pool
, sizeof(cache_handle_t
));
744 if (open_entity(h
, r
, key
) != OK
)
746 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
747 r
->server
, "Failed to open entity not good");
750 if (recall_headers(h
, r
) != APR_SUCCESS
) {
751 /* TODO: Handle this error */
752 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
753 r
->server
, "Failed to recall headers");
/*
 * Decide whether or not this content should be cached.
 * If we decide no it should not:
 *   remove the filter from the chain
 * If we decide yes it should:
 *   Have we already started saving the response?
 *     If we have started, pass the data to the storage manager via store_body
 *     Otherwise:
 *       Check to see if we *can* save this particular response.
 *       If we can, call cache_create_entity() and save the headers and body
 *   Finally, pass the data to the next filter (the network or whatever)
 */
// cache_save_filter(f, in): output filter that decides whether a response may
// be cached and, if so, stores its headers and body through store_headers /
// store_body before passing the brigade on to the next filter.
// NOTE(review): this listing has lost its short lines (braces, guards,
// return statements, declarations such as rv, reason, cl, size, errp, key and
// e); the comments below describe only the statements that are visible. Added
// comments use line comments only because several block-comment closers are
// missing too.
778 int cache_save_filter(ap_filter_t
*f
, apr_bucket_brigade
*in
)
781 request_rec
*r
= f
->r
;
782 cache_request_rec
*cache
;
783 crccache_client_conf
*conf
;
784 //const char *cc_out, *cl;
// NOTE(review): the local named exp below shadows the C library function of
// the same name.
786 const char *exps
, /* *lastmods,*/ *dates
;//, *etag;
787 apr_time_t exp
, date
,/* lastmod,*/ now
;
789 cache_info
*info
= NULL
;
793 conf
= (crccache_client_conf
*) ap_get_module_config(r
->server
->module_config
,
794 &crccache_client_module
);
796 /* Setup cache_request_rec */
797 cache
= (cache_request_rec
*) ap_get_module_config(r
->request_config
,
798 &crccache_client_module
);
800 /* user likely configured CACHE_SAVE manually; they should really use
801 * mod_cache configuration to do that
// NOTE(review): the guard around this allocation is not visible here.
803 cache
= apr_pcalloc(r
->pool
, sizeof(cache_request_rec
));
804 ap_set_module_config(r
->request_config
, &crccache_client_module
, cache
);
812 * This section passes the brigades into the cache modules, but only
813 * if the setup section (see below) is complete.
815 if (cache
->block_response
) {
816 /* We've already sent down the response and EOS. So, ignore
817 * whatever comes now.
822 /* have we already run the cachability check and set up the
823 * cached file handle?
825 if (cache
->in_checked
) {
826 /* pass the brigades into the cache, then pass them
827 * up the filter stack
829 rv
= store_body(cache
->handle
, r
, in
);
// A store failure only removes this filter; the brigade is still forwarded.
830 if (rv
!= APR_SUCCESS
) {
831 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
832 "cache: Cache provider's store_body failed!");
833 ap_remove_output_filter(f
);
835 return ap_pass_brigade(f
->next
, in
);
839 * Setup Data in Cache
840 * -------------------
841 * This section opens the cache entity and sets various caching
842 * parameters, and decides whether this URL should be cached at
843 * all. This section is* run before the above section.
846 /* read expiry date; if a bad date, then leave it so the client can
// Expires is looked up in err_headers_out and in headers_out.
// NOTE(review): the test between the two lookups is not visible here.
849 exps
= apr_table_get(r
->err_headers_out
, "Expires");
851 exps
= apr_table_get(r
->headers_out
, "Expires");
854 if (APR_DATE_BAD
== (exp
= apr_date_parse_http(exps
))) {
863 * what responses should we not cache?
865 * At this point we decide based on the response headers whether it
866 * is appropriate _NOT_ to cache the data from the server. There are
867 * a whole lot of conditions that prevent us from caching this data.
868 * They are tested here one by one to be clear and unambiguous.
// Only 200, 203, 300, 301 and 304 pass this first status test.
870 if (r
->status
!= HTTP_OK
&& r
->status
!= HTTP_NON_AUTHORITATIVE
871 && r
->status
!= HTTP_MULTIPLE_CHOICES
872 && r
->status
!= HTTP_MOVED_PERMANENTLY
873 && r
->status
!= HTTP_NOT_MODIFIED
) {
874 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
875 * We don't cache 206, because we don't (yet) cache partial responses.
876 * We include 304 Not Modified here too as this is the origin server
877 * telling us to serve the cached copy.
885 else if (r
->status
== HTTP_NOT_MODIFIED
&&
886 !cache
->handle
&& !cache
->stale_handle
) {
887 /* if the server said 304 Not Modified but we have no cache
888 * file - pass this untouched to the user agent, it's not for us.
890 reason
= "HTTP Status 304 Not Modified";
893 else if (r
->header_only
&& !cache
->stale_handle
) {
894 /* Forbid HEAD requests unless we have it cached already */
895 reason
= "HTTP HEAD request";
// The response is not cacheable: log, remove this filter and pass the data on.
// NOTE(review): the test on reason and the last argument of this log call are
// not visible here.
898 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
899 "cache: %s not cached. Reason: %s", r
->unparsed_uri
,
902 /* remove this filter from the chain */
903 ap_remove_output_filter(f
);
905 /* ship the data up the stack */
906 return ap_pass_brigade(f
->next
, in
);
909 /* Make it so that we don't execute this path again. */
910 cache
->in_checked
= 1;
912 /* Set the content length if known.
// NOTE(review): the declaration of cl is not visible here; the only visible
// declaration of that name is in the commented-out line near the top.
914 cl
= apr_table_get(r
->err_headers_out
, "Content-Length");
916 cl
= apr_table_get(r
->headers_out
, "Content-Length");
// Any parse error, trailing characters or negative size discards the header.
920 if (apr_strtoff(&size
, cl
, &errp
, 10) || *errp
|| size
< 0) {
921 cl
= NULL
; /* parse error, see next 'if' block */
926 /* if we don't get the content-length, see if we have all the
927 * buckets and use their length to calculate the size
930 int all_buckets_here
=0;
931 int unresolved_length
= 0;
// Walk the brigade looking for EOS, flush buckets and buckets whose length is
// unknown ((apr_size_t)-1).
933 for (e
= APR_BRIGADE_FIRST(in
);
934 e
!= APR_BRIGADE_SENTINEL(in
);
935 e
= APR_BUCKET_NEXT(e
))
937 if (APR_BUCKET_IS_EOS(e
)) {
941 if (APR_BUCKET_IS_FLUSH(e
)) {
942 unresolved_length
= 1;
945 if (e
->length
== (apr_size_t
)-1) {
950 if (!all_buckets_here
) {
955 /* It's safe to cache the response.
957 * There are two possiblities at this point:
958 * - cache->handle == NULL. In this case there is no previously
959 * cached entity anywhere on the system. We must create a brand
960 * new entity and store the response in it.
961 * - cache->stale_handle != NULL. In this case there is a stale
962 * entity in the system which needs to be replaced by new
963 * content (unless the result was 304 Not Modified, which means
964 * the cached entity is actually fresh, and we should update
968 /* Did we have a stale cache entry that really is stale?
970 * Note that for HEAD requests, we won't get the body, so for a stale
971 * HEAD request, we don't remove the entity - instead we let the
972 * CACHE_REMOVE_URL filter remove the stale item from the cache.
974 if (cache
->stale_handle
) {
975 if (r
->status
== HTTP_NOT_MODIFIED
) {
976 /* Oh, hey. It isn't that stale! Yay! */
// The stale handle is promoted and its stored info is reused.
977 cache
->handle
= cache
->stale_handle
;
978 info
= &cache
->handle
->cache_obj
->info
;
981 else if (!r
->header_only
) {
982 /* Oh, well. Toss it. */
983 remove_entity(cache
->stale_handle
);
984 /* Treat the request as if it wasn't conditional. */
985 cache
->stale_handle
= NULL
;
987 * Restore the original request headers as they may be needed
988 * by further output filters like the byterange filter to make
989 * the correct decisions.
991 r
->headers_in
= cache
->stale_headers
;
995 /* no cache handle, create a new entity only for non-HEAD requests */
996 if (!cache
->handle
&& !r
->header_only
) {
998 cache_handle_t
*h
= apr_pcalloc(r
->pool
, sizeof(cache_handle_t
));
999 rv
= cache_generate_key(r
, r
->pool
, &key
);
1000 if (rv
!= APR_SUCCESS
) {
1003 rv
= create_entity(h
, r
, key
, size
);
1004 if (rv
!= APR_SUCCESS
) {
// NOTE(review): the statement that stores h into cache->handle is not visible
// here.
1008 info
= apr_pcalloc(r
->pool
, sizeof(cache_info
));
1009 /* We only set info->status upon the initial creation. */
1010 info
->status
= r
->status
;
1014 /* Caching layer declined the opportunity to cache the response */
1015 ap_remove_output_filter(f
);
1016 return ap_pass_brigade(f
->next
, in
);
1019 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1020 "cache: Caching url: %s", r
->unparsed_uri
);
1022 /* We are actually caching this response. So it does not
1023 * make sense to remove this entity any more.
// NOTE(review): the message below is logged, but the removal call itself is
// commented out on the following line.
1025 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1026 "cache: Removing CACHE_REMOVE_URL filter.");
1027 //ap_remove_output_filter(cache->remove_url_filter);
1030 * We now want to update the cache file header information with
1031 * the new date, last modified, expire and content length and write
1032 * it away to our cache file. First, we determine these values from
1033 * the response, using heuristics if appropriate.
1035 * In addition, we make HTTP/1.1 age calculations and write them away
1039 /* Read the date. Generate one if one is not supplied */
1040 dates
= apr_table_get(r
->err_headers_out
, "Date");
1041 if (dates
== NULL
) {
1042 dates
= apr_table_get(r
->headers_out
, "Date");
// NOTE(review): info starts out NULL and is assigned only in the branches
// above; whether every path reaching this dereference has set it cannot be
// confirmed from the visible lines.
1044 if (dates
!= NULL
) {
1045 info
->date
= apr_date_parse_http(dates
);
1048 info
->date
= APR_DATE_BAD
;
1051 now
= apr_time_now();
1052 if (info
->date
== APR_DATE_BAD
) { /* No, or bad date */
1053 /* no date header (or bad header)! */
1058 /* set response_time for HTTP/1.1 age calculations */
1059 info
->response_time
= now
;
1061 /* get the request time */
1062 info
->request_time
= r
->request_time
;
1066 /* We found a stale entry which wasn't really stale. */
1067 if (cache
->stale_handle
) {
1068 /* Load in the saved status and clear the status line. */
1069 r
->status
= info
->status
;
1070 r
->status_line
= NULL
;
1072 /* RFC 2616 10.3.5 states that entity headers are not supposed
1073 * to be in the 304 response. Therefore, we need to combine the
1074 * response headers with the cached headers *before* we update
1075 * the cached headers.
1077 * However, before doing that, we need to first merge in
1078 * err_headers_out and we also need to strip any hop-by-hop
1079 * headers that might have snuck in.
1081 r
->headers_out
= ap_cache_cacheable_headers_out(r
);
1083 /* Merge in our cached headers. However, keep any updated values. */
1084 ap_cache_accept_headers(cache
->handle
, r
, 1);
1087 /* Write away header information to cache. It is possible that we are
1088 * trying to update headers for an entity which has already been cached.
1090 * This may fail, due to an unwritable cache area. E.g. filesystem full,
1091 * permissions problems or a read-only (re)mount. This must be handled
1094 rv
= store_headers(cache
->handle
, r
, info
);
// A header store failure removes this filter; the response is still delivered.
1096 if(rv
!= APR_SUCCESS
) {
1097 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
1098 "cache: store_headers failed");
1099 ap_remove_output_filter(f
);
1101 return ap_pass_brigade(f
->next
, in
);
// Store the first brigade of the body; later brigades take the in_checked
// path near the top of this function.
1104 rv
= store_body(cache
->handle
, r
, in
);
1105 if (rv
!= APR_SUCCESS
) {
1106 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
1107 "cache: store_body failed");
1108 ap_remove_output_filter(f
);
1111 return ap_pass_brigade(f
->next
, in
);
/*
 * crccache_client_register_hook: hook-registration callback of the crccache
 * client module (it is the "register hooks" entry of the module descriptor
 * crccache_client_module).
 *
 * Actions visible in this function, in order:
 *   - logs an APLOG_INFO banner, passing NULL as the server argument;
 *   - registers crccache_client_post_config as a post_config hook with
 *     APR_HOOK_REALLY_FIRST;
 *   - registers crccache_client_url_handler as a quick handler with
 *     APR_HOOK_FIRST;
 *   - registers the output filters "CACHE_SAVE", "CACHE_SAVE_SUBREQ" and
 *     "CRCCACHE_DECODE" and stores the returned handles in the file-scope
 *     variables cache_save_filter_handle, cache_save_subreq_filter_handle
 *     and crccache_decode_filter_handle.
 *
 * The parameter p is not used by any statement visible here.
 *
 * NOTE(review): in this copy the filter-function and init arguments of the
 * two CACHE_SAVE* registrations are not visible; confirm them against the
 * canonical source before changing these calls.
 */
1114 static void crccache_client_register_hook(apr_pool_t
*p
) {
1115 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
1116 "Registering crccache client module, (C) 2009, Toby Collett");
1118 /* cache initializer */
1119 ap_hook_post_config(crccache_client_post_config
, NULL
, NULL
, APR_HOOK_REALLY_FIRST
);
/* cache handler: crccache_client_url_handler is installed as a quick handler */
1121 ap_hook_quick_handler(crccache_client_url_handler
, NULL
, NULL
, APR_HOOK_FIRST
);
/*
 * The notes below explain the filter ordering: the save filter is registered
 * twice under different filter types, because (per these notes) main requests
 * and subrequests need it on different sides of the AP_FTYPE_CONTENT_SET
 * filters.
 */
1123 * XXX The cache filters need to run right after the handlers and before
1124 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
1126 * Depending on the type of request (subrequest / main request) they
1127 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
1128 * filters. Thus create two filter handles for each type:
1129 * cache_save_filter_handle / cache_out_filter_handle to be used by
1131 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
1132 * to be run by subrequest
1135 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
1136 * filter to ensure that the compressed content is stored.
1137 * Incrementing filter type by 1 ensures his happens.
1138 * TODO: Revise this logic. In order for the crccache to work properly,
1139 * the plain text content must be cached and not the deflated content
1140 * Even more so, when receiving compressed content from the upstream
1141 * server, the cache_save_filter handler should uncompress it before
1142 * storing in the cache (but provide the compressed data to the client)
/*
 * "CACHE_SAVE" is registered with filter type AP_FTYPE_CONTENT_SET+1; per the
 * notes above, the increment is what ensures this filter is placed after a
 * possible DEFLATE filter.
 */
1144 cache_save_filter_handle
=
1145 ap_register_output_filter("CACHE_SAVE",
1148 AP_FTYPE_CONTENT_SET
+1);
1150 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
1151 * handle subrequsts. Decrementing filter type by 1 ensures this
/*
 * "CACHE_SAVE_SUBREQ" is registered with filter type AP_FTYPE_CONTENT_SET-1;
 * per the notes above, the decrement is what places it ahead of SUBREQ_CORE
 * when handling subrequests.
 */
1154 cache_save_subreq_filter_handle
=
1155 ap_register_output_filter("CACHE_SAVE_SUBREQ",
1158 AP_FTYPE_CONTENT_SET
-1);
1160 * CRCCACHE_DECODE must go into the filter chain after a possible DEFLATE
1161 * filter to ensure that already compressed cache objects do not
1162 * get compressed again. Incrementing filter type by 1 ensures
/*
 * "CRCCACHE_DECODE" is registered with crccache_decode_filter and filter type
 * AP_FTYPE_CONTENT_SET + 1, the same type used for "CACHE_SAVE".
 */
1165 crccache_decode_filter_handle
= ap_register_output_filter(
1166 "CRCCACHE_DECODE", crccache_decode_filter
, NULL
,
1167 AP_FTYPE_CONTENT_SET
+ 1);
1172 module AP_MODULE_DECLARE_DATA crccache_client_module
= {
1173 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
1174 NULL
, /* merge per-directory config structures */
1175 crccache_client_create_config
, /* create per-server config structure */
1176 NULL
, /* merge per-server config structures */
1177 crccache_client_cmds
, /* command apr_table_t */
1178 crccache_client_register_hook
/* register hooks */