/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* crcsync/crccache apache client module
 *
 * This module is designed to run as a cache server on the local end of a slow
 * internet link. It uses a CRC running-hash algorithm to reduce the data
 * transferred for upstream files that are cached but have since been modified.
 *
 * The CRC algorithm uses the crcsync library created by Rusty Russell.
 *
 * Author: Toby Collett (2009)
 * Contributor: Alex Wulms (2009)
 */
34 #include <apr-1.0/apr_file_io.h>
35 #include <apr-1.0/apr_strings.h>
36 #include <apr-1.0/apr_base64.h>
37 #include <apr-1.0/apr_lib.h>
38 #include <apr-1.0/apr_date.h>
39 #include "ap_provider.h"
40 #include "util_filter.h"
41 #include "util_script.h"
42 #include "util_charset.h"
44 #include <http_protocol.h>
47 #include "ap_wrapper.h"
48 #include <crcsync/crcsync.h>
51 #include "mod_crccache_client.h"
53 static ap_filter_rec_t
*crccache_decode_filter_handle
;
55 /* Handles for cache filters, resolved at startup to eliminate
56 * a name-to-function mapping on each request
58 static ap_filter_rec_t
*cache_save_filter_handle
;
59 static ap_filter_rec_t
*cache_save_subreq_filter_handle
;
60 static ap_filter_rec_t
*cache_out_filter_handle
;
61 static ap_filter_rec_t
*cache_out_subreq_filter_handle
;
62 static ap_filter_rec_t
*cache_remove_url_filter_handle
;
65 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
67 * Flow to Find the .data file:
68 * Incoming client requests URI /foo/bar/baz
69 * Generate <hash> off of /foo/bar/baz
71 * Read in <hash>.header file (may contain Format #1 or Format #2)
72 * If format #1 (Contains a list of Vary Headers):
73 * Use each header name (from .header) with our request values (headers_in) to
74 * regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
75 * re-read in <hash>.header (must be format #2)
79 * apr_uint32_t format;
81 * apr_array_t vary_headers (delimited by CRLF)
84 * disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
85 * entity name (dobj->name) [length is in disk_cache_info_t->name_len]
86 * r->headers_out (delimited by CRLF)
88 * r->headers_in (delimited by CRLF)
92 module AP_MODULE_DECLARE_DATA crccache_client_module
;
93 APR_OPTIONAL_FN_TYPE(ap_cache_generate_key
) *cache_generate_key
;
96 static int cache_post_config(apr_pool_t
*p
, apr_pool_t
*plog
,
97 apr_pool_t
*ptemp
, server_rec
*s
)
99 /* This is the means by which unusual (non-unix) os's may find alternate
100 * means to run a given command (e.g. shebang/registry parsing on Win32)
102 cache_generate_key
= APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key
);
103 if (!cache_generate_key
) {
104 cache_generate_key
= cache_generate_key_default
;
111 * Local static functions
114 static char *header_file(apr_pool_t
*p
, crccache_client_conf
*conf
,
115 disk_cache_object_t
*dobj
, const char *name
) {
116 if (!dobj
->hashfile
) {
117 dobj
->hashfile
= ap_cache_generate_name(p
, conf
->dirlevels
,
118 conf
->dirlength
, name
);
122 return apr_pstrcat(p
, dobj
->prefix
, CACHE_VDIR_SUFFIX
, "/",
123 dobj
->hashfile
, CACHE_HEADER_SUFFIX
, NULL
);
125 return apr_pstrcat(p
, conf
->cache_root
, "/", dobj
->hashfile
,
126 CACHE_HEADER_SUFFIX
, NULL
);
130 static char *data_file(apr_pool_t
*p
, crccache_client_conf
*conf
,
131 disk_cache_object_t
*dobj
, const char *name
) {
132 if (!dobj
->hashfile
) {
133 dobj
->hashfile
= ap_cache_generate_name(p
, conf
->dirlevels
,
134 conf
->dirlength
, name
);
138 return apr_pstrcat(p
, dobj
->prefix
, CACHE_VDIR_SUFFIX
, "/",
139 dobj
->hashfile
, CACHE_DATA_SUFFIX
, NULL
);
141 return apr_pstrcat(p
, conf
->cache_root
, "/", dobj
->hashfile
,
142 CACHE_DATA_SUFFIX
, NULL
);
146 static void mkdir_structure(crccache_client_conf
*conf
, const char *file
,
151 for (p
= (char*) file
+ conf
->cache_root_len
+ 1;;) {
157 rv
= apr_dir_make(file
, APR_UREAD
| APR_UWRITE
| APR_UEXECUTE
, pool
);
158 if (rv
!= APR_SUCCESS
&& !APR_STATUS_IS_EEXIST(rv
)) {
166 /* htcacheclean may remove directories underneath us.
167 * So, we'll try renaming three times at a cost of 0.002 seconds.
169 static apr_status_t
safe_file_rename(crccache_client_conf
*conf
, const char *src
,
170 const char *dest
, apr_pool_t
*pool
) {
173 rv
= apr_file_rename(src
, dest
, pool
);
175 if (rv
!= APR_SUCCESS
) {
178 for (i
= 0; i
< 2 && rv
!= APR_SUCCESS
; i
++) {
179 /* 1000 micro-seconds aka 0.001 seconds. */
182 mkdir_structure(conf
, dest
, pool
);
184 rv
= apr_file_rename(src
, dest
, pool
);
191 static apr_status_t
file_cache_el_final(disk_cache_object_t
*dobj
,
193 /* move the data over */
197 apr_file_close(dobj
->tfd
);
199 /* This assumes that the tempfile is on the same file system
200 * as the cache_root. If not, then we need a file copy/move
201 * rather than a rename.
203 rv
= apr_file_rename(dobj
->tempfile
, dobj
->datafile
, r
->pool
);
204 if (rv
!= APR_SUCCESS
) {
205 ap_log_error(APLOG_MARK
, APLOG_WARNING
, rv
,r
->server
, "disk_cache: rename tempfile to datafile failed:"
206 " %s -> %s", dobj
->tempfile
, dobj
->datafile
);
207 apr_file_remove(dobj
->tempfile
, r
->pool
);
216 static apr_status_t
file_cache_errorcleanup(disk_cache_object_t
*dobj
,
218 /* Remove the header file and the body file. */
219 apr_file_remove(dobj
->hdrsfile
, r
->pool
);
220 apr_file_remove(dobj
->datafile
, r
->pool
);
222 /* If we opened the temporary data file, close and remove it. */
224 apr_file_close(dobj
->tfd
);
225 apr_file_remove(dobj
->tempfile
, r
->pool
);
232 /* These two functions get and put state information into the data
233 * file for an ap_cache_el, this state information will be read
234 * and written transparent to clients of this module
236 static int file_cache_recall_mydata(apr_file_t
*fd
, cache_info
*info
,
237 disk_cache_object_t
*dobj
, request_rec
*r
) {
240 disk_cache_info_t disk_info
;
243 /* read the data from the cache file */
244 len
= sizeof(disk_cache_info_t
);
245 rv
= apr_file_read_full(fd
, &disk_info
, len
, &len
);
246 if (rv
!= APR_SUCCESS
) {
250 /* Store it away so we can get it later. */
251 dobj
->disk_info
= disk_info
;
253 info
->status
= disk_info
.status
;
254 info
->date
= disk_info
.date
;
255 info
->expire
= disk_info
.expire
;
256 info
->request_time
= disk_info
.request_time
;
257 info
->response_time
= disk_info
.response_time
;
259 /* Note that we could optimize this by conditionally doing the palloc
260 * depending upon the size. */
261 urlbuff
= apr_palloc(r
->pool
, disk_info
.name_len
+ 1);
262 len
= disk_info
.name_len
;
263 rv
= apr_file_read_full(fd
, urlbuff
, len
, &len
);
264 if (rv
!= APR_SUCCESS
) {
267 urlbuff
[disk_info
.name_len
] = '\0';
269 /* check that we have the same URL */
270 /* Would strncmp be correct? */
271 if (strcmp(urlbuff
, dobj
->name
) != 0) {
278 static const char* regen_key(apr_pool_t
*p
, apr_table_t
*headers
,
279 apr_array_header_t
*varray
, const char *oldkey
) {
286 nvec
= (varray
->nelts
* 2) + 1;
287 iov
= apr_palloc(p
, sizeof(struct iovec
) * nvec
);
288 elts
= (const char **) varray
->elts
;
291 * - Handle multiple-value headers better. (sort them?)
292 * - Handle Case in-sensitive Values better.
293 * This isn't the end of the world, since it just lowers the cache
294 * hit rate, but it would be nice to fix.
296 * The majority are case insenstive if they are values (encoding etc).
297 * Most of rfc2616 is case insensitive on header contents.
299 * So the better solution may be to identify headers which should be
300 * treated case-sensitive?
301 * HTTP URI's (3.2.3) [host and scheme are insensitive]
302 * HTTP method (5.1.1)
303 * HTTP-date values (3.3.1)
304 * 3.7 Media Types [exerpt]
305 * The type, subtype, and parameter attribute names are case-
306 * insensitive. Parameter values might or might not be case-sensitive,
307 * depending on the semantics of the parameter name.
308 * 4.20 Except [exerpt]
309 * Comparison of expectation values is case-insensitive for unquoted
310 * tokens (including the 100-continue token), and is case-sensitive for
311 * quoted-string expectation-extensions.
314 for (i
= 0, k
= 0; i
< varray
->nelts
; i
++) {
315 header
= apr_table_get(headers
, elts
[i
]);
319 iov
[k
].iov_base
= (char*) elts
[i
];
320 iov
[k
].iov_len
= strlen(elts
[i
]);
322 iov
[k
].iov_base
= (char*) header
;
323 iov
[k
].iov_len
= strlen(header
);
326 iov
[k
].iov_base
= (char*) oldkey
;
327 iov
[k
].iov_len
= strlen(oldkey
);
330 return apr_pstrcatv(p
, iov
, k
, NULL
);
/*
 * qsort() comparison callback for an array of C strings.
 *
 * fn1 and fn2 each point to an array element, i.e. to a `char *`.
 * Returns the strcmp() ordering of the two strings.
 */
static int array_alphasort(const void *fn1, const void *fn2) {
    const char *lhs = *(const char *const *) fn1;
    const char *rhs = *(const char *const *) fn2;

    return strcmp(lhs, rhs);
}
337 static void tokens_to_array(apr_pool_t
*p
, const char *data
,
338 apr_array_header_t
*arr
) {
341 while ((token
= ap_get_list_item(p
, &data
)) != NULL
) {
342 *((const char **) apr_array_push(arr
)) = token
;
345 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
346 qsort((void *) arr
->elts
, arr
->nelts
, sizeof(char *), array_alphasort
);
350 * Hook and mod_cache callback functions
352 int create_entity(cache_handle_t
*h
, request_rec
*r
, const char *key
,
354 crccache_client_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
355 &crccache_client_module
);
357 disk_cache_object_t
*dobj
;
359 if (conf
->cache_root
== NULL
) {
363 /* Allocate and initialize cache_object_t and disk_cache_object_t */
364 h
->cache_obj
= obj
= apr_pcalloc(r
->pool
, sizeof(*obj
));
365 obj
->vobj
= dobj
= apr_pcalloc(r
->pool
, sizeof(*dobj
));
367 obj
->key
= apr_pstrdup(r
->pool
, key
);
369 dobj
->name
= obj
->key
;
371 /* Save the cache root */
372 dobj
->root
= apr_pstrndup(r
->pool
, conf
->cache_root
, conf
->cache_root_len
);
373 dobj
->root_len
= conf
->cache_root_len
;
374 dobj
->datafile
= data_file(r
->pool
, conf
, dobj
, key
);
375 dobj
->hdrsfile
= header_file(r
->pool
, conf
, dobj
, key
);
376 dobj
->tempfile
= apr_pstrcat(r
->pool
, conf
->cache_root
, AP_TEMPFILE
, NULL
);
381 int open_entity(cache_handle_t
*h
, request_rec
*r
, const char *key
) {
386 static int error_logged
= 0;
387 crccache_client_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
388 &crccache_client_module
);
392 disk_cache_object_t
*dobj
;
396 /* Look up entity keyed to 'url' */
397 if (conf
->cache_root
== NULL
) {
400 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
401 "disk_cache: Cannot cache files to disk without a CacheRootClient specified.");
406 /* Create and init the cache object */
407 h
->cache_obj
= obj
= apr_pcalloc(r
->pool
, sizeof(cache_object_t
));
408 obj
->vobj
= dobj
= apr_pcalloc(r
->pool
, sizeof(disk_cache_object_t
));
412 /* Open the headers file */
415 /* Save the cache root */
416 dobj
->root
= apr_pstrndup(r
->pool
, conf
->cache_root
, conf
->cache_root_len
);
417 dobj
->root_len
= conf
->cache_root_len
;
419 dobj
->hdrsfile
= header_file(r
->pool
, conf
, dobj
, key
);
420 flags
= APR_READ
|APR_BINARY
|APR_BUFFERED
;
421 rc
= apr_file_open(&dobj
->hfd
, dobj
->hdrsfile
, flags
, 0, r
->pool
);
422 if (rc
!= APR_SUCCESS
) {
426 /* read the format from the cache file */
427 len
= sizeof(format
);
428 apr_file_read_full(dobj
->hfd
, &format
, len
, &len
);
430 if (format
== VARY_FORMAT_VERSION
) {
431 apr_array_header_t
* varray
;
434 len
= sizeof(expire
);
435 apr_file_read_full(dobj
->hfd
, &expire
, len
, &len
);
437 varray
= apr_array_make(r
->pool
, 5, sizeof(char*));
438 rc
= read_array(r
, varray
, dobj
->hfd
);
439 if (rc
!= APR_SUCCESS
) {
440 ap_log_error(APLOG_MARK
, APLOG_ERR
, rc
, r
->server
,
441 "disk_cache: Cannot parse vary header file: %s",
445 apr_file_close(dobj
->hfd
);
447 nkey
= regen_key(r
->pool
, r
->headers_in
, varray
, key
);
449 dobj
->hashfile
= NULL
;
450 dobj
->prefix
= dobj
->hdrsfile
;
451 dobj
->hdrsfile
= header_file(r
->pool
, conf
, dobj
, nkey
);
453 flags
= APR_READ
|APR_BINARY
|APR_BUFFERED
;
454 rc
= apr_file_open(&dobj
->hfd
, dobj
->hdrsfile
, flags
, 0, r
->pool
);
455 if (rc
!= APR_SUCCESS
) {
459 else if (format
!= DISK_FORMAT_VERSION
) {
460 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
461 "cache_disk: File '%s' has a version mismatch. File had version: %d.",
462 dobj
->hdrsfile
, format
);
466 apr_off_t offset
= 0;
467 /* This wasn't a Vary Format file, so we must seek to the
468 * start of the file again, so that later reads work.
470 apr_file_seek(dobj
->hfd
, APR_SET
, &offset
);
477 dobj
->datafile
= data_file(r
->pool
, conf
, dobj
, nkey
);
478 dobj
->tempfile
= apr_pstrcat(r
->pool
, conf
->cache_root
, AP_TEMPFILE
, NULL
);
480 /* Open the data file */
481 flags
= APR_READ
|APR_BINARY
;
482 #ifdef APR_SENDFILE_ENABLED
483 flags
|= APR_SENDFILE_ENABLED
;
485 rc
= apr_file_open(&dobj
->fd
, dobj
->datafile
, flags
, 0, r
->pool
);
486 if (rc
!= APR_SUCCESS
) {
487 /* XXX: Log message */
491 rc
= apr_file_info_get(&finfo
, APR_FINFO_SIZE
, dobj
->fd
);
492 if (rc
== APR_SUCCESS
) {
493 dobj
->file_size
= finfo
.size
;
496 /* Read the bytes to setup the cache_info fields */
497 rc
= file_cache_recall_mydata(dobj
->hfd
, info
, dobj
, r
);
498 if (rc
!= APR_SUCCESS
) {
499 /* XXX log message */
503 /* Initialize the cache_handle callback functions */
504 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
505 "disk_cache: Recalled cached URL info header %s", dobj
->name
);
509 int remove_entity(cache_handle_t
*h
) {
510 /* Null out the cache object pointer so next time we start from scratch */
516 int remove_url(cache_handle_t
*h
, apr_pool_t
*p
) {
518 disk_cache_object_t
*dobj
;
520 /* Get disk cache object from cache handle */
521 dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
526 /* Delete headers file */
527 if (dobj
->hdrsfile
) {
528 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, NULL
,
529 "disk_cache: Deleting %s from cache.", dobj
->hdrsfile
);
531 rc
= apr_file_remove(dobj
->hdrsfile
, p
);
532 if ((rc
!= APR_SUCCESS
) && !APR_STATUS_IS_ENOENT(rc
)) {
533 /* Will only result in an output if httpd is started with -e debug.
534 * For reason see log_error_core for the case s == NULL.
536 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rc
, NULL
,
537 "disk_cache: Failed to delete headers file %s from cache.",
543 /* Delete data file */
544 if (dobj
->datafile
) {
545 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, NULL
,
546 "disk_cache: Deleting %s from cache.", dobj
->datafile
);
548 rc
= apr_file_remove(dobj
->datafile
, p
);
549 if ((rc
!= APR_SUCCESS
) && !APR_STATUS_IS_ENOENT(rc
)) {
550 /* Will only result in an output if httpd is started with -e debug.
551 * For reason see log_error_core for the case s == NULL.
553 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rc
, NULL
,
554 "disk_cache: Failed to delete data file %s from cache.",
560 /* now delete directories as far as possible up to our cache root */
562 const char *str_to_copy
;
564 str_to_copy
= dobj
->hdrsfile
? dobj
->hdrsfile
: dobj
->datafile
;
566 char *dir
, *slash
, *q
;
568 dir
= apr_pstrdup(p
, str_to_copy
);
570 /* remove filename */
571 slash
= strrchr(dir
, '/');
575 * now walk our way back to the cache root, delete everything
576 * in the way as far as possible
578 * Note: due to the way we constructed the file names in
579 * header_file and data_file, we are guaranteed that the
580 * cache_root is suffixed by at least one '/' which will be
581 * turned into a terminating null by this loop. Therefore,
582 * we won't either delete or go above our cache root.
584 for (q
= dir
+ dobj
->root_len
; *q
; ) {
585 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, NULL
,
586 "disk_cache: Deleting directory %s from cache",
589 rc
= apr_dir_remove(dir
, p
);
590 if (rc
!= APR_SUCCESS
&& !APR_STATUS_IS_ENOENT(rc
)) {
593 slash
= strrchr(q
, '/');
602 apr_status_t
read_array(request_rec
*r
, apr_array_header_t
* arr
,
604 char w
[MAX_STRING_LEN
];
609 rv
= apr_file_gets(w
, MAX_STRING_LEN
- 1, file
);
610 if (rv
!= APR_SUCCESS
) {
611 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
612 "Premature end of vary array.");
617 if (p
> 0 && w
[p
- 1] == '\n') {
618 if (p
> 1 && w
[p
- 2] == CR
) {
626 /* If we've finished reading the array, break out of the loop. */
631 *((const char **) apr_array_push(arr
)) = apr_pstrdup(r
->pool
, w
);
637 static apr_status_t
store_array(apr_file_t
*fd
, apr_array_header_t
* arr
) {
644 elts
= (const char **) arr
->elts
;
646 for (i
= 0; i
< arr
->nelts
; i
++) {
647 iov
[0].iov_base
= (char*) elts
[i
];
648 iov
[0].iov_len
= strlen(elts
[i
]);
649 iov
[1].iov_base
= CRLF
;
650 iov
[1].iov_len
= sizeof(CRLF
) - 1;
652 rv
= apr_file_writev(fd
, (const struct iovec
*) &iov
, 2,
654 if (rv
!= APR_SUCCESS
) {
659 iov
[0].iov_base
= CRLF
;
660 iov
[0].iov_len
= sizeof(CRLF
) - 1;
662 return apr_file_writev(fd
, (const struct iovec
*) &iov
, 1,
666 apr_status_t
read_table(cache_handle_t
*handle
, request_rec
*r
,
667 apr_table_t
*table
, apr_file_t
*file
) {
668 char w
[MAX_STRING_LEN
];
675 /* ### What about APR_EOF? */
676 rv
= apr_file_gets(w
, MAX_STRING_LEN
- 1, file
);
677 if (rv
!= APR_SUCCESS
) {
678 ap_log_rerror(APLOG_MARK
, APLOG_ERR
, 0, r
,
679 "Premature end of cache headers.");
683 /* Delete terminal (CR?)LF */
686 /* Indeed, the host's '\n':
687 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
688 -- whatever the script generates.
690 if (p
> 0 && w
[p
- 1] == '\n') {
691 if (p
> 1 && w
[p
- 2] == CR
) {
699 /* If we've finished reading the headers, break out of the loop. */
704 #if APR_CHARSET_EBCDIC
705 /* Chances are that we received an ASCII header text instead of
706 * the expected EBCDIC header lines. Try to auto-detect:
708 if (!(l
= strchr(w
, ':'))) {
709 int maybeASCII
= 0, maybeEBCDIC
= 0;
710 unsigned char *cp
, native
;
711 apr_size_t inbytes_left
, outbytes_left
;
713 for (cp
= w
; *cp
!= '\0'; ++cp
) {
714 native
= apr_xlate_conv_byte(ap_hdrs_from_ascii
, *cp
);
715 if (apr_isprint(*cp
) && !apr_isprint(native
))
717 if (!apr_isprint(*cp
) && apr_isprint(native
))
720 if (maybeASCII
> maybeEBCDIC
) {
721 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
722 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
724 inbytes_left
= outbytes_left
= cp
- w
;
725 apr_xlate_conv_buffer(ap_hdrs_from_ascii
,
726 w
, &inbytes_left
, w
, &outbytes_left
);
729 #endif /*APR_CHARSET_EBCDIC*/
731 /* if we see a bogus header don't ignore it. Shout and scream */
732 if (!(l
= strchr(w
, ':'))) {
737 while (*l
&& apr_isspace(*l
)) {
741 apr_table_add(table
, w
, l
);
748 * Clean-up memory used by helper libraries, that don't know about apr_palloc
749 * and that (probably) use classical malloc/free
751 apr_status_t
deflate_ctx_cleanup(void *data
)
753 crccache_client_ctx
*ctx
= (crccache_client_ctx
*)data
;
757 if (ctx
->decompression_state
!= DECOMPRESSION_ENDED
)
759 inflateEnd(ctx
->decompression_stream
);
760 ctx
->decompression_state
= DECOMPRESSION_ENDED
;
768 * Reads headers from a buffer and returns an array of headers.
769 * Returns NULL on file error
770 * This routine tries to deal with too long lines and continuation lines.
771 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
772 * Is that okay, or should they be collapsed where possible?
774 apr_status_t
recall_headers(cache_handle_t
*h
, request_rec
*r
) {
781 disk_cache_object_t
*dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
783 /* This case should not happen... */
785 /* XXX log message */
789 h
->req_hdrs
= apr_table_make(r
->pool
, 20);
790 h
->resp_hdrs
= apr_table_make(r
->pool
, 20);
792 /* Call routine to read the header lines/status line */
793 read_table(h
, r
, h
->resp_hdrs
, dobj
->hfd
);
794 read_table(h
, r
, h
->req_hdrs
, dobj
->hfd
);
796 // TODO: We only really want to add our block hashes if the cache is not fresh
797 // TODO: We could achieve that by adding a filter here on sending the request
798 // and then doing all of this in the filter 'JIT'
799 e
= apr_bucket_file_create(dobj
->fd
, 0, (apr_size_t
) dobj
->file_size
, r
->pool
,
800 r
->connection
->bucket_alloc
);
803 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
805 // this will be rounded down, but thats okay
806 // TODO: I think that we should just add % to the trailing block, otherwise our extra block
807 // is always limited to max of BLOCK_COUNT size.
808 size_t blocksize
= len
/FULL_BLOCK_COUNT
;
809 size_t tail_block_size
= len
% FULL_BLOCK_COUNT
;
810 size_t block_count_including_final_block
= FULL_BLOCK_COUNT
+ (tail_block_size
!= 0);
811 // sanity check for very small files
814 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"crccache: %d blocks of %ld bytes",FULL_BLOCK_COUNT,blocksize);
816 crccache_client_ctx
* ctx
;
817 ctx
= apr_pcalloc(r
->pool
, sizeof(*ctx
));
818 ctx
->bb
= apr_brigade_create(r
->pool
, r
->connection
->bucket_alloc
);
819 ctx
->block_size
= blocksize
;
820 ctx
->tail_block_size
= tail_block_size
;
821 ctx
->state
= DECODING_NEW_SECTION
;
822 ctx
->cached_bucket
= e
;
824 // Setup inflate for decompressing non-matched literal data
825 ctx
->decompression_stream
= apr_palloc(r
->pool
, sizeof(*(ctx
->decompression_stream
)));
826 ctx
->decompression_stream
->zalloc
= Z_NULL
;
827 ctx
->decompression_stream
->zfree
= Z_NULL
;
828 ctx
->decompression_stream
->opaque
= Z_NULL
;
829 ctx
->decompression_stream
->avail_in
= 0;
830 ctx
->decompression_stream
->next_in
= Z_NULL
;
831 z_RC
= inflateInit(ctx
->decompression_stream
);
834 ap_log_error(APLOG_MARK
, APLOG_WARNING
, 0, r
->server
,
835 "Can not initialize decompression engine, return code: %d", z_RC
);
838 ctx
->decompression_state
= DECOMPRESSION_INITIALIZED
;
840 // Register a cleanup function to cleanup internal libz resources
841 apr_pool_cleanup_register(r
->pool
, ctx
, deflate_ctx_cleanup
,
842 apr_pool_cleanup_null
);
844 // All OK to go for the crcsync decoding: add the headers
845 // and set-up the decoding filter
847 // add one for base 64 overflow and null terminator
848 char hash_set
[HASH_HEADER_SIZE
+1];
850 uint64_t crcs
[block_count_including_final_block
];
851 crc_of_blocks(data
, len
, blocksize
, HASH_SIZE
, crcs
);
853 // swap to network byte order
854 for (i
= 0; i
< block_count_including_final_block
;++i
)
859 apr_base64_encode (hash_set
, (char *)crcs
, block_count_including_final_block
*sizeof(crcs
[0]));
860 hash_set
[HASH_HEADER_SIZE
] = '\0';
861 //apr_bucket_delete(e);
863 // TODO; bit of a safety margin here, could calculate exact size
864 const int block_header_max_size
= HASH_HEADER_SIZE
+32;
865 char block_header_txt
[block_header_max_size
];
866 snprintf(block_header_txt
, block_header_max_size
,"fs=%zu, h=%s",len
,hash_set
);
867 apr_table_set(r
->headers_in
, BLOCK_HEADER
, block_header_txt
);
868 // TODO: do we want to cache the hashes here?
870 // initialise the context for our sha1 digest of the unencoded response
871 EVP_MD_CTX_init(&ctx
->mdctx
);
872 const EVP_MD
*md
= EVP_sha1();
873 EVP_DigestInit_ex(&ctx
->mdctx
, md
, NULL
);
875 // we want to add a filter here so that we can decode the response.
876 // we need access to the original cached data when we get the response as
877 // we need that to fill in the matched blocks.
878 ap_add_output_filter_handle(crccache_decode_filter_handle
,
879 ctx
, r
, r
->connection
);
881 // TODO: why is hfd file only closed in this case?
882 apr_file_close(dobj
->hfd
);
884 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
885 "crccache_client: Recalled headers for URL %s", dobj
->name
);
889 apr_status_t
recall_body(cache_handle_t
*h
, apr_pool_t
*p
,
890 apr_bucket_brigade
*bb
) {
892 disk_cache_object_t
*dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
894 e
= apr_bucket_file_create(dobj
->fd
, 0, (apr_size_t
) dobj
->file_size
, p
,
897 APR_BRIGADE_INSERT_HEAD(bb
, e
);
898 e
= apr_bucket_eos_create(bb
->bucket_alloc
);
899 APR_BRIGADE_INSERT_TAIL(bb
, e
);
901 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, NULL
, "crccache_client: Recalled body for URL %s", dobj
->name
);
905 apr_status_t
store_table(apr_file_t
*fd
, apr_table_t
*table
) {
910 apr_table_entry_t
*elts
;
912 elts
= (apr_table_entry_t
*) apr_table_elts(table
)->elts
;
913 for (i
= 0; i
< apr_table_elts(table
)->nelts
; ++i
) {
914 if (elts
[i
].key
!= NULL
) {
915 iov
[0].iov_base
= elts
[i
].key
;
916 iov
[0].iov_len
= strlen(elts
[i
].key
);
917 iov
[1].iov_base
= ": ";
918 iov
[1].iov_len
= sizeof(": ") - 1;
919 iov
[2].iov_base
= elts
[i
].val
;
920 iov
[2].iov_len
= strlen(elts
[i
].val
);
921 iov
[3].iov_base
= CRLF
;
922 iov
[3].iov_len
= sizeof(CRLF
) - 1;
924 rv
= apr_file_writev(fd
, (const struct iovec
*) &iov
, 4,
926 if (rv
!= APR_SUCCESS
) {
931 iov
[0].iov_base
= CRLF
;
932 iov
[0].iov_len
= sizeof(CRLF
) - 1;
933 rv
= apr_file_writev(fd
, (const struct iovec
*) &iov
, 1,
938 apr_status_t
store_headers(cache_handle_t
*h
, request_rec
*r
,
940 crccache_client_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
941 &crccache_client_module
);
945 disk_cache_object_t
*dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
947 disk_cache_info_t disk_info
;
950 /* This is flaky... we need to manage the cache_info differently */
951 h
->cache_obj
->info
= *info
;
953 if (r
->headers_out
) {
956 tmp
= apr_table_get(r
->headers_out
, "Vary");
959 apr_array_header_t
* varray
;
960 apr_uint32_t format
= VARY_FORMAT_VERSION
;
962 /* If we were initially opened as a vary format, rollback
963 * that internal state for the moment so we can recreate the
964 * vary format hints in the appropriate directory.
967 dobj
->hdrsfile
= dobj
->prefix
;
971 mkdir_structure(conf
, dobj
->hdrsfile
, r
->pool
);
973 rv
= apr_file_mktemp(&dobj
->tfd
, dobj
->tempfile
,
974 APR_CREATE
| APR_WRITE
| APR_BINARY
| APR_EXCL
,
977 if (rv
!= APR_SUCCESS
) {
981 amt
= sizeof(format
);
982 apr_file_write(dobj
->tfd
, &format
, &amt
);
984 amt
= sizeof(info
->expire
);
985 apr_file_write(dobj
->tfd
, &info
->expire
, &amt
);
987 varray
= apr_array_make(r
->pool
, 6, sizeof(char*));
988 tokens_to_array(r
->pool
, tmp
, varray
);
990 store_array(dobj
->tfd
, varray
);
992 apr_file_close(dobj
->tfd
);
996 rv
= safe_file_rename(conf
, dobj
->tempfile
, dobj
->hdrsfile
,
998 if (rv
!= APR_SUCCESS
) {
999 ap_log_error(APLOG_MARK
, APLOG_WARNING
, rv
, r
->server
,
1000 "disk_cache: rename tempfile to varyfile failed: %s -> %s",
1001 dobj
->tempfile
, dobj
->hdrsfile
);
1002 apr_file_remove(dobj
->tempfile
, r
->pool
);
1006 dobj
->tempfile
= apr_pstrcat(r
->pool
, conf
->cache_root
, AP_TEMPFILE
, NULL
);
1007 tmp
= regen_key(r
->pool
, r
->headers_in
, varray
, dobj
->name
);
1008 dobj
->prefix
= dobj
->hdrsfile
;
1009 dobj
->hashfile
= NULL
;
1010 dobj
->datafile
= data_file(r
->pool
, conf
, dobj
, tmp
);
1011 dobj
->hdrsfile
= header_file(r
->pool
, conf
, dobj
, tmp
);
1016 rv
= apr_file_mktemp(&dobj
->hfd
, dobj
->tempfile
,
1017 APR_CREATE
| APR_WRITE
| APR_BINARY
|
1018 APR_BUFFERED
| APR_EXCL
, r
->pool
);
1020 if (rv
!= APR_SUCCESS
) {
1024 disk_info
.format
= DISK_FORMAT_VERSION
;
1025 disk_info
.date
= info
->date
;
1026 disk_info
.expire
= info
->expire
;
1027 disk_info
.entity_version
= dobj
->disk_info
.entity_version
++;
1028 disk_info
.request_time
= info
->request_time
;
1029 disk_info
.response_time
= info
->response_time
;
1030 disk_info
.status
= info
->status
;
1032 disk_info
.name_len
= strlen(dobj
->name
);
1034 iov
[0].iov_base
= (void*)&disk_info
;
1035 iov
[0].iov_len
= sizeof(disk_cache_info_t
);
1036 iov
[1].iov_base
= (void*)dobj
->name
;
1037 iov
[1].iov_len
= disk_info
.name_len
;
1039 rv
= apr_file_writev(dobj
->hfd
, (const struct iovec
*) &iov
, 2, &amt
);
1040 if (rv
!= APR_SUCCESS
) {
1044 if (r
->headers_out
) {
1045 apr_table_t
*headers_out
;
1047 headers_out
= ap_cache_cacheable_hdrs_out(r
->pool
, r
->headers_out
,
1050 if (!apr_table_get(headers_out
, "Content-Type")
1051 && r
->content_type
) {
1052 apr_table_setn(headers_out
, "Content-Type",
1053 ap_make_content_type(r
, r
->content_type
));
1056 headers_out
= apr_table_overlay(r
->pool
, headers_out
,
1057 r
->err_headers_out
);
1058 rv
= store_table(dobj
->hfd
, headers_out
);
1059 if (rv
!= APR_SUCCESS
) {
1064 /* Parse the vary header and dump those fields from the headers_in. */
1065 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1066 if (r
->headers_in
) {
1067 apr_table_t
*headers_in
;
1069 headers_in
= ap_cache_cacheable_hdrs_out(r
->pool
, r
->headers_in
,
1071 rv
= store_table(dobj
->hfd
, headers_in
);
1072 if (rv
!= APR_SUCCESS
) {
1077 apr_file_close(dobj
->hfd
); /* flush and close */
1079 /* Remove old file with the same name. If remove fails, then
1080 * perhaps we need to create the directory tree where we are
1081 * about to write the new headers file.
1083 rv
= apr_file_remove(dobj
->hdrsfile
, r
->pool
);
1084 if (rv
!= APR_SUCCESS
) {
1085 mkdir_structure(conf
, dobj
->hdrsfile
, r
->pool
);
1088 rv
= safe_file_rename(conf
, dobj
->tempfile
, dobj
->hdrsfile
, r
->pool
);
1089 if (rv
!= APR_SUCCESS
) {
1090 ap_log_error(APLOG_MARK
, APLOG_WARNING
, rv
, r
->server
,
1091 "disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
1092 dobj
->tempfile
, dobj
->hdrsfile
);
1093 apr_file_remove(dobj
->tempfile
, r
->pool
);
1097 dobj
->tempfile
= apr_pstrcat(r
->pool
, conf
->cache_root
, AP_TEMPFILE
, NULL
);
1099 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1100 "disk_cache: Stored headers for URL %s", dobj
->name
);
1104 apr_status_t
store_body(cache_handle_t
*h
, request_rec
*r
,
1105 apr_bucket_brigade
*bb
) {
1109 disk_cache_object_t
*dobj
= (disk_cache_object_t
*) h
->cache_obj
->vobj
;
1110 crccache_client_conf
*conf
= ap_get_module_config(r
->server
->module_config
,
1111 &crccache_client_module
);
1113 /* We write to a temp file and then atomically rename the file over
1114 * in file_cache_el_final().
1117 rv
= apr_file_mktemp(&dobj
->tfd
, dobj
->tempfile
, APR_CREATE
| APR_WRITE
1118 | APR_BINARY
| APR_BUFFERED
| APR_EXCL
, r
->pool
);
1119 if (rv
!= APR_SUCCESS
) {
1122 dobj
->file_size
= 0;
1125 for (e
= APR_BRIGADE_FIRST(bb
); e
!= APR_BRIGADE_SENTINEL(bb
); e
= APR_BUCKET_NEXT(e
)) {
1127 apr_size_t length
, written
;
1128 rv
= apr_bucket_read(e
, &str
, &length
, APR_BLOCK_READ
);
1129 if (rv
!= APR_SUCCESS
) {
1130 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
1131 "cache_disk: Error when reading bucket for URL %s",
1133 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1134 file_cache_errorcleanup(dobj
, r
);
1137 rv
= apr_file_write_full(dobj
->tfd
, str
, length
, &written
);
1138 if (rv
!= APR_SUCCESS
) {
1139 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
1140 "cache_disk: Error when writing cache file for URL %s",
1142 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1143 file_cache_errorcleanup(dobj
, r
);
1146 dobj
->file_size
+= written
;
1147 if (dobj
->file_size
> conf
->maxfs
) {
1148 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1149 "cache_disk: URL %s failed the size check "
1150 "(%" APR_OFF_T_FMT
" > %" APR_OFF_T_FMT
")",
1151 h
->cache_obj
->key
, dobj
->file_size
, conf
->maxfs
);
1152 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1153 file_cache_errorcleanup(dobj
, r
);
1154 return APR_EGENERAL
;
1158 /* Was this the final bucket? If yes, close the temp file and perform
1161 if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb
))) {
1162 if (r
->connection
->aborted
|| r
->no_cache
) {
1163 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, r
->server
,
1164 "disk_cache: Discarding body for URL %s "
1165 "because connection has been aborted.",
1167 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1168 file_cache_errorcleanup(dobj
, r
);
1169 return APR_EGENERAL
;
1171 if (dobj
->file_size
< conf
->minfs
) {
1172 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1173 "cache_disk: URL %s failed the size check "
1174 "(%" APR_OFF_T_FMT
" < %" APR_OFF_T_FMT
")",
1175 h
->cache_obj
->key
, dobj
->file_size
, conf
->minfs
);
1176 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1177 file_cache_errorcleanup(dobj
, r
);
1178 return APR_EGENERAL
;
1181 /* All checks were fine. Move tempfile to final destination */
1182 /* Link to the perm file, and close the descriptor */
1183 file_cache_el_final(dobj
, r
);
1184 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
1185 "disk_cache: Body for URL %s cached.", dobj
->name
);
1192 * CACHE_DECODE filter
1195 * Deliver cached content (headers and body) up the stack.
1197 static int crccache_decode_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
) {
1199 request_rec
*r
= f
->r
;
1200 // TODO: set up context type struct
1201 crccache_client_ctx
*ctx
= f
->ctx
;
1203 // if this is the first pass in decoding we should check the headers etc
1204 // and fix up those headers that we modified as part of the encoding
1205 if (ctx
->headers_checked
== 0)
1207 ctx
->headers_checked
= 1;
1209 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1210 "CRCSYNC retuned status code (%d)", r
->status
);
1212 // TODO: make this work if we have multiple encodings
1213 const char * content_encoding
;
1214 content_encoding
= apr_table_get(r
->headers_out
, ENCODING_HEADER
);
1215 if (content_encoding
== NULL
|| strcmp(CRCCACHE_ENCODING
, content_encoding
)
1217 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1218 "CRCSYNC not decoding, content encoding bad (%s)", content_encoding
?content_encoding
:"NULL");
1219 ap_remove_output_filter(f
);
1220 return ap_pass_brigade(f
->next
, bb
);
1222 // TODO: Remove crcsync from the content encoding header
1224 // TODO: we should only set the status back to 200 if there are no
1225 // other instance codings used
1227 //r->status_line = "200 OK";
1230 // TODO: Fix up the etag as well
1235 /* Do nothing if asked to filter nothing. */
1236 if (APR_BRIGADE_EMPTY(bb
)) {
1237 return ap_pass_brigade(f
->next
, bb
);
1240 /* We require that we have a context already, otherwise we dont have our cached file
1241 * to fill in the gaps with.
1244 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1245 "No context available %s", r
->uri
);
1246 ap_remove_output_filter(f
);
1247 return ap_pass_brigade(f
->next
, bb
);
1250 while (!APR_BRIGADE_EMPTY(bb
))
1255 e
= APR_BRIGADE_FIRST(bb
);
1257 if (APR_BUCKET_IS_EOS(e
)) {
1259 /* Remove EOS from the old list, and insert into the new. */
1260 APR_BUCKET_REMOVE(e
);
1261 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1263 /* This filter is done once it has served up its content */
1264 ap_remove_output_filter(f
);
1266 // TODO: check strong hash here
1268 unsigned char md_value
[EVP_MAX_MD_SIZE
];
1269 EVP_DigestFinal_ex(&ctx
->mdctx
, md_value
, &md_len
);
1270 EVP_MD_CTX_cleanup(&ctx
->mdctx
);
1272 if (memcmp(md_value
, ctx
->md_value_rx
, 20) != 0)
1274 // TODO: Actually signal this to the user
1275 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCSYNC-DECODE HASH CHECK FAILED");
1279 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,"CRCSYNC-DECODE HASH CHECK PASSED");
1282 /* Okay, we've seen the EOS.
1283 * Time to pass it along down the chain.
1285 return ap_pass_brigade(f
->next
, ctx
->bb
);
1288 if (APR_BUCKET_IS_FLUSH(e
)) {
1291 /* Remove flush bucket from old brigade anf insert into the new. */
1292 APR_BUCKET_REMOVE(e
);
1293 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1294 rv
= ap_pass_brigade(f
->next
, ctx
->bb
);
1295 if (rv
!= APR_SUCCESS
) {
1301 if (APR_BUCKET_IS_METADATA(e
)) {
1303 * Remove meta data bucket from old brigade and insert into the
1306 APR_BUCKET_REMOVE(e
);
1307 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, e
);
1312 apr_bucket_read(e
, &data
, &len
, APR_BLOCK_READ
);
1313 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE read %zd bytes",len);
1315 apr_size_t consumed_bytes
= 0;
1316 while (consumed_bytes
< len
)
1318 //ap_log_error_wrapper(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE remaining %zd bytes",len - consumed_bytes);
1319 // no guaruntee that our buckets line up with our encoding sections
1320 // so we need a processing state machine stored in our context
1323 case DECODING_NEW_SECTION
:
1325 // check if we have a compressed section or a block section
1326 if (data
[consumed_bytes
] == ENCODING_COMPRESSED
)
1327 ctx
->state
= DECODING_COMPRESSED
;
1328 else if (data
[consumed_bytes
] == ENCODING_BLOCK
)
1329 ctx
->state
= DECODING_BLOCK_HEADER
;
1330 else if (data
[consumed_bytes
] == ENCODING_LITERAL
)
1331 ctx
->state
= DECODING_LITERAL
;
1332 else if (data
[consumed_bytes
] == ENCODING_HASH
)
1334 ctx
->state
= DECODING_HASH
;
1335 ctx
->md_value_rx_count
= 0;
1339 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,
1340 "CRCSYNC-DECODE, unknown section %d(%c)",data
[consumed_bytes
],data
[consumed_bytes
]);
1341 apr_brigade_cleanup(bb
);
1342 return APR_EGENERAL
;
1344 //ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,"CRCSYNC-DECODE found a new section %d",ctx->state);
1348 case DECODING_BLOCK_HEADER
:
1350 unsigned char block_number
= data
[consumed_bytes
];
1352 ctx
->state
= DECODING_NEW_SECTION
;
1354 // TODO: Output the indicated block here
1355 size_t current_block_size
= block_number
< FULL_BLOCK_COUNT
? ctx
->block_size
: ctx
->tail_block_size
;
1356 ap_log_error_wrapper(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1357 "CRCSYNC-DECODE block section, block %d, size %zu" ,block_number
, current_block_size
);
1359 char * buf
= apr_palloc(r
->pool
, current_block_size
);
1360 const char * source_data
;
1362 apr_bucket_read(ctx
->cached_bucket
, &source_data
, &source_len
, APR_BLOCK_READ
);
1363 assert(block_number
< (FULL_BLOCK_COUNT
+ (ctx
->tail_block_size
!= 0)));
1364 memcpy(buf
,&source_data
[block_number
*ctx
->block_size
],current_block_size
);
1365 // update our sha1 hash
1366 EVP_DigestUpdate(&ctx
->mdctx
, buf
, current_block_size
);
1367 apr_bucket
* b
= apr_bucket_pool_create(buf
, current_block_size
, r
->pool
, f
->c
->bucket_alloc
);
1368 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
1373 unsigned avail_in
= len
- consumed_bytes
;
1374 // 20 bytes for an SHA1 hash
1375 unsigned needed
= MIN(20-ctx
->md_value_rx_count
, avail_in
);
1376 memcpy(&ctx
->md_value_rx
[ctx
->md_value_rx_count
], &data
[consumed_bytes
],needed
);
1377 ctx
->md_value_rx_count
+=needed
;
1378 consumed_bytes
+= needed
;
1379 if (ctx
->md_value_rx_count
== 20)
1381 ctx
->state
= DECODING_NEW_SECTION
;
1385 case DECODING_COMPRESSED
:
1387 unsigned char decompressed_data_buf
[30000];
1389 z_stream
*strm
= ctx
->decompression_stream
;
1390 strm
->avail_in
= len
- consumed_bytes
;
1391 strm
->next_in
= (Bytef
*)(data
+ consumed_bytes
);
1392 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, "CRCSYNC-DECODE inflating %d bytes", strm.avail_in);
1393 // ap_log_hex(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server, strm.next_in, strm.avail_in);
1395 strm
->avail_out
= sizeof(decompressed_data_buf
);
1396 strm
->next_out
= decompressed_data_buf
;
1397 uInt avail_in_pre_inflate
= strm
->avail_in
;
1398 z_RC
= inflate(strm
, Z_NO_FLUSH
);
1399 if (z_RC
== Z_NEED_DICT
|| z_RC
== Z_DATA_ERROR
|| z_RC
== Z_MEM_ERROR
)
1401 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, r
->server
, "CRCSYNC-DECODE inflate error: %d", z_RC
);
1402 apr_brigade_cleanup(bb
);
1403 return APR_EGENERAL
;
1405 int have
= sizeof(decompressed_data_buf
) - strm
->avail_out
;
1406 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1407 "CRCSYNC-DECODE inflate rslt %d, consumed %d, produced %d",
1408 z_RC
, avail_in_pre_inflate
- strm
->avail_in
, have
);
1411 // write output data
1412 char * buf
= apr_palloc(r
->pool
, have
);
1413 memcpy(buf
,decompressed_data_buf
,have
);
1414 EVP_DigestUpdate(&ctx
->mdctx
, buf
, have
);
1415 apr_bucket
* b
= apr_bucket_pool_create(buf
, have
, r
->pool
, f
->c
->bucket_alloc
);
1416 APR_BRIGADE_INSERT_TAIL(ctx
->bb
, b
);
1418 } while (strm
->avail_out
== 0);
1419 consumed_bytes
= len
- strm
->avail_in
;
1420 if (z_RC
== Z_STREAM_END
)
1422 ctx
->state
= DECODING_NEW_SECTION
;
1429 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_SUCCESS
, r
->server
,
1430 "CRCSYNC-DECODE, unknown state %d, terminating transaction",ctx
->state
);
1431 apr_brigade_cleanup(bb
);
1432 return APR_EGENERAL
; // TODO: figure out how to pass the error on to the client
1435 APR_BUCKET_REMOVE(e
);
1439 apr_brigade_cleanup(bb
);
1443 static void *create_config(apr_pool_t
*p
, server_rec
*s
) {
1444 crccache_client_conf
*conf
= apr_pcalloc(p
, sizeof(crccache_client_conf
));
1445 /* array of URL prefixes for which caching is enabled */
1446 conf
->cacheenable
= apr_array_make(p
, 10, sizeof(struct cache_enable
));
1447 /* array of URL prefixes for which caching is enabled */
1448 conf
->cacheenable
= apr_array_make(p
, 10, sizeof(struct cache_enable
));
1449 /* array of URL prefixes for which caching is disabled */
1450 conf
->cachedisable
= apr_array_make(p
, 10, sizeof(struct cache_disable
));
1451 /* maximum time to cache a document */
1452 conf
->maxex
= DEFAULT_CACHE_MAXEXPIRE
;
1453 conf
->maxex_set
= 0;
1454 conf
->minex
= DEFAULT_CACHE_MINEXPIRE
;
1455 conf
->minex_set
= 0;
1456 /* default time to cache a document */
1457 conf
->defex
= DEFAULT_CACHE_EXPIRE
;
1458 conf
->defex_set
= 0;
1459 /* factor used to estimate Expires date from LastModified date */
1460 conf
->factor
= DEFAULT_CACHE_LMFACTOR
;
1461 conf
->factor_set
= 0;
1462 conf
->no_last_mod_ignore_set
= 0;
1463 conf
->no_last_mod_ignore
= 0;
1464 conf
->ignorecachecontrol
= 0;
1465 conf
->ignorecachecontrol_set
= 0;
1466 conf
->store_private
= 0;
1467 conf
->store_private_set
= 0;
1468 conf
->store_nostore
= 0;
1469 conf
->store_nostore_set
= 0;
1470 /* array of headers that should not be stored in cache */
1471 conf
->ignore_headers
= apr_array_make(p
, 10, sizeof(char *));
1472 conf
->ignore_headers_set
= CACHE_IGNORE_HEADERS_UNSET
;
1473 /* flag indicating that query-string should be ignored when caching */
1474 conf
->ignorequerystring
= 0;
1475 conf
->ignorequerystring_set
= 0;
1477 /* XXX: Set default values */
1478 conf
->dirlevels
= DEFAULT_DIRLEVELS
;
1479 conf
->dirlength
= DEFAULT_DIRLENGTH
;
1480 conf
->maxfs
= DEFAULT_MAX_FILE_SIZE
;
1481 conf
->minfs
= DEFAULT_MIN_FILE_SIZE
;
1483 conf
->cache_root
= NULL
;
1484 conf
->cache_root_len
= 0;
1490 * mod_disk_cache configuration directives handlers.
1492 static const char *set_cache_root(cmd_parms
*parms
, void *in_struct_ptr
,
1494 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
1495 &crccache_client_module
);
1496 conf
->cache_root
= arg
;
1497 conf
->cache_root_len
= strlen(arg
);
1498 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1504 * Consider eliminating the next two directives in favor of
1505 * Ian's prime number hash...
1506 * key = hash_fn( r->uri)
1507 * filename = "/key % prime1 /key %prime2/key %prime3"
1509 static const char *set_cache_dirlevels(cmd_parms
*parms
, void *in_struct_ptr
,
1511 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
1512 &crccache_client_module
);
1513 int val
= atoi(arg
);
1515 return "CacheDirLevelsClient value must be an integer greater than 0";
1516 if (val
* conf
->dirlength
> CACHEFILE_LEN
)
1517 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1518 conf
->dirlevels
= val
;
1521 static const char *set_cache_dirlength(cmd_parms
*parms
, void *in_struct_ptr
,
1523 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
1524 &crccache_client_module
);
1525 int val
= atoi(arg
);
1527 return "CacheDirLengthClient value must be an integer greater than 0";
1528 if (val
* conf
->dirlevels
> CACHEFILE_LEN
)
1529 return "CacheDirLevelsClient*CacheDirLengthClient value must not be higher than 20";
1531 conf
->dirlength
= val
;
1535 static const char *set_cache_minfs(cmd_parms
*parms
, void *in_struct_ptr
,
1537 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
1538 &crccache_client_module
);
1540 if (apr_strtoff(&conf
->minfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->minfs
1542 return "CacheMinFileSizeClient argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1547 static const char *set_cache_maxfs(cmd_parms
*parms
, void *in_struct_ptr
,
1549 crccache_client_conf
*conf
= ap_get_module_config(parms
->server
->module_config
,
1550 &crccache_client_module
);
1551 if (apr_strtoff(&conf
->maxfs
, arg
, NULL
, 0) != APR_SUCCESS
|| conf
->maxfs
1553 return "CacheMaxFileSizeClient argument must be a non-negative integer representing the max size of a file to cache in bytes.";
/*
 * CRCClientEnable directive handler (registered as a two-argument
 * directive in disk_cache_cmds).
 *
 * Appends a new cache_enable entry to conf->cacheenable, parses the URL
 * argument into the entry's `url` field and records the length of the
 * parsed path; when the parsed URL has no path, the path is set to "/".
 *
 * NOTE(review): several lines of this function are not visible in this
 * view (remaining parameters, the condition guarding the error return,
 * the branch bodies and the final return) -- confirm against the full
 * file before relying on this description.
 */
1558 static const char *add_crc_client_enable(cmd_parms
*parms
, void *dummy
,
1562 crccache_client_conf
*conf
;
1563 struct cache_enable
*new;
/* Error text returned to the configuration parser, allocated from the
 * directive's pool. */
1566 return apr_psprintf(parms
->pool
,
1567 "provider (%s) starts with a '/'. Are url and provider switched?",
1572 (crccache_client_conf
*)ap_get_module_config(parms
->server
->module_config
,
1573 &crccache_client_module
);
/* Reserve a new slot in the enable list. */
1574 new = apr_array_push(conf
->cacheenable
);
/* A non-zero result from apr_uri_parse takes this branch. */
1576 if (apr_uri_parse(parms
->pool
, url
, &(new->url
))) {
/* Cache the path length when the URL carries a path. */
1579 if (new->url
.path
) {
1580 new->pathlen
= strlen(new->url
.path
);
/* Otherwise fall back to the root path. */
1583 new->url
.path
= "/";
1588 static const command_rec disk_cache_cmds
[] =
1590 AP_INIT_TAKE2("CRCClientEnable", add_crc_client_enable
, NULL
, RSRC_CONF
, "A cache type and partial URL prefix below which caching is enabled"),
1591 AP_INIT_TAKE1("CacheRootClient", set_cache_root
, NULL
, RSRC_CONF
,"The directory to store cache files"),
1592 AP_INIT_TAKE1("CacheDirLevelsClient", set_cache_dirlevels
, NULL
, RSRC_CONF
, "The number of levels of subdirectories in the cache"),
1593 AP_INIT_TAKE1("CacheDirLengthClient", set_cache_dirlength
, NULL
, RSRC_CONF
, "The number of characters in subdirectory names"),
1594 AP_INIT_TAKE1("CacheMinFileSizeClient", set_cache_minfs
, NULL
, RSRC_CONF
, "The minimum file size to cache a document"),
1595 AP_INIT_TAKE1("CacheMaxFileSizeClient", set_cache_maxfs
, NULL
, RSRC_CONF
, "The maximum file size to cache a document"),
1599 int ap_run_insert_filter(request_rec
*r
);
1601 int cache_url_handler(request_rec
*r
, int lookup
)
1605 cache_request_rec
*cache
;
1606 crccache_client_conf
*conf
;
1607 apr_bucket_brigade
*out
;
1609 ap_filter_rec_t
*cache_out_handle
;
1611 /* Delay initialization until we know we are handling a GET */
1612 if (r
->method_number
!= M_GET
) {
1616 conf
= (crccache_client_conf
*) ap_get_module_config(r
->server
->module_config
,
1617 &crccache_client_module
);
1619 /* make space for the per request config */
1620 cache
= (cache_request_rec
*) ap_get_module_config(r
->request_config
,
1621 &crccache_client_module
);
1623 cache
= apr_pcalloc(r
->pool
, sizeof(cache_request_rec
));
1624 ap_set_module_config(r
->request_config
, &crccache_client_module
, cache
);
1628 * Are we allowed to serve cached info at all?
1631 /* find certain cache controlling headers */
1632 auth
= apr_table_get(r
->headers_in
, "Authorization");
1634 /* First things first - does the request allow us to return
1635 * cached information at all? If not, just decline the request.
1642 * Try to serve this request from the cache.
1644 * If no existing cache file (DECLINED)
1645 * add cache_save filter
1646 * If cached file (OK)
1647 * clear filter stack
1648 * add cache_out filter
1651 rv
= cache_select(r
);
1653 if (rv
== DECLINED
) {
1657 * Add cache_save filter to cache this request. Choose
1658 * the correct filter by checking if we are a subrequest
1662 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
1664 "Adding CACHE_SAVE_SUBREQ filter for %s",
1666 ap_add_output_filter_handle(cache_save_subreq_filter_handle
,
1667 NULL
, r
, r
->connection
);
1670 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
1671 r
->server
, "Adding CACHE_SAVE filter for %s",
1673 ap_add_output_filter_handle(cache_save_filter_handle
,
1674 NULL
, r
, r
->connection
);
1677 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1678 "Adding CACHE_REMOVE_URL filter for %s",
1681 /* Add cache_remove_url filter to this request to remove a
1682 * stale cache entry if needed. Also put the current cache
1683 * request rec in the filter context, as the request that
1684 * is available later during running the filter maybe
1685 * different due to an internal redirect.
1687 cache
->remove_url_filter
=
1688 ap_add_output_filter_handle(cache_remove_url_filter_handle
,
1689 cache
, r
, r
->connection
);
1692 if (cache
->stale_headers
) {
1693 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
,
1694 r
->server
, "Restoring request headers for %s",
1697 r
->headers_in
= cache
->stale_headers
;
1700 /* Delete our per-request configuration. */
1701 ap_set_module_config(r
->request_config
, &crccache_client_module
, NULL
);
1706 ap_log_error(APLOG_MARK
, APLOG_ERR
, rv
, r
->server
,
1707 "cache: error returned while checking for cached "
1713 /* if we are a lookup, we are exiting soon one way or another; Restore
1716 if (cache
->stale_headers
) {
1717 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1718 "Restoring request headers.");
1719 r
->headers_in
= cache
->stale_headers
;
1722 /* Delete our per-request configuration. */
1723 ap_set_module_config(r
->request_config
, &crccache_client_module
, NULL
);
1726 rv
= ap_meets_conditions(r
);
1728 /* If we are a lookup, we have to return DECLINED as we have no
1729 * way of knowing if we will be able to serve the content.
1735 /* Return cached status. */
1739 /* If we're a lookup, we can exit now instead of serving the content. */
1744 /* Serve up the content */
1746 /* We are in the quick handler hook, which means that no output
1747 * filters have been set. So lets run the insert_filter hook.
1749 ap_run_insert_filter(r
);
1752 * Add cache_out filter to serve this request. Choose
1753 * the correct filter by checking if we are a subrequest
1757 cache_out_handle
= cache_out_subreq_filter_handle
;
1760 cache_out_handle
= cache_out_filter_handle
;
1762 ap_add_output_filter_handle(cache_out_handle
, NULL
, r
, r
->connection
);
1765 * Remove all filters that are before the cache_out filter. This ensures
1766 * that we kick off the filter stack with our cache_out filter being the
1767 * first in the chain. This make sense because we want to restore things
1768 * in the same manner as we saved them.
1769 * There may be filters before our cache_out filter, because
1771 * 1. We call ap_set_content_type during cache_select. This causes
1772 * Content-Type specific filters to be added.
1773 * 2. We call the insert_filter hook. This causes filters e.g. like
1774 * the ones set with SetOutputFilter to be added.
1776 next
= r
->output_filters
;
1777 while (next
&& (next
->frec
!= cache_out_handle
)) {
1778 ap_remove_output_filter(next
);
1782 /* kick off the filter stack */
1783 out
= apr_brigade_create(r
->pool
, r
->connection
->bucket_alloc
);
1784 rv
= ap_pass_brigade(r
->output_filters
, out
);
1785 if (rv
!= APR_SUCCESS
) {
1786 if (rv
!= AP_FILTER_ERROR
) {
1787 ap_log_error(APLOG_MARK
, APLOG_ERR
, rv
, r
->server
,
1788 "cache: error returned while trying to return "
1803 * Deliver cached content (headers and body) up the stack.
1805 int cache_out_filter(ap_filter_t
*f
, apr_bucket_brigade
*bb
)
1807 request_rec
*r
= f
->r
;
1808 cache_request_rec
*cache
;
1810 cache
= (cache_request_rec
*) ap_get_module_config(r
->request_config
,
1811 &crccache_client_module
);
1814 /* user likely configured CACHE_OUT manually; they should use mod_cache
1815 * configuration to do that */
1816 ap_log_error(APLOG_MARK
, APLOG_ERR
, 0, r
->server
,
1817 "CACHE_OUT enabled unexpectedly");
1818 ap_remove_output_filter(f
);
1819 return ap_pass_brigade(f
->next
, bb
);
1822 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1823 "cache: running CACHE_OUT filter");
1825 /* restore status of cached response */
1826 /* XXX: This exposes a bug in mem_cache, since it does not
1827 * restore the status into it's handle. */
1828 r
->status
= cache
->handle
->cache_obj
->info
.status
;
1830 /* recall_headers() was called in cache_select() */
1831 recall_body(cache
->handle
, r
->pool
, bb
);
1833 /* This filter is done once it has served up its content */
1834 ap_remove_output_filter(f
);
1836 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1837 "cache: serving %s", r
->uri
);
1838 return ap_pass_brigade(f
->next
, bb
);
1846 * Decide whether or not this content should be cached.
1847 * If we decide no it should not:
1848 * remove the filter from the chain
1849 * If we decide yes it should:
1850 * Have we already started saving the response?
1851 * If we have started, pass the data to the storage manager via store_body
1853 * Check to see if we *can* save this particular response.
1854 * If we can, call cache_create_entity() and save the headers and body
1855 * Finally, pass the data to the next filter (the network or whatever)
1858 int cache_save_filter(ap_filter_t
*f
, apr_bucket_brigade
*in
)
1861 request_rec
*r
= f
->r
;
1862 cache_request_rec
*cache
;
1863 crccache_client_conf
*conf
;
1864 //const char *cc_out, *cl;
1866 const char *exps
, /* *lastmods,*/ *dates
;//, *etag;
1867 apr_time_t exp
, date
,/* lastmod,*/ now
;
1869 cache_info
*info
= NULL
;
1873 conf
= (crccache_client_conf
*) ap_get_module_config(r
->server
->module_config
,
1874 &crccache_client_module
);
1876 /* Setup cache_request_rec */
1877 cache
= (cache_request_rec
*) ap_get_module_config(r
->request_config
,
1878 &crccache_client_module
);
1880 /* user likely configured CACHE_SAVE manually; they should really use
1881 * mod_cache configuration to do that
1883 cache
= apr_pcalloc(r
->pool
, sizeof(cache_request_rec
));
1884 ap_set_module_config(r
->request_config
, &crccache_client_module
, cache
);
1890 * Pass Data to Cache
1891 * ------------------
1892 * This section passes the brigades into the cache modules, but only
1893 * if the setup section (see below) is complete.
1895 if (cache
->block_response
) {
1896 /* We've already sent down the response and EOS. So, ignore
1897 * whatever comes now.
1902 /* have we already run the cachability check and set up the
1903 * cached file handle?
1905 if (cache
->in_checked
) {
1906 /* pass the brigades into the cache, then pass them
1907 * up the filter stack
1909 rv
= store_body(cache
->handle
, r
, in
);
1910 if (rv
!= APR_SUCCESS
) {
1911 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
1912 "cache: Cache provider's store_body failed!");
1913 ap_remove_output_filter(f
);
1915 return ap_pass_brigade(f
->next
, in
);
1919 * Setup Data in Cache
1920 * -------------------
1921 * This section opens the cache entity and sets various caching
1922 * parameters, and decides whether this URL should be cached at
1923 * all. This section is* run before the above section.
1926 /* read expiry date; if a bad date, then leave it so the client can
1929 exps
= apr_table_get(r
->err_headers_out
, "Expires");
1931 exps
= apr_table_get(r
->headers_out
, "Expires");
1934 if (APR_DATE_BAD
== (exp
= apr_date_parse_http(exps
))) {
1941 // we dont care about these
1943 /* read the last-modified date; if the date is bad, then delete it */
1944 lastmods
= apr_table_get(r
->err_headers_out
, "Last-Modified");
1945 if (lastmods
== NULL
) {
1946 lastmods
= apr_table_get(r
->headers_out
, "Last-Modified");
1948 if (lastmods
!= NULL
) {
1949 lastmod
= apr_date_parse_http(lastmods
);
1950 if (lastmod
== APR_DATE_BAD
) {
1955 lastmod
= APR_DATE_BAD
;
1958 /* read the etag and cache-control from the entity */
1959 etag
= apr_table_get(r
->err_headers_out
, "Etag");
1961 etag
= apr_table_get(r
->headers_out
, "Etag");
1963 cc_out
= apr_table_get(r
->err_headers_out
, "Cache-Control");
1964 if (cc_out
== NULL
) {
1965 cc_out
= apr_table_get(r
->headers_out
, "Cache-Control");
1969 * what responses should we not cache?
1971 * At this point we decide based on the response headers whether it
1972 * is appropriate _NOT_ to cache the data from the server. There are
1973 * a whole lot of conditions that prevent us from caching this data.
1974 * They are tested here one by one to be clear and unambiguous.
1976 if (r
->status
!= HTTP_OK
&& r
->status
!= HTTP_NON_AUTHORITATIVE
1977 && r
->status
!= HTTP_MULTIPLE_CHOICES
1978 && r
->status
!= HTTP_MOVED_PERMANENTLY
1979 && r
->status
!= HTTP_NOT_MODIFIED
) {
1980 /* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
1981 * We don't cache 206, because we don't (yet) cache partial responses.
1982 * We include 304 Not Modified here too as this is the origin server
1983 * telling us to serve the cached copy.
1986 if (exps
!= NULL
|| cc_out
!= NULL
) {
1987 /* We are also allowed to cache any response given that it has a
1988 * valid Expires or Cache Control header. If we find a either of
1989 * those here, we pass request through the rest of the tests. From
1992 * A response received with any other status code (e.g. status
1993 * codes 302 and 307) MUST NOT be returned in a reply to a
1994 * subsequent request unless there are cache-control directives or
1995 * another header(s) that explicitly allow it. For example, these
1996 * include the following: an Expires header (section 14.21); a
1997 * "max-age", "s-maxage", "must-revalidate", "proxy-revalidate",
1998 * "public" or "private" cache-control directive (section 14.9).
2002 reason
= apr_psprintf(p
, "Response status %d", r
->status
);
2011 else if (exps
!= NULL
&& exp
== APR_DATE_BAD
) {
2012 /* if a broken Expires header is present, don't cache it */
2013 reason
= apr_pstrcat(p
, "Broken expires header: ", exps
, NULL
);
2015 else if (exp
!= APR_DATE_BAD
&& exp
< r
->request_time
)
2017 /* if a Expires header is in the past, don't cache it */
2018 reason
= "Expires header already expired, not cacheable";
2020 else if (!conf
->ignorequerystring
&& r
->parsed_uri
.query
&& exps
== NULL
&&
2021 !ap_cache_liststr(NULL
, cc_out
, "max-age", NULL
)) {
2022 /* if a query string is present but no explicit expiration time,
2023 * don't cache it (RFC 2616/13.9 & 13.2.1)
2025 reason
= "Query string present but no explicit expiration time";
2028 else if (r
->status
== HTTP_NOT_MODIFIED
&&
2029 !cache
->handle
&& !cache
->stale_handle
) {
2030 /* if the server said 304 Not Modified but we have no cache
2031 * file - pass this untouched to the user agent, it's not for us.
2033 reason
= "HTTP Status 304 Not Modified";
2036 else if (r
->status
== HTTP_OK
&& lastmods
== NULL
&& etag
== NULL
2037 && (exps
== NULL
) && (conf
->no_last_mod_ignore
==0)) {
2038 /* 200 OK response from HTTP/1.0 and up without Last-Modified,
2039 * Etag, or Expires headers.
2041 /* Note: mod-include clears last_modified/expires/etags - this
2042 * is why we have an optional function for a key-gen ;-)
2044 reason
= "No Last-Modified, Etag, or Expires headers";
2047 else if (r
->header_only
&& !cache
->stale_handle
) {
2048 /* Forbid HEAD requests unless we have it cached already */
2049 reason
= "HTTP HEAD request";
2052 else if (!conf
->store_nostore
&&
2053 ap_cache_liststr(NULL
, cc_out
, "no-store", NULL
)) {
2054 /* RFC2616 14.9.2 Cache-Control: no-store response
2055 * indicating do not cache, or stop now if you are
2056 * trying to cache it.
2058 /* FIXME: The Cache-Control: no-store could have come in on a 304,
2059 * FIXME: while the original request wasn't conditional. IOW, we
2060 * FIXME: made the the request conditional earlier to revalidate
2061 * FIXME: our cached response.
2063 reason
= "Cache-Control: no-store present";
2065 else if (!conf
->store_private
&&
2066 ap_cache_liststr(NULL
, cc_out
, "private", NULL
)) {
2067 /* RFC2616 14.9.1 Cache-Control: private response
2068 * this object is marked for this user's eyes only. Behave
2071 /* FIXME: See above (no-store) */
2072 reason
= "Cache-Control: private present";
2074 else if (apr_table_get(r
->headers_in
, "Authorization") != NULL
2075 && !(ap_cache_liststr(NULL
, cc_out
, "s-maxage", NULL
)
2076 || ap_cache_liststr(NULL
, cc_out
, "must-revalidate", NULL
)
2077 || ap_cache_liststr(NULL
, cc_out
, "public", NULL
))) {
2078 /* RFC2616 14.8 Authorisation:
2079 * if authorisation is included in the request, we don't cache,
2080 * but we can cache if the following exceptions are true:
2081 * 1) If Cache-Control: s-maxage is included
2082 * 2) If Cache-Control: must-revalidate is included
2083 * 3) If Cache-Control: public is included
2085 reason
= "Authorization required";
2088 else if (ap_cache_liststr(NULL
,
2089 apr_table_get(r
->headers_out
, "Vary"),
2091 reason
= "Vary header contains '*'";
2093 else if (apr_table_get(r
->subprocess_env
, "no-cache") != NULL
) {
2094 reason
= "environment variable 'no-cache' is set";
2096 else if (r
->no_cache
) {
2097 /* or we've been asked not to cache it above */
2098 reason
= "r->no_cache present";
2102 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
2103 "cache: %s not cached. Reason: %s", r
->unparsed_uri
,
2106 /* remove this filter from the chain */
2107 ap_remove_output_filter(f
);
2109 /* ship the data up the stack */
2110 return ap_pass_brigade(f
->next
, in
);
2113 /* Make it so that we don't execute this path again. */
2114 cache
->in_checked
= 1;
2116 /* Set the content length if known.
2118 cl
= apr_table_get(r
->err_headers_out
, "Content-Length");
2120 cl
= apr_table_get(r
->headers_out
, "Content-Length");
2124 if (apr_strtoff(&size
, cl
, &errp
, 10) || *errp
|| size
< 0) {
2125 cl
= NULL
; /* parse error, see next 'if' block */
2130 /* if we don't get the content-length, see if we have all the
2131 * buckets and use their length to calculate the size
2134 int all_buckets_here
=0;
2135 int unresolved_length
= 0;
2137 for (e
= APR_BRIGADE_FIRST(in
);
2138 e
!= APR_BRIGADE_SENTINEL(in
);
2139 e
= APR_BUCKET_NEXT(e
))
2141 if (APR_BUCKET_IS_EOS(e
)) {
2145 if (APR_BUCKET_IS_FLUSH(e
)) {
2146 unresolved_length
= 1;
2149 if (e
->length
== (apr_size_t
)-1) {
2154 if (!all_buckets_here
) {
2159 /* It's safe to cache the response.
2161 * There are two possiblities at this point:
2162 * - cache->handle == NULL. In this case there is no previously
2163 * cached entity anywhere on the system. We must create a brand
2164 * new entity and store the response in it.
2165 * - cache->stale_handle != NULL. In this case there is a stale
2166 * entity in the system which needs to be replaced by new
2167 * content (unless the result was 304 Not Modified, which means
2168 * the cached entity is actually fresh, and we should update
2172 /* Did we have a stale cache entry that really is stale?
2174 * Note that for HEAD requests, we won't get the body, so for a stale
2175 * HEAD request, we don't remove the entity - instead we let the
2176 * CACHE_REMOVE_URL filter remove the stale item from the cache.
2178 if (cache
->stale_handle
) {
2179 if (r
->status
== HTTP_NOT_MODIFIED
) {
2180 /* Oh, hey. It isn't that stale! Yay! */
2181 cache
->handle
= cache
->stale_handle
;
2182 info
= &cache
->handle
->cache_obj
->info
;
2185 else if (!r
->header_only
) {
2186 /* Oh, well. Toss it. */
2187 remove_entity(cache
->stale_handle
);
2188 /* Treat the request as if it wasn't conditional. */
2189 cache
->stale_handle
= NULL
;
2191 * Restore the original request headers as they may be needed
2192 * by further output filters like the byterange filter to make
2193 * the correct decisions.
2195 r
->headers_in
= cache
->stale_headers
;
2199 /* no cache handle, create a new entity only for non-HEAD requests */
2200 if (!cache
->handle
&& !r
->header_only
) {
2201 rv
= cache_create_entity(r
, size
);
2202 info
= apr_pcalloc(r
->pool
, sizeof(cache_info
));
2203 /* We only set info->status upon the initial creation. */
2204 info
->status
= r
->status
;
2208 /* Caching layer declined the opportunity to cache the response */
2209 ap_remove_output_filter(f
);
2210 return ap_pass_brigade(f
->next
, in
);
2213 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
2214 "cache: Caching url: %s", r
->unparsed_uri
);
2216 /* We are actually caching this response. So it does not
2217 * make sense to remove this entity any more.
2219 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
2220 "cache: Removing CACHE_REMOVE_URL filter.");
2221 ap_remove_output_filter(cache
->remove_url_filter
);
2224 * We now want to update the cache file header information with
2225 * the new date, last modified, expire and content length and write
2226 * it away to our cache file. First, we determine these values from
2227 * the response, using heuristics if appropriate.
2229 * In addition, we make HTTP/1.1 age calculations and write them away
2233 /* Read the date. Generate one if one is not supplied */
2234 dates
= apr_table_get(r
->err_headers_out
, "Date");
2235 if (dates
== NULL
) {
2236 dates
= apr_table_get(r
->headers_out
, "Date");
2238 if (dates
!= NULL
) {
2239 info
->date
= apr_date_parse_http(dates
);
2242 info
->date
= APR_DATE_BAD
;
2245 now
= apr_time_now();
2246 if (info
->date
== APR_DATE_BAD
) { /* No, or bad date */
2247 /* no date header (or bad header)! */
2252 /* set response_time for HTTP/1.1 age calculations */
2253 info
->response_time
= now
;
2255 /* get the request time */
2256 info
->request_time
= r
->request_time
;
2258 /* check last-modified date */
2259 if (lastmod
!= APR_DATE_BAD
&& lastmod
> date
) {
2260 /* if it's in the future, then replace by date */
2263 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0,
2265 "cache: Last modified is in the future, "
2266 "replacing with now");
2269 /* if no expiry date then
2270 * if Cache-Control: max-age
2271 * expiry date = date + max-age
2273 * expiry date = date + min((date - lastmod) * factor, maxexpire)
2275 * expire date = date + defaultexpire
2277 if (exp
== APR_DATE_BAD
) {
2280 if (ap_cache_liststr(r
->pool
, cc_out
, "max-age", &max_age_val
) &&
2281 max_age_val
!= NULL
) {
2285 x
= apr_atoi64(max_age_val
);
2290 x
= x
* MSEC_ONE_SEC
;
2292 if (x
< conf
->minex
) {
2295 if (x
> conf
->maxex
) {
2300 else if ((lastmod
!= APR_DATE_BAD
) && (lastmod
< date
)) {
2301 /* if lastmod == date then you get 0*conf->factor which results in
2302 * an expiration time of now. This causes some problems with
2303 * freshness calculations, so we choose the else path...
2305 apr_time_t x
= (apr_time_t
) ((date
- lastmod
) * conf
->factor
);
2307 if (x
< conf
->minex
) {
2310 if (x
> conf
->maxex
) {
2316 exp
= date
+ conf
->defex
;
2322 /* We found a stale entry which wasn't really stale. */
2323 if (cache
->stale_handle
) {
2324 /* Load in the saved status and clear the status line. */
2325 r
->status
= info
->status
;
2326 r
->status_line
= NULL
;
2328 /* RFC 2616 10.3.5 states that entity headers are not supposed
2329 * to be in the 304 response. Therefore, we need to combine the
2330 * response headers with the cached headers *before* we update
2331 * the cached headers.
2333 * However, before doing that, we need to first merge in
2334 * err_headers_out and we also need to strip any hop-by-hop
2335 * headers that might have snuck in.
2337 r
->headers_out
= ap_cache_cacheable_headers_out(r
);
2339 /* Merge in our cached headers. However, keep any updated values. */
2340 ap_cache_accept_headers(cache
->handle
, r
, 1);
2343 /* Write away header information to cache. It is possible that we are
2344 * trying to update headers for an entity which has already been cached.
2346 * This may fail, due to an unwritable cache area. E.g. filesystem full,
2347 * permissions problems or a read-only (re)mount. This must be handled
2350 rv
= store_headers(cache
->handle
, r
, info
);
2352 /* Did we just update the cached headers on a revalidated response?
2354 * If so, we can now decide what to serve to the client. This is done in
2355 * the same way as with a regular response, but conditions are now checked
2356 * against the cached or merged response headers.
2358 if (cache
->stale_handle
) {
2359 apr_bucket_brigade
*bb
;
2363 bb
= apr_brigade_create(r
->pool
, r
->connection
->bucket_alloc
);
2365 /* Restore the original request headers and see if we need to
2366 * return anything else than the cached response (ie. the original
2367 * request was conditional).
2369 r
->headers_in
= cache
->stale_headers
;
2370 status
= ap_meets_conditions(r
);
2374 bkt
= apr_bucket_flush_create(bb
->bucket_alloc
);
2375 APR_BRIGADE_INSERT_TAIL(bb
, bkt
);
2378 recall_body(cache
->handle
, r
->pool
, bb
);
2381 cache
->block_response
= 1;
2383 /* Before returning we need to handle the possible case of an
2384 * unwritable cache. Rather than leaving the entity in the cache
2385 * and having it constantly re-validated, now that we have recalled
2386 * the body it is safe to try and remove the url from the cache.
2388 if (rv
!= APR_SUCCESS
) {
2389 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
2390 "cache: updating headers with store_headers failed. "
2391 "Removing cached url.");
2393 rv
= remove_url(cache
->stale_handle
, r
->pool
);
2395 /* Probably a mod_disk_cache cache area has been (re)mounted
2396 * read-only, or that there is a permissions problem.
2398 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
2399 "cache: attempt to remove url from cache unsuccessful.");
2403 return ap_pass_brigade(f
->next
, bb
);
2406 if(rv
!= APR_SUCCESS
) {
2407 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
2408 "cache: store_headers failed");
2409 ap_remove_output_filter(f
);
2411 return ap_pass_brigade(f
->next
, in
);
2414 rv
= store_body(cache
->handle
, r
, in
);
2415 if (rv
!= APR_SUCCESS
) {
2416 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, rv
, r
->server
,
2417 "cache: store_body failed");
2418 ap_remove_output_filter(f
);
2421 return ap_pass_brigade(f
->next
, in
);
2426 * CACHE_REMOVE_URL filter
2429 * This filter gets added in the quick handler every time the CACHE_SAVE filter
2430 * gets inserted. Its purpose is to remove a confirmed stale cache entry from
2433 * CACHE_REMOVE_URL has to be a protocol filter to ensure that is run even if
2434 * the response is a canned error message, which removes the content filters
2435 * and thus the CACHE_SAVE filter from the chain.
2437 * CACHE_REMOVE_URL expects cache request rec within its context because the
2438 * request this filter runs on can be different from the one whose cache entry
2439 * should be removed, due to internal redirects.
2441 * Note that CACHE_SAVE_URL (as a content-set filter, hence run before the
2442 * protocol filters) will remove this filter if it decides to cache the file.
2443 * Therefore, if this filter is left in, it must mean we need to toss any
2446 int cache_remove_url_filter(ap_filter_t
*f
, apr_bucket_brigade
*in
)
2448 request_rec
*r
= f
->r
;
2449 cache_request_rec
*cache
;
2451 /* Setup cache_request_rec */
2452 cache
= (cache_request_rec
*) f
->ctx
;
2455 /* user likely configured CACHE_REMOVE_URL manually; they should really
2456 * use mod_cache configuration to do that. So:
2457 * 1. Remove ourselves
2458 * 2. Do nothing and bail out
2460 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, r
->server
,
2461 "cache: CACHE_REMOVE_URL enabled unexpectedly");
2462 ap_remove_output_filter(f
);
2463 return ap_pass_brigade(f
->next
, in
);
2465 /* Now remove this cache entry from the cache */
2466 cache_remove_url(cache
, r
->pool
);
2468 /* remove ourselves */
2469 ap_remove_output_filter(f
);
2470 return ap_pass_brigade(f
->next
, in
);
2474 /*static const cache_provider crccache_client_provider = { &remove_entity,
2475 &store_headers, &store_body, &recall_headers, &recall_body,
2476 &create_entity, &open_entity, &remove_url, };
/*
 * Hook-registration routine of the crccache client module.
 *
 * In the order shown below it:
 *   - logs an APLOG_INFO message announcing the module,
 *   - installs cache_url_handler as a quick handler (APR_HOOK_FIRST),
 *   - registers the output filters CACHE_SAVE, CACHE_SAVE_SUBREQ, CACHE_OUT,
 *     CACHE_OUT_SUBREQ, CACHE_REMOVE_URL and CRCCACHE_DECODE, keeping each
 *     returned handle in the matching file-scope *_handle variable,
 *   - installs cache_post_config as a post-config hook
 *     (APR_HOOK_REALLY_FIRST).
 *
 * p: pool argument; in the code shown here it is referenced only by the
 *    commented-out ap_register_provider() call.
 *
 * NOTE(review): the leading numbers on many lines and the one-token-per-line
 * layout look like extraction residue rather than source text. Several
 * ap_register_output_filter() calls below show only a filter name and a
 * filter type, whereas the CRCCACHE_DECODE call passes a name, a filter
 * function, NULL and a type. The omitted arguments are presumably lost from
 * this view; confirm against the complete file before editing the calls.
 */
2478 static void disk_cache_register_hook(apr_pool_t
*p
) {
2479 ap_log_error(APLOG_MARK
, APLOG_INFO
, 0, NULL
,
2480 "Registering crccache client module, (C) 2009, Toby Collett");
2482 /* cache initializer */
2484 ap_hook_quick_handler(cache_url_handler
, NULL
, NULL
, APR_HOOK_FIRST
);
2486 * XXX The cache filters need to run right after the handlers and before
2487 * any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
2489 * Depending on the type of request (subrequest / main request) they
2490 * need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
2491 * filters. Thus create two filter handles for each type:
2492 * cache_save_filter_handle / cache_out_filter_handle to be used by
2494 * cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
2495 * to be run by subrequest
2498 * CACHE_SAVE must go into the filter chain after a possible DEFLATE
2499 * filter to ensure that the compressed content is stored.
2500 * Incrementing filter type by 1 ensures his happens.
2502 cache_save_filter_handle
=
2503 ap_register_output_filter("CACHE_SAVE",
2506 AP_FTYPE_CONTENT_SET
+1);
2508 * CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
2509 * handle subrequsts. Decrementing filter type by 1 ensures this
2512 cache_save_subreq_filter_handle
=
2513 ap_register_output_filter("CACHE_SAVE_SUBREQ",
2516 AP_FTYPE_CONTENT_SET
-1);
2518 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2519 * filter to ensure that already compressed cache objects do not
2520 * get compressed again. Incrementing filter type by 1 ensures
2523 cache_out_filter_handle
=
2524 ap_register_output_filter("CACHE_OUT",
2527 AP_FTYPE_CONTENT_SET
+1);
2529 * CACHE_OUT_SUBREQ must go into the filter chain before SUBREQ_CORE to
2530 * handle subrequsts. Decrementing filter type by 1 ensures this
2533 cache_out_subreq_filter_handle
=
2534 ap_register_output_filter("CACHE_OUT_SUBREQ",
2537 AP_FTYPE_CONTENT_SET
-1);
2538 /* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
2539 * run even if the response is a canned error message, which
2540 * removes the content filters.
2542 cache_remove_url_filter_handle
=
2543 ap_register_output_filter("CACHE_REMOVE_URL",
2544 cache_remove_url_filter
,
2548 /* cache initializer */
2549 // ap_register_provider(p, CACHE_PROVIDER_GROUP, "crccache_client", "0",
2550 // &crccache_client_provider);
/* NOTE(review): the text that follows repeats the CACHE_OUT rationale word
 * for word but sits above the CRCCACHE_DECODE registration; confirm whether
 * the same DEFLATE-ordering argument is intended for CRCCACHE_DECODE. */
2552 * CACHE_OUT must go into the filter chain after a possible DEFLATE
2553 * filter to ensure that already compressed cache objects do not
2554 * get compressed again. Incrementing filter type by 1 ensures
2557 crccache_decode_filter_handle
= ap_register_output_filter(
2558 "CRCCACHE_DECODE", crccache_decode_filter
, NULL
,
2559 AP_FTYPE_CONTENT_SET
+ 1);
2561 ap_hook_post_config(cache_post_config
, NULL
, NULL
, APR_HOOK_REALLY_FIRST
);
2565 module AP_MODULE_DECLARE_DATA crccache_client_module
= {
2566 STANDARD20_MODULE_STUFF
, NULL
, /* create per-directory config structure */
2567 NULL
, /* merge per-directory config structures */
2568 create_config
, /* create per-server config structure */
2569 NULL
, /* merge per-server config structures */
2570 disk_cache_cmds
, /* command apr_table_t */
2571 disk_cache_register_hook
/* register hooks */