enabled block processing properly.
[httpd-crcsyncproxy.git] / modules / cache / mod_disk_cache.c
blob3c4876945179c0407ad271e2081c8d2b2407c0ca
1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "apr_file_io.h"
18 #include "apr_strings.h"
19 #include "mod_cache.h"
20 #include "mod_disk_cache.h"
21 #include "ap_provider.h"
22 #include "util_filter.h"
23 #include "util_script.h"
24 #include "util_charset.h"
/*
 * mod_disk_cache: Disk Based HTTP 1.1 Cache.
 *
 * Flow to find the .data file:
 *   Incoming client requests URI /foo/bar/baz
 *   Generate <hash> off of /foo/bar/baz
 *   Open <hash>.header
 *   Read in <hash>.header file (may contain Format #1 or Format #2)
 *   If format #1 (contains a list of Vary headers):
 *      Use each header name (from .header) with our request values
 *      (headers_in) to regenerate <hash> using
 *      HeaderName+HeaderValue+.../foo/bar/baz
 *      Re-read in <hash>.header (must be format #2)
 *   Read in <hash>.data
 *
 * Format #1:
 *   apr_uint32_t format;
 *   apr_time_t expire;
 *   apr_array_t vary_headers (delimited by CRLF)
 *
 * Format #2:
 *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
 *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
 *   r->headers_out (delimited by CRLF)
 *   CRLF
 *   r->headers_in (delimited by CRLF)
 *   CRLF
 */
54 module AP_MODULE_DECLARE_DATA disk_cache_module;
56 /* Forward declarations */
57 static int remove_entity(cache_handle_t *h);
58 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
59 static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *b);
60 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
61 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
62 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
63 apr_file_t *file);
66 * Local static functions
69 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
70 disk_cache_object_t *dobj, const char *name)
72 if (!dobj->hashfile) {
73 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
74 conf->dirlength, name);
77 if (dobj->prefix) {
78 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
79 dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
81 else {
82 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
83 CACHE_HEADER_SUFFIX, NULL);
87 static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
88 disk_cache_object_t *dobj, const char *name)
90 if (!dobj->hashfile) {
91 dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
92 conf->dirlength, name);
95 if (dobj->prefix) {
96 return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
97 dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
99 else {
100 return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
101 CACHE_DATA_SUFFIX, NULL);
105 static apr_status_t mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
107 apr_status_t rv;
108 char *p;
110 for (p = (char*)file + conf->cache_root_len + 1;;) {
111 p = strchr(p, '/');
112 if (!p)
113 break;
114 *p = '\0';
116 rv = apr_dir_make(file,
117 APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
118 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
119 return rv;
121 *p = '/';
122 ++p;
124 return APR_SUCCESS;
127 /* htcacheclean may remove directories underneath us.
128 * So, we'll try renaming three times at a cost of 0.002 seconds.
130 static apr_status_t safe_file_rename(disk_cache_conf *conf,
131 const char *src, const char *dest,
132 apr_pool_t *pool)
134 apr_status_t rv;
136 rv = apr_file_rename(src, dest, pool);
138 if (rv != APR_SUCCESS) {
139 int i;
141 for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
142 /* 1000 micro-seconds aka 0.001 seconds. */
143 apr_sleep(1000);
145 rv = mkdir_structure(conf, dest, pool);
146 if (rv != APR_SUCCESS)
147 continue;
149 rv = apr_file_rename(src, dest, pool);
153 return rv;
156 static apr_status_t file_cache_el_final(disk_cache_object_t *dobj,
157 request_rec *r)
159 /* move the data over */
160 if (dobj->tfd) {
161 apr_status_t rv;
163 apr_file_close(dobj->tfd);
165 /* This assumes that the tempfile is on the same file system
166 * as the cache_root. If not, then we need a file copy/move
167 * rather than a rename.
169 rv = apr_file_rename(dobj->tempfile, dobj->datafile, r->pool);
170 if (rv != APR_SUCCESS) {
171 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
172 "disk_cache: rename tempfile to datafile failed:"
173 " %s -> %s", dobj->tempfile, dobj->datafile);
174 apr_file_remove(dobj->tempfile, r->pool);
177 dobj->tfd = NULL;
180 return APR_SUCCESS;
183 static apr_status_t file_cache_errorcleanup(disk_cache_object_t *dobj, request_rec *r)
185 /* Remove the header file and the body file. */
186 apr_file_remove(dobj->hdrsfile, r->pool);
187 apr_file_remove(dobj->datafile, r->pool);
189 /* If we opened the temporary data file, close and remove it. */
190 if (dobj->tfd) {
191 apr_file_close(dobj->tfd);
192 apr_file_remove(dobj->tempfile, r->pool);
193 dobj->tfd = NULL;
196 return APR_SUCCESS;
200 /* These two functions get and put state information into the data
201 * file for an ap_cache_el, this state information will be read
202 * and written transparent to clients of this module
204 static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
205 disk_cache_object_t *dobj, request_rec *r)
207 apr_status_t rv;
208 char *urlbuff;
209 disk_cache_info_t disk_info;
210 apr_size_t len;
212 /* read the data from the cache file */
213 len = sizeof(disk_cache_info_t);
214 rv = apr_file_read_full(fd, &disk_info, len, &len);
215 if (rv != APR_SUCCESS) {
216 return rv;
219 /* Store it away so we can get it later. */
220 dobj->disk_info = disk_info;
222 info->status = disk_info.status;
223 info->date = disk_info.date;
224 info->expire = disk_info.expire;
225 info->request_time = disk_info.request_time;
226 info->response_time = disk_info.response_time;
228 /* Note that we could optimize this by conditionally doing the palloc
229 * depending upon the size. */
230 urlbuff = apr_palloc(r->pool, disk_info.name_len + 1);
231 len = disk_info.name_len;
232 rv = apr_file_read_full(fd, urlbuff, len, &len);
233 if (rv != APR_SUCCESS) {
234 return rv;
236 urlbuff[disk_info.name_len] = '\0';
238 /* check that we have the same URL */
239 /* Would strncmp be correct? */
240 if (strcmp(urlbuff, dobj->name) != 0) {
241 return APR_EGENERAL;
244 return APR_SUCCESS;
247 static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
248 apr_array_header_t *varray, const char *oldkey)
250 struct iovec *iov;
251 int i, k;
252 int nvec;
253 const char *header;
254 const char **elts;
256 nvec = (varray->nelts * 2) + 1;
257 iov = apr_palloc(p, sizeof(struct iovec) * nvec);
258 elts = (const char **) varray->elts;
260 /* TODO:
261 * - Handle multiple-value headers better. (sort them?)
262 * - Handle Case in-sensitive Values better.
263 * This isn't the end of the world, since it just lowers the cache
264 * hit rate, but it would be nice to fix.
266 * The majority are case insenstive if they are values (encoding etc).
267 * Most of rfc2616 is case insensitive on header contents.
269 * So the better solution may be to identify headers which should be
270 * treated case-sensitive?
271 * HTTP URI's (3.2.3) [host and scheme are insensitive]
272 * HTTP method (5.1.1)
273 * HTTP-date values (3.3.1)
274 * 3.7 Media Types [exerpt]
275 * The type, subtype, and parameter attribute names are case-
276 * insensitive. Parameter values might or might not be case-sensitive,
277 * depending on the semantics of the parameter name.
278 * 4.20 Except [exerpt]
279 * Comparison of expectation values is case-insensitive for unquoted
280 * tokens (including the 100-continue token), and is case-sensitive for
281 * quoted-string expectation-extensions.
284 for(i=0, k=0; i < varray->nelts; i++) {
285 header = apr_table_get(headers, elts[i]);
286 if (!header) {
287 header = "";
289 iov[k].iov_base = (char*) elts[i];
290 iov[k].iov_len = strlen(elts[i]);
291 k++;
292 iov[k].iov_base = (char*) header;
293 iov[k].iov_len = strlen(header);
294 k++;
296 iov[k].iov_base = (char*) oldkey;
297 iov[k].iov_len = strlen(oldkey);
298 k++;
300 return apr_pstrcatv(p, iov, k, NULL);
/*
 * qsort() comparator for an array of C strings.
 *
 * Each argument points at an array element, i.e. at a `char *`.  The
 * elements are only read, so they are accessed through const-qualified
 * pointers instead of casting the const away.
 *
 * Returns the strcmp() ordering of the two strings.
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *lhs = *(const char * const *) fn1;
    const char *rhs = *(const char * const *) fn2;

    return strcmp(lhs, rhs);
}
308 static void tokens_to_array(apr_pool_t *p, const char *data,
309 apr_array_header_t *arr)
311 char *token;
313 while ((token = ap_get_list_item(p, &data)) != NULL) {
314 *((const char **) apr_array_push(arr)) = token;
317 /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
318 qsort((void *) arr->elts, arr->nelts,
319 sizeof(char *), array_alphasort);
323 * Hook and mod_cache callback functions
325 static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len)
327 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
328 &disk_cache_module);
329 cache_object_t *obj;
330 disk_cache_object_t *dobj;
332 if (conf->cache_root == NULL) {
333 return DECLINED;
336 /* Note, len is -1 if unknown so don't trust it too hard */
337 if (len > conf->maxfs) {
338 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
339 "disk_cache: URL %s failed the size check "
340 "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
341 key, len, conf->maxfs);
342 return DECLINED;
344 if (len >= 0 && len < conf->minfs) {
345 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
346 "disk_cache: URL %s failed the size check "
347 "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
348 key, len, conf->minfs);
349 return DECLINED;
352 /* Allocate and initialize cache_object_t and disk_cache_object_t */
353 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
354 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
356 obj->key = apr_pstrdup(r->pool, key);
358 dobj->name = obj->key;
359 dobj->prefix = NULL;
360 /* Save the cache root */
361 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
362 dobj->root_len = conf->cache_root_len;
363 dobj->datafile = data_file(r->pool, conf, dobj, key);
364 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
365 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
367 return OK;
370 static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
372 apr_uint32_t format;
373 apr_size_t len;
374 const char *nkey;
375 apr_status_t rc;
376 static int error_logged = 0;
377 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
378 &disk_cache_module);
379 #ifdef APR_SENDFILE_ENABLED
380 core_dir_config *coreconf = ap_get_module_config(r->per_dir_config,
381 &core_module);
382 #endif
383 apr_finfo_t finfo;
384 cache_object_t *obj;
385 cache_info *info;
386 disk_cache_object_t *dobj;
387 int flags;
389 h->cache_obj = NULL;
391 /* Look up entity keyed to 'url' */
392 if (conf->cache_root == NULL) {
393 if (!error_logged) {
394 error_logged = 1;
395 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
396 "disk_cache: Cannot cache files to disk without a CacheRoot specified.");
398 return DECLINED;
401 /* Create and init the cache object */
402 h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
403 obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
405 info = &(obj->info);
407 /* Open the headers file */
408 dobj->prefix = NULL;
410 /* Save the cache root */
411 dobj->root = apr_pstrndup(r->pool, conf->cache_root, conf->cache_root_len);
412 dobj->root_len = conf->cache_root_len;
414 dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
415 flags = APR_READ|APR_BINARY|APR_BUFFERED;
416 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
417 if (rc != APR_SUCCESS) {
418 return DECLINED;
421 /* read the format from the cache file */
422 len = sizeof(format);
423 apr_file_read_full(dobj->hfd, &format, len, &len);
425 if (format == VARY_FORMAT_VERSION) {
426 apr_array_header_t* varray;
427 apr_time_t expire;
429 len = sizeof(expire);
430 apr_file_read_full(dobj->hfd, &expire, len, &len);
432 varray = apr_array_make(r->pool, 5, sizeof(char*));
433 rc = read_array(r, varray, dobj->hfd);
434 if (rc != APR_SUCCESS) {
435 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
436 "disk_cache: Cannot parse vary header file: %s",
437 dobj->hdrsfile);
438 return DECLINED;
440 apr_file_close(dobj->hfd);
442 nkey = regen_key(r->pool, r->headers_in, varray, key);
444 dobj->hashfile = NULL;
445 dobj->prefix = dobj->hdrsfile;
446 dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);
448 flags = APR_READ|APR_BINARY|APR_BUFFERED;
449 rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, flags, 0, r->pool);
450 if (rc != APR_SUCCESS) {
451 return DECLINED;
454 else if (format != DISK_FORMAT_VERSION) {
455 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
456 "disk_cache: File '%s' has a version mismatch. File had version: %d.",
457 dobj->hdrsfile, format);
458 return DECLINED;
460 else {
461 apr_off_t offset = 0;
462 /* This wasn't a Vary Format file, so we must seek to the
463 * start of the file again, so that later reads work.
465 apr_file_seek(dobj->hfd, APR_SET, &offset);
466 nkey = key;
469 obj->key = nkey;
470 dobj->key = nkey;
471 dobj->name = key;
472 dobj->datafile = data_file(r->pool, conf, dobj, nkey);
473 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
475 /* Open the data file */
476 flags = APR_READ|APR_BINARY;
477 #ifdef APR_SENDFILE_ENABLED
478 /* When we are in the quick handler we don't have the per-directory
479 * configuration, so this check only takes the globel setting of
480 * the EnableSendFile directive into account.
482 flags |= ((coreconf->enable_sendfile == ENABLE_SENDFILE_OFF)
483 ? 0 : APR_SENDFILE_ENABLED);
484 #endif
485 rc = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool);
486 if (rc != APR_SUCCESS) {
487 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
488 "disk_cache: Cannot open info header file %s", dobj->datafile);
489 return DECLINED;
492 rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
493 if (rc == APR_SUCCESS) {
494 dobj->file_size = finfo.size;
497 /* Read the bytes to setup the cache_info fields */
498 rc = file_cache_recall_mydata(dobj->hfd, info, dobj, r);
499 if (rc != APR_SUCCESS) {
500 ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
501 "disk_cache: Cannot read header file %s", dobj->hdrsfile);
502 return DECLINED;
505 /* Initialize the cache_handle callback functions */
506 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
507 "disk_cache: Recalled cached URL info header %s", dobj->name);
508 return OK;
511 static int remove_entity(cache_handle_t *h)
513 /* Null out the cache object pointer so next time we start from scratch */
514 h->cache_obj = NULL;
515 return OK;
518 static int remove_url(cache_handle_t *h, apr_pool_t *p)
520 apr_status_t rc;
521 disk_cache_object_t *dobj;
523 /* Get disk cache object from cache handle */
524 dobj = (disk_cache_object_t *) h->cache_obj->vobj;
525 if (!dobj) {
526 return DECLINED;
529 /* Delete headers file */
530 if (dobj->hdrsfile) {
531 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
532 "disk_cache: Deleting %s from cache.", dobj->hdrsfile);
534 rc = apr_file_remove(dobj->hdrsfile, p);
535 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
536 /* Will only result in an output if httpd is started with -e debug.
537 * For reason see log_error_core for the case s == NULL.
539 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
540 "disk_cache: Failed to delete headers file %s from cache.",
541 dobj->hdrsfile);
542 return DECLINED;
546 /* Delete data file */
547 if (dobj->datafile) {
548 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
549 "disk_cache: Deleting %s from cache.", dobj->datafile);
551 rc = apr_file_remove(dobj->datafile, p);
552 if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
553 /* Will only result in an output if httpd is started with -e debug.
554 * For reason see log_error_core for the case s == NULL.
556 ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
557 "disk_cache: Failed to delete data file %s from cache.",
558 dobj->datafile);
559 return DECLINED;
563 /* now delete directories as far as possible up to our cache root */
564 if (dobj->root) {
565 const char *str_to_copy;
567 str_to_copy = dobj->hdrsfile ? dobj->hdrsfile : dobj->datafile;
568 if (str_to_copy) {
569 char *dir, *slash, *q;
571 dir = apr_pstrdup(p, str_to_copy);
573 /* remove filename */
574 slash = strrchr(dir, '/');
575 *slash = '\0';
578 * now walk our way back to the cache root, delete everything
579 * in the way as far as possible
581 * Note: due to the way we constructed the file names in
582 * header_file and data_file, we are guaranteed that the
583 * cache_root is suffixed by at least one '/' which will be
584 * turned into a terminating null by this loop. Therefore,
585 * we won't either delete or go above our cache root.
587 for (q = dir + dobj->root_len; *q ; ) {
588 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
589 "disk_cache: Deleting directory %s from cache",
590 dir);
592 rc = apr_dir_remove(dir, p);
593 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
594 break;
596 slash = strrchr(q, '/');
597 *slash = '\0';
602 return OK;
605 static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
606 apr_file_t *file)
608 char w[MAX_STRING_LEN];
609 int p;
610 apr_status_t rv;
612 while (1) {
613 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
614 if (rv != APR_SUCCESS) {
615 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
616 "Premature end of vary array.");
617 return rv;
620 p = strlen(w);
621 if (p > 0 && w[p - 1] == '\n') {
622 if (p > 1 && w[p - 2] == CR) {
623 w[p - 2] = '\0';
625 else {
626 w[p - 1] = '\0';
630 /* If we've finished reading the array, break out of the loop. */
631 if (w[0] == '\0') {
632 break;
635 *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
638 return APR_SUCCESS;
641 static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
643 int i;
644 apr_status_t rv;
645 struct iovec iov[2];
646 apr_size_t amt;
647 const char **elts;
649 elts = (const char **) arr->elts;
651 for (i = 0; i < arr->nelts; i++) {
652 iov[0].iov_base = (char*) elts[i];
653 iov[0].iov_len = strlen(elts[i]);
654 iov[1].iov_base = CRLF;
655 iov[1].iov_len = sizeof(CRLF) - 1;
657 rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
658 &amt);
659 if (rv != APR_SUCCESS) {
660 return rv;
664 iov[0].iov_base = CRLF;
665 iov[0].iov_len = sizeof(CRLF) - 1;
667 return apr_file_writev(fd, (const struct iovec *) &iov, 1,
668 &amt);
671 static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
672 apr_table_t *table, apr_file_t *file)
674 char w[MAX_STRING_LEN];
675 char *l;
676 int p;
677 apr_status_t rv;
679 while (1) {
681 /* ### What about APR_EOF? */
682 rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
683 if (rv != APR_SUCCESS) {
684 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
685 "Premature end of cache headers.");
686 return rv;
689 /* Delete terminal (CR?)LF */
691 p = strlen(w);
692 /* Indeed, the host's '\n':
693 '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
694 -- whatever the script generates.
696 if (p > 0 && w[p - 1] == '\n') {
697 if (p > 1 && w[p - 2] == CR) {
698 w[p - 2] = '\0';
700 else {
701 w[p - 1] = '\0';
705 /* If we've finished reading the headers, break out of the loop. */
706 if (w[0] == '\0') {
707 break;
710 #if APR_CHARSET_EBCDIC
711 /* Chances are that we received an ASCII header text instead of
712 * the expected EBCDIC header lines. Try to auto-detect:
714 if (!(l = strchr(w, ':'))) {
715 int maybeASCII = 0, maybeEBCDIC = 0;
716 unsigned char *cp, native;
717 apr_size_t inbytes_left, outbytes_left;
719 for (cp = w; *cp != '\0'; ++cp) {
720 native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
721 if (apr_isprint(*cp) && !apr_isprint(native))
722 ++maybeEBCDIC;
723 if (!apr_isprint(*cp) && apr_isprint(native))
724 ++maybeASCII;
726 if (maybeASCII > maybeEBCDIC) {
727 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
728 "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
729 r->filename);
730 inbytes_left = outbytes_left = cp - w;
731 apr_xlate_conv_buffer(ap_hdrs_from_ascii,
732 w, &inbytes_left, w, &outbytes_left);
735 #endif /*APR_CHARSET_EBCDIC*/
737 /* if we see a bogus header don't ignore it. Shout and scream */
738 if (!(l = strchr(w, ':'))) {
739 return APR_EGENERAL;
742 *l++ = '\0';
743 while (*l && apr_isspace(*l)) {
744 ++l;
747 apr_table_add(table, w, l);
750 return APR_SUCCESS;
754 * Reads headers from a buffer and returns an array of headers.
755 * Returns NULL on file error
756 * This routine tries to deal with too long lines and continuation lines.
757 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
758 * Is that okay, or should they be collapsed where possible?
760 static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
762 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
764 /* This case should not happen... */
765 if (!dobj->hfd) {
766 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
767 "disk_cache: recalling headers; but no header fd for %s", dobj->name);
768 return APR_NOTFOUND;
771 h->req_hdrs = apr_table_make(r->pool, 20);
772 h->resp_hdrs = apr_table_make(r->pool, 20);
774 /* Call routine to read the header lines/status line */
775 read_table(h, r, h->resp_hdrs, dobj->hfd);
776 read_table(h, r, h->req_hdrs, dobj->hfd);
778 apr_file_close(dobj->hfd);
780 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
781 "disk_cache: Recalled headers for URL %s", dobj->name);
782 return APR_SUCCESS;
785 static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
787 apr_bucket *e;
788 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
790 apr_brigade_insert_file(bb, dobj->fd, 0, dobj->file_size, p);
792 e = apr_bucket_eos_create(bb->bucket_alloc);
793 APR_BRIGADE_INSERT_TAIL(bb, e);
795 return APR_SUCCESS;
798 static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
800 int i;
801 apr_status_t rv;
802 struct iovec iov[4];
803 apr_size_t amt;
804 apr_table_entry_t *elts;
806 elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
807 for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
808 if (elts[i].key != NULL) {
809 iov[0].iov_base = elts[i].key;
810 iov[0].iov_len = strlen(elts[i].key);
811 iov[1].iov_base = ": ";
812 iov[1].iov_len = sizeof(": ") - 1;
813 iov[2].iov_base = elts[i].val;
814 iov[2].iov_len = strlen(elts[i].val);
815 iov[3].iov_base = CRLF;
816 iov[3].iov_len = sizeof(CRLF) - 1;
818 rv = apr_file_writev(fd, (const struct iovec *) &iov, 4,
819 &amt);
820 if (rv != APR_SUCCESS) {
821 return rv;
825 iov[0].iov_base = CRLF;
826 iov[0].iov_len = sizeof(CRLF) - 1;
827 rv = apr_file_writev(fd, (const struct iovec *) &iov, 1,
828 &amt);
829 return rv;
832 static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
834 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
835 &disk_cache_module);
836 apr_status_t rv;
837 apr_size_t amt;
838 disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
840 disk_cache_info_t disk_info;
841 struct iovec iov[2];
843 /* This is flaky... we need to manage the cache_info differently */
844 h->cache_obj->info = *info;
846 if (r->headers_out) {
847 const char *tmp;
849 tmp = apr_table_get(r->headers_out, "Vary");
851 if (tmp) {
852 apr_array_header_t* varray;
853 apr_uint32_t format = VARY_FORMAT_VERSION;
855 /* If we were initially opened as a vary format, rollback
856 * that internal state for the moment so we can recreate the
857 * vary format hints in the appropriate directory.
859 if (dobj->prefix) {
860 dobj->hdrsfile = dobj->prefix;
861 dobj->prefix = NULL;
864 rv = mkdir_structure(conf, dobj->hdrsfile, r->pool);
866 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
867 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
868 r->pool);
870 if (rv != APR_SUCCESS) {
871 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
872 "disk_cache: could not create temp file %s",
873 dobj->tempfile);
874 return rv;
877 amt = sizeof(format);
878 apr_file_write(dobj->tfd, &format, &amt);
880 amt = sizeof(info->expire);
881 apr_file_write(dobj->tfd, &info->expire, &amt);
883 varray = apr_array_make(r->pool, 6, sizeof(char*));
884 tokens_to_array(r->pool, tmp, varray);
886 store_array(dobj->tfd, varray);
888 apr_file_close(dobj->tfd);
890 dobj->tfd = NULL;
892 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile,
893 r->pool);
894 if (rv != APR_SUCCESS) {
895 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
896 "disk_cache: rename tempfile to varyfile failed: %s -> %s",
897 dobj->tempfile, dobj->hdrsfile);
898 apr_file_remove(dobj->tempfile, r->pool);
899 return rv;
902 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
903 tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
904 dobj->prefix = dobj->hdrsfile;
905 dobj->hashfile = NULL;
906 dobj->datafile = data_file(r->pool, conf, dobj, tmp);
907 dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);
912 rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
913 APR_CREATE | APR_WRITE | APR_BINARY |
914 APR_BUFFERED | APR_EXCL, r->pool);
916 if (rv != APR_SUCCESS) {
917 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
918 "disk_cache: could not create temp file %s",
919 dobj->tempfile);
920 return rv;
923 disk_info.format = DISK_FORMAT_VERSION;
924 disk_info.date = info->date;
925 disk_info.expire = info->expire;
926 disk_info.entity_version = dobj->disk_info.entity_version++;
927 disk_info.request_time = info->request_time;
928 disk_info.response_time = info->response_time;
929 disk_info.status = info->status;
931 disk_info.name_len = strlen(dobj->name);
933 iov[0].iov_base = (void*)&disk_info;
934 iov[0].iov_len = sizeof(disk_cache_info_t);
935 iov[1].iov_base = (void*)dobj->name;
936 iov[1].iov_len = disk_info.name_len;
938 rv = apr_file_writev(dobj->hfd, (const struct iovec *) &iov, 2, &amt);
939 if (rv != APR_SUCCESS) {
940 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
941 "disk_cache: could not write info to header file %s",
942 dobj->hdrsfile);
943 return rv;
946 if (r->headers_out) {
947 apr_table_t *headers_out;
949 headers_out = ap_cache_cacheable_headers_out(r);
951 rv = store_table(dobj->hfd, headers_out);
952 if (rv != APR_SUCCESS) {
953 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
954 "disk_cache: could not write out-headers to header file %s",
955 dobj->hdrsfile);
956 return rv;
960 /* Parse the vary header and dump those fields from the headers_in. */
961 /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
962 if (r->headers_in) {
963 apr_table_t *headers_in;
965 headers_in = ap_cache_cacheable_headers_in(r);
967 rv = store_table(dobj->hfd, headers_in);
968 if (rv != APR_SUCCESS) {
969 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
970 "disk_cache: could not write in-headers to header file %s",
971 dobj->hdrsfile);
972 return rv;
976 apr_file_close(dobj->hfd); /* flush and close */
978 /* Remove old file with the same name. If remove fails, then
979 * perhaps we need to create the directory tree where we are
980 * about to write the new headers file.
982 rv = apr_file_remove(dobj->hdrsfile, r->pool);
983 if (rv != APR_SUCCESS) {
984 rv = mkdir_structure(conf, dobj->hdrsfile, r->pool);
987 rv = safe_file_rename(conf, dobj->tempfile, dobj->hdrsfile, r->pool);
988 if (rv != APR_SUCCESS) {
989 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, r->server,
990 "disk_cache: rename tempfile to hdrsfile failed: %s -> %s",
991 dobj->tempfile, dobj->hdrsfile);
992 apr_file_remove(dobj->tempfile, r->pool);
993 return rv;
996 dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
998 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
999 "disk_cache: Stored headers for URL %s", dobj->name);
1000 return APR_SUCCESS;
1003 static apr_status_t store_body(cache_handle_t *h, request_rec *r,
1004 apr_bucket_brigade *bb)
1006 apr_bucket *e;
1007 apr_status_t rv;
1008 disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1009 disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1010 &disk_cache_module);
1012 /* We write to a temp file and then atomically rename the file over
1013 * in file_cache_el_final().
1015 if (!dobj->tfd) {
1016 rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
1017 APR_CREATE | APR_WRITE | APR_BINARY |
1018 APR_BUFFERED | APR_EXCL, r->pool);
1019 if (rv != APR_SUCCESS) {
1020 return rv;
1022 dobj->file_size = 0;
1025 for (e = APR_BRIGADE_FIRST(bb);
1026 e != APR_BRIGADE_SENTINEL(bb);
1027 e = APR_BUCKET_NEXT(e))
1029 const char *str;
1030 apr_size_t length, written;
1031 rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1032 if (rv != APR_SUCCESS) {
1033 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1034 "disk_cache: Error when reading bucket for URL %s",
1035 h->cache_obj->key);
1036 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1037 file_cache_errorcleanup(dobj, r);
1038 return rv;
1040 rv = apr_file_write_full(dobj->tfd, str, length, &written);
1041 if (rv != APR_SUCCESS) {
1042 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
1043 "disk_cache: Error when writing cache file for URL %s",
1044 h->cache_obj->key);
1045 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1046 file_cache_errorcleanup(dobj, r);
1047 return rv;
1049 dobj->file_size += written;
1050 if (dobj->file_size > conf->maxfs) {
1051 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1052 "disk_cache: URL %s failed the size check "
1053 "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")",
1054 h->cache_obj->key, dobj->file_size, conf->maxfs);
1055 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1056 file_cache_errorcleanup(dobj, r);
1057 return APR_EGENERAL;
1061 /* Was this the final bucket? If yes, close the temp file and perform
1062 * sanity checks.
1064 if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
1065 if (r->connection->aborted || r->no_cache) {
1066 ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
1067 "disk_cache: Discarding body for URL %s "
1068 "because connection has been aborted.",
1069 h->cache_obj->key);
1070 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1071 file_cache_errorcleanup(dobj, r);
1072 return APR_EGENERAL;
1074 if (dobj->file_size < conf->minfs) {
1075 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1076 "disk_cache: URL %s failed the size check "
1077 "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")",
1078 h->cache_obj->key, dobj->file_size, conf->minfs);
1079 /* Remove the intermediate cache file and return non-APR_SUCCESS */
1080 file_cache_errorcleanup(dobj, r);
1081 return APR_EGENERAL;
1084 /* All checks were fine. Move tempfile to final destination */
1085 /* Link to the perm file, and close the descriptor */
1086 file_cache_el_final(dobj, r);
1087 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
1088 "disk_cache: Body for URL %s cached.", dobj->name);
1091 return APR_SUCCESS;
1094 static void *create_config(apr_pool_t *p, server_rec *s)
1096 disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
1098 /* XXX: Set default values */
1099 conf->dirlevels = DEFAULT_DIRLEVELS;
1100 conf->dirlength = DEFAULT_DIRLENGTH;
1101 conf->maxfs = DEFAULT_MAX_FILE_SIZE;
1102 conf->minfs = DEFAULT_MIN_FILE_SIZE;
1104 conf->cache_root = NULL;
1105 conf->cache_root_len = 0;
1107 return conf;
/*
 * mod_disk_cache configuration directive handlers.
 */
1113 static const char
1114 *set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1116 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1117 &disk_cache_module);
1118 conf->cache_root = arg;
1119 conf->cache_root_len = strlen(arg);
1120 /* TODO: canonicalize cache_root and strip off any trailing slashes */
1122 return NULL;
/*
 * Consider eliminating the next two directives in favor of
 * Ian's prime number hash:
 *   key      = hash_fn(r->uri)
 *   filename = "/key % prime1/key % prime2/key % prime3"
 */
1131 static const char
1132 *set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1134 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1135 &disk_cache_module);
1136 int val = atoi(arg);
1137 if (val < 1)
1138 return "CacheDirLevels value must be an integer greater than 0";
1139 if (val * conf->dirlength > CACHEFILE_LEN)
1140 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1141 conf->dirlevels = val;
1142 return NULL;
1144 static const char
1145 *set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1147 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1148 &disk_cache_module);
1149 int val = atoi(arg);
1150 if (val < 1)
1151 return "CacheDirLength value must be an integer greater than 0";
1152 if (val * conf->dirlevels > CACHEFILE_LEN)
1153 return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1155 conf->dirlength = val;
1156 return NULL;
1159 static const char
1160 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1162 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1163 &disk_cache_module);
1165 if (apr_strtoff(&conf->minfs, arg, NULL, 0) != APR_SUCCESS ||
1166 conf->minfs < 0)
1168 return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1170 return NULL;
1173 static const char
1174 *set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1176 disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1177 &disk_cache_module);
1178 if (apr_strtoff(&conf->maxfs, arg, NULL, 0) != APR_SUCCESS ||
1179 conf->maxfs < 0)
1181 return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
1183 return NULL;
1186 static const command_rec disk_cache_cmds[] =
1188 AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
1189 "The directory to store cache files"),
1190 AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
1191 "The number of levels of subdirectories in the cache"),
1192 AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
1193 "The number of characters in subdirectory names"),
1194 AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
1195 "The minimum file size to cache a document"),
1196 AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
1197 "The maximum file size to cache a document"),
1198 {NULL}
1201 static const cache_provider cache_disk_provider =
1203 &remove_entity,
1204 &store_headers,
1205 &store_body,
1206 &recall_headers,
1207 &recall_body,
1208 &create_entity,
1209 &open_entity,
1210 &remove_url,
1213 static void disk_cache_register_hook(apr_pool_t *p)
1215 /* cache initializer */
1216 ap_register_provider(p, CACHE_PROVIDER_GROUP, "disk", "0",
1217 &cache_disk_provider);
1220 module AP_MODULE_DECLARE_DATA disk_cache_module = {
1221 STANDARD20_MODULE_STUFF,
1222 NULL, /* create per-directory config structure */
1223 NULL, /* merge per-directory config structures */
1224 create_config, /* create per-server config structure */
1225 NULL, /* merge per-server config structures */
1226 disk_cache_cmds, /* command apr_table_t */
1227 disk_cache_register_hook /* register hooks */