enabled block processing properly.
[httpd-crcsyncproxy.git] / modules / cache / cache_storage.c
blobf19375b9529ec848e05ecab37f467b734f4f5a5b
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #include "mod_cache.h"
19 extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
21 extern module AP_MODULE_DECLARE_DATA cache_module;
23 /* -------------------------------------------------------------- */
26 * delete all URL entities from the cache
29 int cache_remove_url(cache_request_rec *cache, apr_pool_t *p)
31 cache_provider_list *list;
32 cache_handle_t *h;
34 list = cache->providers;
36 /* Remove the stale cache entry if present. If not, we're
37 * being called from outside of a request; remove the
38 * non-stalle handle.
40 h = cache->stale_handle ? cache->stale_handle : cache->handle;
41 if (!h) {
42 return OK;
44 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
45 "cache: Removing url %s from the cache", h->cache_obj->key);
47 /* for each specified cache type, delete the URL */
48 while(list) {
49 list->provider->remove_url(h, p);
50 list = list->next;
52 return OK;
57 * create a new URL entity in the cache
59 * It is possible to store more than once entity per URL. This
60 * function will always create a new entity, regardless of whether
61 * other entities already exist for the same URL.
63 * The size of the entity is provided so that a cache module can
64 * decide whether or not it wants to cache this particular entity.
65 * If the size is unknown, a size of -1 should be set.
67 int cache_create_entity(request_rec *r, apr_off_t size)
69 cache_provider_list *list;
70 cache_handle_t *h = apr_pcalloc(r->pool, sizeof(cache_handle_t));
71 char *key;
72 apr_status_t rv;
73 cache_request_rec *cache = (cache_request_rec *)
74 ap_get_module_config(r->request_config, &cache_module);
76 rv = cache_generate_key(r, r->pool, &key);
77 if (rv != APR_SUCCESS) {
78 return rv;
81 list = cache->providers;
82 /* for each specified cache type, delete the URL */
83 while (list) {
84 switch (rv = list->provider->create_entity(h, r, key, size)) {
85 case OK: {
86 cache->handle = h;
87 cache->provider = list->provider;
88 cache->provider_name = list->provider_name;
89 return OK;
91 case DECLINED: {
92 list = list->next;
93 continue;
95 default: {
96 return rv;
100 return DECLINED;
103 static int set_cookie_doo_doo(void *v, const char *key, const char *val)
105 apr_table_addn(v, key, val);
106 return 1;
109 CACHE_DECLARE(void) ap_cache_accept_headers(cache_handle_t *h, request_rec *r,
110 int preserve_orig)
112 apr_table_t *cookie_table, *hdr_copy;
113 const char *v;
115 v = apr_table_get(h->resp_hdrs, "Content-Type");
116 if (v) {
117 ap_set_content_type(r, v);
118 apr_table_unset(h->resp_hdrs, "Content-Type");
120 * Also unset possible Content-Type headers in r->headers_out and
121 * r->err_headers_out as they may be different to what we have received
122 * from the cache.
123 * Actually they are not needed as r->content_type set by
124 * ap_set_content_type above will be used in the store_headers functions
125 * of the storage providers as a fallback and the HTTP_HEADER filter
126 * does overwrite the Content-Type header with r->content_type anyway.
128 apr_table_unset(r->headers_out, "Content-Type");
129 apr_table_unset(r->err_headers_out, "Content-Type");
132 /* If the cache gave us a Last-Modified header, we can't just
133 * pass it on blindly because of restrictions on future values.
135 v = apr_table_get(h->resp_hdrs, "Last-Modified");
136 if (v) {
137 ap_update_mtime(r, apr_date_parse_http(v));
138 ap_set_last_modified(r);
139 apr_table_unset(h->resp_hdrs, "Last-Modified");
142 /* The HTTP specification says that it is legal to merge duplicate
143 * headers into one. Some browsers that support Cookies don't like
144 * merged headers and prefer that each Set-Cookie header is sent
145 * separately. Lets humour those browsers by not merging.
146 * Oh what a pain it is.
148 cookie_table = apr_table_make(r->pool, 2);
149 apr_table_do(set_cookie_doo_doo, cookie_table, r->err_headers_out,
150 "Set-Cookie", NULL);
151 apr_table_do(set_cookie_doo_doo, cookie_table, h->resp_hdrs,
152 "Set-Cookie", NULL);
153 apr_table_unset(r->err_headers_out, "Set-Cookie");
154 apr_table_unset(h->resp_hdrs, "Set-Cookie");
156 if (preserve_orig) {
157 hdr_copy = apr_table_copy(r->pool, h->resp_hdrs);
158 apr_table_overlap(hdr_copy, r->headers_out, APR_OVERLAP_TABLES_SET);
159 r->headers_out = hdr_copy;
161 else {
162 apr_table_overlap(r->headers_out, h->resp_hdrs, APR_OVERLAP_TABLES_SET);
164 if (!apr_is_empty_table(cookie_table)) {
165 r->err_headers_out = apr_table_overlay(r->pool, r->err_headers_out,
166 cookie_table);
171 * select a specific URL entity in the cache
173 * It is possible to store more than one entity per URL. Content
174 * negotiation is used to select an entity. Once an entity is
175 * selected, details of it are stored in the per request
176 * config to save time when serving the request later.
178 * This function returns OK if successful, DECLINED if no
179 * cached entity fits the bill.
181 int cache_select(request_rec *r)
183 cache_provider_list *list;
184 apr_status_t rv;
185 cache_handle_t *h;
186 char *key;
187 cache_request_rec *cache = (cache_request_rec *)
188 ap_get_module_config(r->request_config, &cache_module);
190 rv = cache_generate_key(r, r->pool, &key);
191 if (rv != APR_SUCCESS) {
192 return rv;
194 /* go through the cache types till we get a match */
195 h = apr_palloc(r->pool, sizeof(cache_handle_t));
197 list = cache->providers;
199 while (list) {
200 switch ((rv = list->provider->open_entity(h, r, key))) {
201 case OK: {
202 char *vary = NULL;
203 int fresh;
205 if (list->provider->recall_headers(h, r) != APR_SUCCESS) {
206 /* TODO: Handle this error */
207 return DECLINED;
211 * Check Content-Negotiation - Vary
213 * At this point we need to make sure that the object we found in
214 * the cache is the same object that would be delivered to the
215 * client, when the effects of content negotiation are taken into
216 * effect.
218 * In plain english, we want to make sure that a language-negotiated
219 * document in one language is not given to a client asking for a
220 * language negotiated document in a different language by mistake.
222 * This code makes the assumption that the storage manager will
223 * cache the req_hdrs if the response contains a Vary
224 * header.
226 * RFC2616 13.6 and 14.44 describe the Vary mechanism.
228 vary = apr_pstrdup(r->pool, apr_table_get(h->resp_hdrs, "Vary"));
229 while (vary && *vary) {
230 char *name = vary;
231 const char *h1, *h2;
233 /* isolate header name */
234 while (*vary && !apr_isspace(*vary) && (*vary != ','))
235 ++vary;
236 while (*vary && (apr_isspace(*vary) || (*vary == ','))) {
237 *vary = '\0';
238 ++vary;
242 * is this header in the request and the header in the cached
243 * request identical? If not, we give up and do a straight get
245 h1 = apr_table_get(r->headers_in, name);
246 h2 = apr_table_get(h->req_hdrs, name);
247 if (h1 == h2) {
248 /* both headers NULL, so a match - do nothing */
250 else if (h1 && h2 && !strcmp(h1, h2)) {
251 /* both headers exist and are equal - do nothing */
253 else {
254 /* headers do not match, so Vary failed */
255 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
256 r->server,
257 "cache_select_url(): Vary header mismatch.");
258 return DECLINED;
262 cache->provider = list->provider;
263 cache->provider_name = list->provider_name;
265 /* Is our cached response fresh enough? */
266 fresh = ap_cache_check_freshness(h, r);
267 if (!fresh) {
268 const char *etag, *lastmod;
270 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
271 "Cached response for %s isn't fresh. Adding/replacing "
272 "conditional request headers.", r->uri);
274 /* Make response into a conditional */
275 cache->stale_headers = apr_table_copy(r->pool,
276 r->headers_in);
278 /* We can only revalidate with our own conditionals: remove the
279 * conditions from the original request.
281 apr_table_unset(r->headers_in, "If-Match");
282 apr_table_unset(r->headers_in, "If-Modified-Since");
283 apr_table_unset(r->headers_in, "If-None-Match");
284 apr_table_unset(r->headers_in, "If-Range");
285 apr_table_unset(r->headers_in, "If-Unmodified-Since");
288 * Do not do Range requests with our own conditionals: If
289 * we get 304 the Range does not matter and otherwise the
290 * entity changed and we want to have the complete entity
292 apr_table_unset(r->headers_in, "Range");
294 etag = apr_table_get(h->resp_hdrs, "ETag");
295 lastmod = apr_table_get(h->resp_hdrs, "Last-Modified");
297 if (etag || lastmod) {
298 /* If we have a cached etag and/or Last-Modified add in
299 * our own conditionals.
302 if (etag) {
303 apr_table_set(r->headers_in, "If-None-Match", etag);
306 if (lastmod) {
307 apr_table_set(r->headers_in, "If-Modified-Since",
308 lastmod);
310 cache->stale_handle = h;
312 else {
313 int irv;
316 * The copy isn't fresh enough, but we cannot revalidate.
317 * So it is the same case as if there had not been a cached
318 * entry at all. Thus delete the entry from cache.
320 irv = cache->provider->remove_url(h, r->pool);
321 if (irv != OK) {
322 ap_log_error(APLOG_MARK, APLOG_DEBUG, irv, r->server,
323 "cache: attempt to remove url from cache unsuccessful.");
327 return DECLINED;
330 /* Okay, this response looks okay. Merge in our stuff and go. */
331 ap_cache_accept_headers(h, r, 0);
333 cache->handle = h;
334 return OK;
336 case DECLINED: {
337 /* try again with next cache type */
338 list = list->next;
339 continue;
341 default: {
342 /* oo-er! an error */
343 return rv;
347 return DECLINED;
350 apr_status_t cache_generate_key_default(request_rec *r, apr_pool_t* p,
351 char**key)
353 cache_server_conf *conf;
354 cache_request_rec *cache;
355 char *port_str, *hn, *lcs;
356 const char *hostname, *scheme;
357 int i;
359 cache = (cache_request_rec *) ap_get_module_config(r->request_config,
360 &cache_module);
361 if (!cache) {
362 /* This should never happen */
363 ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
364 "cache: No cache request information available for key"
365 " generation");
366 *key = "";
367 return APR_EGENERAL;
369 if (cache->key) {
371 * We have been here before during the processing of this request.
372 * So return the key we already have.
374 *key = apr_pstrdup(p, cache->key);
375 return APR_SUCCESS;
379 * Get the module configuration. We need this for the CacheIgnoreQueryString
380 * option below.
382 conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
383 &cache_module);
386 * Use the canonical name to improve cache hit rate, but only if this is
387 * not a proxy request or if this is a reverse proxy request.
388 * We need to handle both cases in the same manner as for the reverse proxy
389 * case we have the following situation:
391 * If a cached entry is looked up by mod_cache's quick handler r->proxyreq
392 * is still unset in the reverse proxy case as it only gets set in the
393 * translate name hook (either by ProxyPass or mod_rewrite) which is run
394 * after the quick handler hook. This is different to the forward proxy
395 * case where it gets set before the quick handler is run (in the
396 * post_read_request hook).
397 * If a cache entry is created by the CACHE_SAVE filter we always have
398 * r->proxyreq set correctly.
399 * So we must ensure that in the reverse proxy case we use the same code
400 * path and using the canonical name seems to be the right thing to do
401 * in the reverse proxy case.
403 if (!r->proxyreq || (r->proxyreq == PROXYREQ_REVERSE)) {
404 /* Use _default_ as the hostname if none present, as in mod_vhost */
405 hostname = ap_get_server_name(r);
406 if (!hostname) {
407 hostname = "_default_";
410 else if(r->parsed_uri.hostname) {
411 /* Copy the parsed uri hostname */
412 hn = apr_pstrdup(p, r->parsed_uri.hostname);
413 ap_str_tolower(hn);
414 /* const work-around */
415 hostname = hn;
417 else {
418 /* We are a proxied request, with no hostname. Unlikely
419 * to get very far - but just in case */
420 hostname = "_default_";
424 * Copy the scheme, ensuring that it is lower case. If the parsed uri
425 * contains no string or if this is not a proxy request get the http
426 * scheme for this request. As r->parsed_uri.scheme is not set if this
427 * is a reverse proxy request, it is ensured that the cases
428 * "no proxy request" and "reverse proxy request" are handled in the same
429 * manner (see above why this is needed).
431 if (r->proxyreq && r->parsed_uri.scheme) {
432 /* Copy the scheme and lower-case it */
433 lcs = apr_pstrdup(p, r->parsed_uri.scheme);
434 ap_str_tolower(lcs);
435 /* const work-around */
436 scheme = lcs;
438 else {
439 scheme = ap_http_scheme(r);
443 * If this is a proxy request, but not a reverse proxy request (see comment
444 * above why these cases must be handled in the same manner), copy the
445 * URI's port-string (which may be a service name). If the URI contains
446 * no port-string, use apr-util's notion of the default port for that
447 * scheme - if available. Otherwise use the port-number of the current
448 * server.
450 if(r->proxyreq && (r->proxyreq != PROXYREQ_REVERSE)) {
451 if (r->parsed_uri.port_str) {
452 port_str = apr_pcalloc(p, strlen(r->parsed_uri.port_str) + 2);
453 port_str[0] = ':';
454 for (i = 0; r->parsed_uri.port_str[i]; i++) {
455 port_str[i + 1] = apr_tolower(r->parsed_uri.port_str[i]);
458 else if (apr_uri_port_of_scheme(scheme)) {
459 port_str = apr_psprintf(p, ":%u", apr_uri_port_of_scheme(scheme));
461 else {
462 /* No port string given in the AbsoluteUri, and we have no
463 * idea what the default port for the scheme is. Leave it
464 * blank and live with the inefficiency of some extra cached
465 * entities.
467 port_str = "";
470 else {
471 /* Use the server port */
472 port_str = apr_psprintf(p, ":%u", ap_get_server_port(r));
475 /* Key format is a URI, optionally without the query-string */
476 if (conf->ignorequerystring) {
477 *key = apr_pstrcat(p, scheme, "://", hostname, port_str,
478 r->parsed_uri.path, "?", NULL);
480 else {
481 *key = apr_pstrcat(p, scheme, "://", hostname, port_str,
482 r->parsed_uri.path, "?", r->parsed_uri.query, NULL);
486 * Store the key in the request_config for the cache as r->parsed_uri
487 * might have changed in the time from our first visit here triggered by the
488 * quick handler and our possible second visit triggered by the CACHE_SAVE
489 * filter (e.g. r->parsed_uri got unescaped). In this case we would save the
490 * resource in the cache under a key where it is never found by the quick
491 * handler during following requests.
493 cache->key = apr_pstrdup(r->pool, *key);
495 return APR_SUCCESS;