1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 * Find a page for a similar URL as the newly requested page
17 * Created on: 02/08/2010
23 #include <apr_strings.h>
36 #ifdef AP_NEED_SET_MUTEX_PERMS
41 #include "mod_crccache_client_find_similar.h"
42 #include "ap_log_helper.h"
45 RMM_OFF_T_DECLARE(char);
47 typedef struct vary_headers_s vary_headers_t
;
48 RMM_OFF_T_DECLARE(vary_headers_t
);
49 struct vary_headers_s
{
50 RMM_OFF_T(vary_headers_t
) next
;
52 RMM_OFF_T(char) value
;
56 typedef struct cached_files_info_s cached_files_info_t
;
57 RMM_OFF_T_DECLARE(cached_files_info_t
);
58 struct cached_files_info_s
{
59 RMM_OFF_T(cached_files_info_t
) prev
;
60 RMM_OFF_T(cached_files_info_t
) next
;
61 RMM_OFF_T(char) basepath
; // Path without .header or .data postfix
62 RMM_OFF_T(char) uri
; // URI of the page (useful for logging purposes)
63 RMM_OFF_T(vary_headers_t
) vary_headers
;
66 typedef struct sp_per_content_type_s sp_per_content_type_t
;
67 RMM_OFF_T_DECLARE(sp_per_content_type_t
);
68 struct sp_per_content_type_s
{
69 RMM_OFF_T(sp_per_content_type_t
) next
;
70 RMM_OFF_T(char) content_type
;
71 RMM_OFF_T(cached_files_info_t
) cached_files_info
;
72 RMM_OFF_T(rmm_hash_t
) cached_files_info_by_path
;
73 RMM_OFF_T(cached_files_info_t
) tail_file_info
;
76 typedef struct sp_per_regex_s sp_per_regex_t
;
77 RMM_OFF_T_DECLARE(sp_per_regex_t
);
78 struct sp_per_regex_s
{
79 RMM_OFF_T(sp_per_regex_t
) next
;
80 /* The regex parameter stored here is the non-compiled regex string.
81 * The compiled version must be cached in a per-process cache pool.
82 * Reason is that the ap_regex compiler allocates an internal structure
83 * for the compiled data using malloc. The ap_preg structure does not provide
84 * any info about that internal structure (like the length) and as such,
85 * the internal structure can not be transferred to the shared memory :-(
87 RMM_OFF_T(char) regex
;
89 RMM_OFF_T(sp_per_content_type_t
) similar_pages_per_content_type
;
92 RMM_OFF_T_DECLARE(int);
93 struct similar_page_cache_s
{
94 const char* cache_root
;
95 apr_size_t cache_root_len
;
97 apr_global_mutex_t
*fs_cache_lock
;
98 apr_size_t cache_bytes
; /* Size (in bytes) of shared memory cache */
99 #if APR_HAS_SHARED_MEMORY
103 RMM_OFF_T(rmm_hash_t
) similar_pages_per_host
;
104 const char *cache_file
; /* filename for shm backing cache file */
105 const char *lock_file
; /* filename for shm lock mutex */
106 RMM_OFF_T(int) lock_is_available
; /* lock is available in all threads/subprocesses */
107 apr_hash_t
*similar_pages_regexs
; /* compiled regular expressions for similar pages */
108 RMM_OFF_T(rmm_hash_t
) vary_headers_cache
;
109 int similar_pages_cache_initialized
;
113 * Returns 1 when the lock is available in all threads/subprocesses and 0 otherwise
115 static int is_lock_available(similar_page_cache_t
*sp_cache
)
117 return *APR_RMM_ADDR_GET(int, sp_cache
->rmm
, sp_cache
->lock_is_available
);
121 * Duplicate a string value into the a memory segment allocated from the relocatable memory.
122 * Returns: RMM_OFF_NULL on memory allocation error
123 * offset of duplicated string when all fine
125 static RMM_OFF_T(char) rmm_strdup(apr_rmm_t
*rmm
, const char *value
)
127 size_t valuelen
= strlen(value
);
128 RMM_OFF_T(char) rslt
= apr_rmm_malloc(rmm
, valuelen
+1);
129 if (rslt
== RMM_OFF_NULL
)
133 memcpy(APR_RMM_ADDR_GET(char, rmm
, rslt
), value
, valuelen
+1);
137 static apr_status_t
similar_page_cache_kill(void *data
)
139 similar_page_cache_t
*sp_cache
= data
;
141 sp_cache
->similar_pages_cache_initialized
= 0;
142 if (sp_cache
->rmm
!= NULL
)
144 apr_rmm_destroy(sp_cache
->rmm
);
145 sp_cache
->rmm
= NULL
;
147 #if APR_HAS_SHARED_MEMORY
148 if (sp_cache
->shm
!= NULL
) {
149 apr_status_t result
= apr_shm_destroy(sp_cache
->shm
);
150 sp_cache
->shm
= NULL
;
160 } compiled_regex_info_t
;
162 static int fsp_regex_match(request_rec
*r
, const char *regex
, const char *uri_key
, similar_page_cache_t
*sp_cache
)
164 if (sp_cache
->similar_pages_regexs
== NULL
) {
165 sp_cache
->similar_pages_regexs
= apr_hash_make(r
->server
->process
->pool
);
166 if (sp_cache
->similar_pages_regexs
== NULL
)
168 // Not enough memory to cache the regexs, so probably also not enough memory to
169 // compile the regex.
170 return 0; // Return a mismatch
173 compiled_regex_info_t
*regex_info
= (compiled_regex_info_t
*)apr_hash_get(sp_cache
->similar_pages_regexs
, regex
, APR_HASH_KEY_STRING
);
174 if (regex_info
== NULL
)
176 regex_info
= apr_palloc(r
->server
->process
->pool
, sizeof(compiled_regex_info_t
));
177 if (regex_info
== NULL
)
179 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "Could not allocate memory for regex_info");
180 return 0; // Return a mismatch
182 regex_info
->preg
= apr_palloc(r
->server
->process
->pool
, sizeof(ap_regex_t
));
183 if (regex_info
->preg
== NULL
)
185 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "Could not allocate memory for regex_info->preg");
186 return 0; // Return a mismatch
188 int rslt
= ap_regcomp(regex_info
->preg
, regex
, 0);
191 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
, "Could not compile regexp %s, return code: %d", regex
, rslt
);
192 regex_info
->compiled
= 0;
196 regex_info
->compiled
= 1;
198 // Store the 'compiled' regex even when the compilation failed. This prevents the same warning from re-appearing. Otherwise, the
199 // compilation will fail on each request for a page that might match this regex.
200 apr_hash_set(sp_cache
->similar_pages_regexs
, regex
, APR_HASH_KEY_STRING
, regex_info
);
202 if (regex_info
->compiled
)
204 return ap_regexec(regex_info
->preg
, uri_key
, 0, NULL
, AP_REG_ICASE
) == 0;
206 return 0; // Compilation of regex has failed at least once. Return a mismatch
/*****************************************************************
 * Record of available info on a media type specified by the client
 * (we also use 'em for encodings and languages)
 *
 * - Taken from mod_negotation.c
 */
typedef struct accept_rec {
    char *name;                 /* MUST be lowercase */
    float quality;              /* value of the q / qs parameter; 1.0 when not given */
    float level;                /* value of the level parameter */
    char *charset;              /* for content-type only */
} accept_rec;
/*****************************************************************
 * parse quality value. atof(3) is not well-usable here, because it
 * depends on the locale (argh).
 *
 * However, RFC 2616 states:
 *
 * [...] HTTP/1.1 applications MUST NOT generate more than three digits
 * after the decimal point. User configuration of these values SHOULD also
 * be limited in this fashion.
 *
 *     qvalue         = ( "0" [ "." 0*3DIGIT ] )
 *                    | ( "1" [ "." 0*3("0") ] )
 *
 * This is quite easy. If the supplied string doesn't match the above
 * definition (loosely), we simply return 1 (same as if there's no qvalue)
 *
 * - Taken from mod_negotation.c
 */
static float atoq(const char *string)
{
    if (!string || !*string) {
        return 1.0f;
    }

    while (*string && apr_isspace(*string)) {
        ++string;
    }

    /* be tolerant and accept qvalues without leading zero
     * (also for backwards compat, where atof() was in use)
     */
    if (*string != '.' && *string++ != '0') {
        return 1.0f;
    }

    if (*string == '.') {
        /* better only one division later, than dealing with fscking
         * IEEE format 0.1 factors ...
         */
        int i = 0;

        if (*++string >= '0' && *string <= '9') {
            i += (*string - '0') * 100;

            if (*++string >= '0' && *string <= '9') {
                i += (*string - '0') * 10;

                if (*++string > '0' && *string <= '9') {
                    i += (*string - '0');
                }
            }
        }

        return (float)i / 1000.0f;
    }

    /* a bare "0" without fraction */
    return 0.0f;
}
282 /*****************************************************************
283 * Get a single mime type entry --- one media type and parameters;
284 * enter the values we recognize into the argument accept_rec
286 * - Taken from mod_negotation.c
288 static const char *get_accept_entry(apr_pool_t
*p
, accept_rec
*result
,
289 const char *accept_line
)
291 result
->quality
= 1.0f
;
292 result
->level
= 0.0f
;
293 result
->charset
= "";
296 * Note that this handles what I gather is the "old format",
298 * Accept: text/html text/plain moo/zot
300 * without any compatibility kludges --- if the token after the
301 * MIME type begins with a semicolon, we know we're looking at parms,
302 * otherwise, we know we aren't. (So why all the pissing and moaning
303 * in the CERN server code? I must be missing something).
306 result
->name
= ap_get_token(p
, &accept_line
, 0);
307 ap_str_tolower(result
->name
); /* You want case insensitive,
308 * you'll *get* case insensitive.
311 /* KLUDGE!!! Default HTML to level 2.0 unless the browser
312 * *explicitly* says something else.
315 if (!strcmp(result
->name
, "text/html") && (result
->level
== 0.0)) {
316 result
->level
= 2.0f
;
318 else if (!strcmp(result
->name
, INCLUDES_MAGIC_TYPE
)) {
319 result
->level
= 2.0f
;
321 else if (!strcmp(result
->name
, INCLUDES_MAGIC_TYPE3
)) {
322 result
->level
= 3.0f
;
325 while (*accept_line
== ';') {
333 parm
= ap_get_token(p
, &accept_line
, 1);
335 /* Look for 'var = value' --- and make sure the var is in lcase. */
337 for (cp
= parm
; (*cp
&& !apr_isspace(*cp
) && *cp
!= '='); ++cp
) {
338 *cp
= apr_tolower(*cp
);
342 continue; /* No '='; just ignore it. */
345 *cp
++ = '\0'; /* Delimit var */
346 while (*cp
&& (apr_isspace(*cp
) || *cp
== '=')) {
353 (*end
&& *end
!= '\n' && *end
!= '\r' && *end
!= '\"');
357 for (end
= cp
; (*end
&& !apr_isspace(*end
)); end
++);
360 *end
= '\0'; /* strip ending quote or return */
365 && (parm
[1] == '\0' || (parm
[1] == 's' && parm
[2] == '\0'))) {
366 result
->quality
= atoq(cp
);
368 else if (parm
[0] == 'l' && !strcmp(&parm
[1], "evel")) {
369 result
->level
= (float)atoi(cp
);
371 else if (!strcmp(parm
, "charset")) {
372 result
->charset
= cp
;
376 if (*accept_line
== ',') {
384 /*****************************************************************
385 * Dealing with Accept... header lines ...
386 * Accept, Accept-Charset, Accept-Language and Accept-Encoding
387 * are handled by do_header_line() - they all have the same
388 * basic structure of a list of items of the format
389 * name; q=N; charset=TEXT
391 * where charset is only valid in Accept.
393 * - Taken from mod_negotation.c
395 static apr_array_header_t
*parse_accept_line(apr_pool_t
*p
,
396 const char *accept_line
)
398 apr_array_header_t
*accept_recs
;
404 accept_recs
= apr_array_make(p
, 40, sizeof(accept_rec
));
405 if (accept_recs
== NULL
)
407 return NULL
; // Nothing to allocate
409 while (*accept_line
) {
410 accept_rec
*new = (accept_rec
*) apr_array_push(accept_recs
);
411 accept_line
= get_accept_entry(p
, new, accept_line
);
412 if (!strcmp(new->name
, "*/*"))
414 apr_array_pop(accept_recs
); // Discard this entry
/**
 * Compare a mime type with one type taken from an accept header.
 *
 * mime_type   - mime type of the cached page, e.g. "text/html"
 * accept_type - type from the accept header; may end in the wildcard '*'
 *
 * Returns non-zero when both strings are identical, or when they are
 * identical up to a position at which accept_type holds a '*'.
 */
static int match_accept_type_vs_mime_type(const char *mime_type, const char *accept_type)
{
    /* Skip the common prefix of both strings */
    while (*mime_type && *accept_type && *mime_type == *accept_type) {
        mime_type++;
        accept_type++;
    }
    /* Either both strings are exhausted, or the accept type continues with a wildcard */
    return (*mime_type == 0 && *accept_type == 0) || (*accept_type == '*');
}
433 // TODO: Refine. Current logic is simplistic. It only checks the mime-type part of the content-type
434 // header of the cached page (e.g. it ignores the charset) and furthermore, it ignores
435 // the 'quality'/'level' indicates in the accept header. The function returns true
436 // if the mime-type of the cached page matches at least one of the content-types indicated
437 // in the accept header
438 // Note that the foundation for more fine-grained logic has been laid. The accept-header
439 // is parsed and broken down in all the constituting elements, using code copied from
440 // module mod-negotation
441 static int fsp_accept_matches_content_type(similar_page_cache_t
*sp_cache
,
442 request_rec
*r
, RMM_OFF_T(char) content_type
)
444 apr_array_header_t
*accepts
= parse_accept_line(r
->pool
, apr_table_get(r
->headers_in
, ACCEPT_HEADER
));
445 const char *content_type_line
= APR_RMM_ADDR_GET(char, sp_cache
->rmm
, content_type
);
449 return 0; // Can't validate content type versus accept header
451 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
452 "Comparing content type line %s versus accept line %s",
453 content_type_line
, apr_table_get(r
->headers_in
, ACCEPT_HEADER
));
455 // Only look at the mime-type (e.g. text/html) of the content-type line.
456 // Discard any other parameters like the charset
457 char *mime_type
= ap_get_token(r
->pool
, &content_type_line
, 0);
458 ap_str_tolower(mime_type
);
460 accept_rec
*accept_elts
= (accept_rec
*)accepts
->elts
;
462 for (cnt
= 0; cnt
!= accepts
->nelts
; cnt
++)
464 const char *accept_type
= accept_elts
[cnt
].name
;
465 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
466 "Comparing mime type %s versus accept type %s", mime_type
, accept_type
);
467 if (match_accept_type_vs_mime_type(mime_type
, accept_type
))
469 return 1; // A good-enough match found. Use this page.
472 return 0; // No match found.Skip this page
475 static void clear_rmm_field(apr_rmm_t
*rmm
, apr_rmm_off_t
*offset_ptr
)
477 if (*offset_ptr
!= RMM_OFF_NULL
) {
478 apr_rmm_free(rmm
, *offset_ptr
);
479 *offset_ptr
= RMM_OFF_NULL
;
484 * Free all memory used by a cached_files_info_t structure
485 * Be aware that this function might get called while the structure is not yet complete. E.g.
486 * it gets called when an out-of-memory condition occurs during the construction
488 static void free_cached_files_info(apr_rmm_t
*rmm
, sp_per_content_type_t
*sp_per_ct_physical
, RMM_OFF_T(cached_files_info_t
) cached_file_info
)
490 cached_files_info_t
*cfi_physical
= APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cached_file_info
);
492 // Delete the entry from the hash table
493 if (sp_per_ct_physical
->cached_files_info_by_path
!= RMM_OFF_NULL
&& cfi_physical
->basepath
!= RMM_OFF_NULL
) {
494 rmm_hash_set(rmm
, sp_per_ct_physical
->cached_files_info_by_path
, cfi_physical
->basepath
, APR_HASH_KEY_STRING
, RMM_OFF_NULL
);
497 // Update the tail entry if this was the tail entry
498 if (cached_file_info
== sp_per_ct_physical
->tail_file_info
) {
499 sp_per_ct_physical
->tail_file_info
= cfi_physical
->prev
;
502 // Remove the entry from the (double-linked) list
503 if (cfi_physical
->next
!= RMM_OFF_NULL
) {
504 APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cfi_physical
->next
)->prev
= cfi_physical
->prev
;
506 if (cfi_physical
->prev
!= RMM_OFF_NULL
) {
507 APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cfi_physical
->prev
)->next
= cfi_physical
->next
;
510 sp_per_ct_physical
->cached_files_info
= cfi_physical
->next
;
513 clear_rmm_field(rmm
, &cfi_physical
->basepath
);
514 clear_rmm_field(rmm
, &cfi_physical
->uri
);
515 apr_rmm_free(rmm
, cached_file_info
);
519 * Verify if the cached file contains a vary header. If yes, then match the headers in the request with
520 * the corresponding headers in the cached page.
521 * Returns true if there is no vary header or if the vary headers match correctly
522 * TODO: refine the logic to match the header values. According to the RFC, the comparison may
523 * ignore white-space characters in the header values (accordingly to the BNF/syntax of that specific header...).
524 * At the moment, the header values are compared literally, so in theory, this comparison is too restrictive.
526 static int match_vary_headers(similar_page_cache_t
*sp_cache
, request_rec
*r
, RMM_OFF_T(vary_headers_t
)vary_headers
)
528 if (vary_headers
== RMM_OFF_NULL
) {
529 return 1; // The cached page did not specify vary header, so the new request matches by definition
531 apr_rmm_t
*rmm
= sp_cache
->rmm
;
532 while (vary_headers
!= RMM_OFF_NULL
) {
533 vary_headers_t
*vary_headers_physical
= APR_RMM_ADDR_GET(vary_headers_t
, rmm
, vary_headers
);
534 const char *headername
= APR_RMM_ADDR_GET(char, rmm
, vary_headers_physical
->name
);
535 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "Comparing vary header %s", headername
);
536 if (strcmp(headername
, "*") == 0) {
537 // The special 'header name' * signifies that the server always varies stuff in an undisclosed manner.
538 // The similar page matching will probably yield bad results. Ignore this page.
541 const char *cached_headervalue
= (vary_headers_physical
->value
== RMM_OFF_NULL
) ?
542 NULL
: APR_RMM_ADDR_GET(char, rmm
, vary_headers_physical
->value
);
543 const char *req_headervalue
= apr_table_get(r
->headers_in
, headername
);
544 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "Cached value: %s, request value: %s",
545 cached_headervalue
, req_headervalue
);
546 if (req_headervalue
== NULL
&& cached_headervalue
!= NULL
) {
547 return 0; // Expecting a value but did not get one
549 if (req_headervalue
!= NULL
&& cached_headervalue
== NULL
) {
550 return 0; // Expecting empty header but got a value
552 if (req_headervalue
!= NULL
&& strcmp(req_headervalue
, cached_headervalue
) != 0) {
553 return 0; // The new and old header value differ
555 vary_headers
= vary_headers_physical
->next
;
557 return 1; // All vary headers are the same
561 * Try to open the file indicated in cfi_physical structure
562 * Returns APR_SUCCESS if the file was successfully opened, in which case the dobj structure
563 * will have been properly updated.
564 * Returns other error codes in case of problems.
565 * WARNING: When the file no longer exists, the structure cfi_physical will be deleted from memory and
566 * from the linked-list. It means that the caller should evaluate cfi_physical->next *before* invoking
569 static apr_status_t
open_cached_file(disk_cache_object_t
*dobj
, request_rec
*r
,
570 similar_page_cache_t
*sp_cache
, sp_per_content_type_t
*sp_per_ct_physical
,
571 RMM_OFF_T(cached_files_info_t
) cached_file_info
)
573 apr_rmm_t
*rmm
= sp_cache
->rmm
;
574 cached_files_info_t
*cfi_physical
= APR_RMM_ADDR_GET(cached_files_info_t
, sp_cache
->rmm
, cached_file_info
);
575 const char *fullpath
= apr_pstrcat(r
->pool
, sp_cache
->cache_root
, "/",
576 APR_RMM_ADDR_GET(char, rmm
, cfi_physical
->basepath
), CACHE_DATA_SUFFIX
, NULL
);
577 int flags
= APR_READ
|APR_BINARY
;
578 #ifdef APR_SENDFILE_ENABLED
579 flags
|= APR_SENDFILE_ENABLED
;
581 apr_status_t rc
= apr_file_open(&dobj
->fd
, fullpath
, flags
, 0, r
->pool
);
582 if (rc
== APR_SUCCESS
)
584 // Successfully opened the file. Try to obtain the file-size and return the completed dobj
587 rc
= apr_file_info_get(&finfo
, APR_FINFO_SIZE
, dobj
->fd
);
588 if (rc
== APR_SUCCESS
) {
589 dobj
->file_size
= finfo
.size
;
590 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
591 "Basing CRCSYNC/delta-http for requested URL on cached page for URL %s of size %" APR_SIZE_T_FMT
,
592 APR_RMM_ADDR_GET(char, sp_cache
->rmm
, cfi_physical
->uri
), dobj
->file_size
);
595 // Could not obtain file info for a mysterious reason. Skip this file.
596 apr_file_close(dobj
->fd
);
600 // Apparently the cached file is no longer there. Maybe it got cleaned by htcacheclean?
601 if (is_lock_available(sp_cache
)) {
602 // Remove the entry. But only if this process could obtain the semaphore...
603 free_cached_files_info(rmm
, sp_per_ct_physical
, cached_file_info
);
606 return rc
; // Could not open file or obtain file-info for whatever reason.
610 * Critical section of the code to find similar pages. While this code is in progress, no updates to the data
611 * structures may happen by other threads/processes, like by function 'update_or_add_similar_page(...), which is invoked
612 * when a new file has been saved to the disk cache.
614 * Please note that this function itself can update the 'free-pages' list if the code discovers that the data
615 * structure is referencing a file that no longer exists. Apart from that update-block, the code is fully re-entrant.
616 * With other words: multiple requests can enter this code concurrently, as long as they don't update the 'free-pages'
617 * list and as long as it does not happen concurrently with the 'update_or_add_similar_page(...) function
619 * At the moment, the code block that updates the 'free-pages' list checks if a lock could be obtained. If no lock could
620 * be obtained, it does not update the list. It only updates the list if a lock could be obtained.
622 * The locking is currently rather coarse grained: when locks are available, the (global mutex) makes sure that the access
623 * to this function and to the 'update_or_add_similar_page(...) function is exclusive. On the other hand, when the
624 * global mutex could not be initialized and as such is not available, the 'update_or_add_similar_page(...) function
625 * is disabled and only the 'find-similar-page' function works, for data that got loaded during the server startup.
627 * In order to increase the scalability, a more fine-grained locking could be implemented by carefully assessing which
628 * parts of the 'update_or_add_similar_page(...) function conflict with data structures used by this 'find_similar_page'
629 * function and then adding the appropriate locks where required.
631 static apr_status_t
find_similar_page_cs(disk_cache_object_t
*dobj
, request_rec
*r
, similar_page_cache_t
*sp_cache
, const char *host
)
633 apr_rmm_t
*rmm
= sp_cache
->rmm
;
634 RMM_OFF_T(sp_per_regex_t
) sp_per_regex
= rmm_hash_get(rmm
, sp_cache
->similar_pages_per_host
, host
, APR_HASH_KEY_STRING
);
635 while (sp_per_regex
!= RMM_OFF_NULL
)
637 sp_per_regex_t
*sp_per_regex_physical
= APR_RMM_ADDR_GET(sp_per_regex_t
, rmm
, sp_per_regex
);
638 if (fsp_regex_match(r
, APR_RMM_ADDR_GET(char, rmm
, sp_per_regex_physical
->regex
), r
->unparsed_uri
, sp_cache
))
640 // Found the largest matching regex. Find a group of pages with an appropriate content type
641 RMM_OFF_T(sp_per_content_type_t
) sp_per_ct
= sp_per_regex_physical
->similar_pages_per_content_type
;
642 while (sp_per_ct
!= RMM_OFF_NULL
)
644 sp_per_content_type_t
*sp_per_ct_physical
= APR_RMM_ADDR_GET(sp_per_content_type_t
, rmm
, sp_per_ct
);
645 if (fsp_accept_matches_content_type(sp_cache
, r
, sp_per_ct_physical
->content_type
))
647 // Found list of pages with appropriate content type for the matching regex
648 // Now try to open a page associated with this regex and content type
649 RMM_OFF_T(cached_files_info_t
) cached_file_info
= sp_per_ct_physical
->cached_files_info
;
650 while (cached_file_info
!= RMM_OFF_NULL
)
652 cached_files_info_t
*cfi_physical
= APR_RMM_ADDR_GET(cached_files_info_t
, sp_cache
->rmm
, cached_file_info
);
653 RMM_OFF_T(cached_files_info_t
) next_cfi
= cfi_physical
->next
;
654 if (match_vary_headers(sp_cache
, r
, cfi_physical
->vary_headers
)) {
655 if (open_cached_file(dobj
, r
, sp_cache
, sp_per_ct_physical
, cached_file_info
) == APR_SUCCESS
) {
656 return APR_SUCCESS
; // File successfully opened. Done.
659 cached_file_info
= next_cfi
;
660 } // while (cached_file_info != RMM_OFF_NULL)
661 } // if (find_similar_page_accept_matches_content_type(sp_cache, r, sp_per_ct_physical->content_type))
662 sp_per_ct
= sp_per_ct_physical
->next
;
663 } // while (sp_per_ct != RMM_OFF_NULL)
664 } // if (find_similar_page_regex_match(r, APR_RMM_ADDR_GET(char, rmm, sp_per_regex_physical->regex), r->unparsed_uri, sp_cache))
665 sp_per_regex
= sp_per_regex_physical
->next
;
666 } // while (sp_per_regex != RMM_OFF_NULL)
667 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "Could not find a similar page for the requesed URL");
672 * Find a page in the cache for an URL that is similar to the requested URL and that can
673 * fullfill at least one of the expected mime-types indicated in the "Accept" header
674 * This page can then be used by the CRCCache as basis for the CRCSYNC/Delta-http encoding.
676 apr_status_t
find_similar_page(disk_cache_object_t
*dobj
, request_rec
*r
, similar_page_cache_t
*sp_cache
)
678 if (!sp_cache
->similar_pages_cache_initialized
)
680 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "Similar page cache is not initialized");
683 const char *host
= apr_table_get(r
->headers_in
, HOST_HEADER
);
685 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
, "Can't find host header in the request");
689 apr_status_t findrslt
;
690 if (is_lock_available(sp_cache
)) {
691 apr_status_t lockrslt
= apr_global_mutex_lock(sp_cache
->fs_cache_lock
);
692 if (lockrslt
!= APR_SUCCESS
)
694 ap_log_error(APLOG_MARK
, APLOG_WARNING
, lockrslt
, r
->server
, "Can't obtain the lock");
697 findrslt
= find_similar_page_cs(dobj
, r
, sp_cache
, host
);
698 lockrslt
= apr_global_mutex_unlock(sp_cache
->fs_cache_lock
);
699 if (lockrslt
!= APR_SUCCESS
)
701 ap_log_error(APLOG_MARK
, APLOG_WARNING
, lockrslt
, r
->server
, "Can't release the lock");
705 findrslt
= find_similar_page_cs(dobj
, r
, sp_cache
, host
);
711 * Create info about a cached file
712 * Returns RMM_OFF_NULL when a memory allocation error has occured.
714 static RMM_OFF_T(cached_files_info_t
) create_cached_files_info(apr_rmm_t
*rmm
,
715 const char *basepath
, const char *uri
, RMM_OFF_T(vary_headers_t
) vary_headers
)
717 RMM_OFF_T(cached_files_info_t
) cached_files_info
= apr_rmm_calloc(rmm
, sizeof(cached_files_info_t
));
718 if (cached_files_info
== RMM_OFF_NULL
)
720 return cached_files_info
;
722 cached_files_info_t
*cfi_physical
= APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cached_files_info
);
723 cfi_physical
->basepath
= rmm_strdup(rmm
, basepath
);
724 cfi_physical
->uri
= rmm_strdup(rmm
, uri
);
725 if (cfi_physical
->basepath
== RMM_OFF_NULL
|| cfi_physical
->uri
== RMM_OFF_NULL
)
727 clear_rmm_field(rmm
, &cfi_physical
->basepath
);
728 clear_rmm_field(rmm
, &cfi_physical
->uri
);
729 apr_rmm_free(rmm
, cached_files_info
);
732 cfi_physical
->prev
= RMM_OFF_NULL
;
733 cfi_physical
->next
= RMM_OFF_NULL
;
734 cfi_physical
->vary_headers
= vary_headers
;
736 return cached_files_info
;
740 * Create a 'similar pages per content type' structure for the current basepath, uri and content_type
741 * Returns NULL when a memory allocation error has occured
743 static RMM_OFF_T(sp_per_content_type_t
) create_sp_per_content_type(apr_rmm_t
*rmm
,
744 const char *basepath
, const char *uri
, const char *content_type
, RMM_OFF_T(vary_headers_t
)vary_headers
)
746 RMM_OFF_T(sp_per_content_type_t
) sp_per_ct
= apr_rmm_calloc(rmm
, sizeof(sp_per_content_type_t
));
747 if (sp_per_ct
== RMM_OFF_NULL
)
749 return RMM_OFF_NULL
; // Memory allocation failure!
751 sp_per_content_type_t
*sp_per_ct_physical
= APR_RMM_ADDR_GET(sp_per_content_type_t
, rmm
, sp_per_ct
);
752 sp_per_ct_physical
->next
= RMM_OFF_NULL
;
753 sp_per_ct_physical
->content_type
= rmm_strdup(rmm
, content_type
);
754 if (sp_per_ct_physical
->content_type
== RMM_OFF_NULL
)
756 apr_rmm_free(rmm
, sp_per_ct
);
760 sp_per_ct_physical
->cached_files_info
= create_cached_files_info(rmm
, basepath
, uri
, vary_headers
);
761 if (sp_per_ct_physical
->cached_files_info
== RMM_OFF_NULL
)
763 apr_rmm_free(rmm
, sp_per_ct_physical
->content_type
);
764 apr_rmm_free(rmm
, sp_per_ct
);
767 sp_per_ct_physical
->tail_file_info
= sp_per_ct_physical
->cached_files_info
;
769 sp_per_ct_physical
->cached_files_info_by_path
= rmm_hash_make(rmm
);
770 if (sp_per_ct_physical
->cached_files_info_by_path
== RMM_OFF_NULL
)
772 free_cached_files_info(rmm
, sp_per_ct_physical
, sp_per_ct_physical
->cached_files_info
);
773 apr_rmm_free(rmm
, sp_per_ct_physical
->content_type
);
774 apr_rmm_free(rmm
, sp_per_ct
);
777 // FIXME: rmm_hash_set should be able to return an out-of-memory condition when appropriate so that *this* function can properly handle
778 // the error condition...
779 rmm_hash_set(rmm
, sp_per_ct_physical
->cached_files_info_by_path
,
780 APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, sp_per_ct_physical
->cached_files_info
)->basepath
, APR_HASH_KEY_STRING
,
781 sp_per_ct_physical
->cached_files_info
);
788 * Create a 'similar pages per regex' structure for the current regex, basepath, uri and content_type
789 * Returns NULL when a memory allocation error has occured
791 static RMM_OFF_T(sp_per_regex_t
) create_sp_per_regex(apr_rmm_t
*rmm
,
792 const char *regex
, const char *basepath
, const char *uri
, const char *content_type
, RMM_OFF_T(vary_headers_t
)vary_headers
)
794 RMM_OFF_T(sp_per_regex_t
) sp_per_regex
= apr_rmm_calloc(rmm
, sizeof(sp_per_regex_t
));
795 if (sp_per_regex
== RMM_OFF_NULL
)
797 return RMM_OFF_NULL
; // Memory allocation failure!
799 sp_per_regex_t
*sp_per_regex_physical
= APR_RMM_ADDR_GET(sp_per_regex_t
, rmm
, sp_per_regex
);
800 sp_per_regex_physical
->next
= RMM_OFF_NULL
;
801 sp_per_regex_physical
->regex_len
= strlen(regex
);
802 sp_per_regex_physical
->regex
= rmm_strdup(rmm
, regex
);
803 if (sp_per_regex_physical
->regex
== RMM_OFF_NULL
)
805 apr_rmm_free(rmm
, sp_per_regex
);
808 sp_per_regex_physical
->similar_pages_per_content_type
= create_sp_per_content_type(rmm
, basepath
, uri
, content_type
, vary_headers
);
809 if (sp_per_regex_physical
->similar_pages_per_content_type
== RMM_OFF_NULL
)
811 apr_rmm_free(rmm
, sp_per_regex_physical
->regex
);
812 apr_rmm_free(rmm
, sp_per_regex
);
819 * Add a new cached file to the list of cached files for the current content type or update the entry if it
821 * Returns: 1 on memory allocation error
824 static int add_cached_file_to_content_type(similar_page_cache_t
*sp_cache
, sp_per_content_type_t
*sp_per_ct_physical
,
825 const char *basepath
, const char *uri
, RMM_OFF_T(vary_headers_t
) vary_headers
)
827 apr_rmm_t
*rmm
= sp_cache
->rmm
;
828 RMM_OFF_T(cached_files_info_t
) cached_file_info
;
829 cached_files_info_t
*cfi_physical
;
831 // Make the cached_file_info record
832 cached_file_info
= create_cached_files_info(rmm
, basepath
, uri
, vary_headers
);
833 if (cached_file_info
== RMM_OFF_NULL
) {
834 return 1; // Could not allocate memory. Can't store the info.
836 cfi_physical
= APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cached_file_info
);
838 // Insert the new entry at the head of the list
839 cfi_physical
->next
= sp_per_ct_physical
->cached_files_info
;
840 if (cfi_physical
->next
!= RMM_OFF_NULL
) {
841 // There was already something in the list. Make the old head entry point back to
842 // this new head entry
843 APR_RMM_ADDR_GET(cached_files_info_t
, rmm
, cfi_physical
->next
)->prev
= cached_file_info
;
846 // The list was empty. This new entry is now by definition a tail entry
847 sp_per_ct_physical
->tail_file_info
= cached_file_info
;
849 sp_per_ct_physical
->cached_files_info
= cached_file_info
;
851 // Remove old version of the page (if it exists) from the list
852 RMM_OFF_T(cached_files_info_t
) old_cached_file
= rmm_hash_get(rmm
,
853 sp_per_ct_physical
->cached_files_info_by_path
,
854 basepath
, APR_HASH_KEY_STRING
);
855 if (old_cached_file
!= RMM_OFF_NULL
) {
856 free_cached_files_info(rmm
, sp_per_ct_physical
, old_cached_file
);
859 // Add the new version to the reverse index
860 // FIXME: deal with failure of rmm_hash_set (once rmm_hash_set has been fixed to return an out-of-memory condition
862 rmm_hash_set(rmm
, sp_per_ct_physical
->cached_files_info_by_path
, cfi_physical
->basepath
, APR_HASH_KEY_STRING
, cached_file_info
);
864 if (rmm_hash_count(rmm
, sp_per_ct_physical
->cached_files_info_by_path
) > 40 /* TODO: make this threshold configurable */)
866 // Only maintain info about the (40) most recently cached pages per host per regex per content-type
867 // The chance that all of them point to meanwhile deleted/obsolete files is very small, considering
868 // the fact that each freshly cached file gets inserted at the head of the list, so it does not make
869 // much sense to fill-up the memory with a longer list.
870 free_cached_files_info(rmm
, sp_per_ct_physical
, sp_per_ct_physical
->tail_file_info
);
873 return 0; // Cached file info successfully added
877 * Add a new cached file to the list of cached files for the current regular expression or update the page if it
879 * Returns: 1 on memory allocation error
882 static int add_cached_file_to_regex(similar_page_cache_t
*sp_cache
, sp_per_regex_t
*sp_per_regex_physical
,
883 const char *basepath
, const char *uri
, const char *content_type
, RMM_OFF_T(vary_headers_t
)vary_headers
)
885 RMM_OFF_T(sp_per_content_type_t
) sp_per_ct
;
886 apr_rmm_t
*rmm
= sp_cache
->rmm
;
887 sp_per_ct
= sp_per_regex_physical
->similar_pages_per_content_type
;
888 while (sp_per_ct
!= RMM_OFF_NULL
) {
889 sp_per_content_type_t
*sp_per_ct_physical
= APR_RMM_ADDR_GET(sp_per_content_type_t
, rmm
, sp_per_ct
);
890 if (!strcmp(content_type
, APR_RMM_ADDR_GET(char, rmm
, sp_per_ct_physical
->content_type
))) {
891 // Found the correct entry. Add or update the page here
892 return add_cached_file_to_content_type(sp_cache
, sp_per_ct_physical
, basepath
, uri
, vary_headers
);
894 sp_per_ct
= sp_per_ct_physical
->next
;
896 // There is nothing yet for this content type. Add it to the list
897 sp_per_ct
= create_sp_per_content_type(rmm
, basepath
, uri
, content_type
, vary_headers
);
898 if (sp_per_ct
== RMM_OFF_NULL
) {
901 // Add it to the head of the list
902 APR_RMM_ADDR_GET(sp_per_content_type_t
, rmm
, sp_per_ct
)->next
= sp_per_regex_physical
->similar_pages_per_content_type
;
903 sp_per_regex_physical
->similar_pages_per_content_type
= sp_per_ct
;
909 * Add a new page to the list of similar pages for current host or update an existing page
910 * Returns: 1 on memory allocation error
913 static int add_similar_pages_info(similar_page_cache_t
*sp_cache
, RMM_OFF_T(sp_per_regex_t
) *sp_per_regex_p
,
914 const char *regex
, const char *basepath
, const char *uri
, const char *content_type
, RMM_OFF_T(vary_headers_t
)vary_headers
)
916 apr_rmm_t
*rmm
= sp_cache
->rmm
;
917 size_t regex_len
= strlen(regex
);
920 RMM_OFF_T(sp_per_regex_t
) curr_sp_per_regex
= *sp_per_regex_p
;
921 sp_per_regex_t
*sp_per_regex_physical
= APR_RMM_ADDR_GET(sp_per_regex_t
, rmm
, curr_sp_per_regex
);
922 if (regex_len
== sp_per_regex_physical
->regex_len
&& strcmp(regex
, APR_RMM_ADDR_GET(char, rmm
, sp_per_regex_physical
->regex
))==0)
924 // Found a perfect match. Add or update the page to the head of the current pages list
925 return add_cached_file_to_regex(sp_cache
, sp_per_regex_physical
, basepath
, uri
, content_type
, vary_headers
);
929 if (regex_len
> sp_per_regex_physical
->regex_len
)
931 // No matching regex found that is longer then the current regex.
932 // Insert the new entry here in the list, so that the list remains sorted in descending order on regex_len
933 RMM_OFF_T(sp_per_regex_t
) new_sp_per_regex
= create_sp_per_regex(rmm
, regex
, basepath
, uri
, content_type
, vary_headers
);
934 if (new_sp_per_regex
== RMM_OFF_NULL
)
936 return 1; // Out of memory condition occurred
938 APR_RMM_ADDR_GET(sp_per_regex_t
, rmm
, new_sp_per_regex
)->next
= curr_sp_per_regex
;
939 *sp_per_regex_p
= new_sp_per_regex
;
940 return 0; // New page succesfully inserted
944 if (sp_per_regex_physical
->next
== RMM_OFF_NULL
)
946 // Reached tail of the list. The new regex is shorter then any of the existing ones
947 // Insert new entry to the end of the list
948 RMM_OFF_T(sp_per_regex_t
) new_sp_per_regex
= create_sp_per_regex(rmm
, regex
, basepath
, uri
, content_type
, vary_headers
);
949 if (new_sp_per_regex
== RMM_OFF_NULL
)
951 return 1; // Out of memory condition occurred
953 sp_per_regex_physical
->next
= new_sp_per_regex
;
954 return 0; // New page succesfully inserted
956 // Evaluate the next entry
957 sp_per_regex_p
= &sp_per_regex_physical
->next
;
965 * Add (or update) a cached page to the 'similar pages' cache
966 * Returns: 1 on memory allocation error
968 * The invoking function may want to log a warning in case of memory
969 * allocation error so that the system administrator can tune the cache
970 * parameters if this happens too often
972 static int add_cached_page(similar_page_cache_t
*sp_cache
, const char *regex
, const char *host
,
973 const char *basepath
, const char *uri
, const char *content_type
, RMM_OFF_T(vary_headers_t
)vary_headers
)
975 apr_rmm_t
*rmm
= sp_cache
->rmm
;
976 RMM_OFF_T(sp_per_regex_t
) sp_per_regex
= rmm_hash_get(rmm
, sp_cache
->similar_pages_per_host
, host
, APR_HASH_KEY_STRING
);
977 if (sp_per_regex
== RMM_OFF_NULL
)
979 // There is no info yet for the current host. Make the first entry.
980 RMM_OFF_T(char) host_offset
= rmm_strdup(rmm
, host
);
981 if (host_offset
== RMM_OFF_NULL
) {
982 return 1; // Could not allocate memory
984 sp_per_regex
= create_sp_per_regex(rmm
, regex
, basepath
, uri
, content_type
, vary_headers
);
985 if (sp_per_regex
== RMM_OFF_NULL
) {
986 apr_rmm_free(rmm
, host_offset
);
987 return 1; // Could not allocate memory!
989 rmm_hash_set(rmm
, sp_cache
->similar_pages_per_host
, host_offset
, APR_HASH_KEY_STRING
, sp_per_regex
);
990 return 0; // All fine
994 // The current entry already contains similar pages info. Add new or updated page to the list
995 int rslt
= add_similar_pages_info(sp_cache
, &sp_per_regex
, regex
, basepath
, uri
, content_type
, vary_headers
);
1001 * Allocate and initialze an empty similar page cache
1003 static apr_status_t
similar_page_cache_init(apr_pool_t
*pool
, server_rec
*s
, similar_page_cache_t
*sp_cache
)
1005 #if APR_HAS_SHARED_MEMORY
1006 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
, "APR supports shared memory");
1007 apr_status_t result
;
1008 apr_size_t requested_size
;
1009 apr_size_t retrieved_size
;
1011 if (sp_cache
->cache_file
) {
1012 /* Remove any existing shm segment with this name. */
1013 apr_shm_remove(sp_cache
->cache_file
, pool
);
1016 requested_size
= APR_ALIGN_DEFAULT(sp_cache
->cache_bytes
);
1017 result
= apr_shm_create(&sp_cache
->shm
, requested_size
, sp_cache
->cache_file
, pool
);
1018 if (result
!= APR_SUCCESS
) {
1019 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
,
1020 "Unable to obtain %" APR_SIZE_T_FMT
" bytes shared memory", requested_size
);
1024 /* Determine the usable size of the shm segment. */
1025 retrieved_size
= apr_shm_size_get(sp_cache
->shm
);
1026 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
,
1027 "Requested %" APR_SIZE_T_FMT
" bytes shared memory, retrieved %" APR_SIZE_T_FMT
" bytes",
1028 requested_size
, retrieved_size
);
1030 /* This will create a rmm "handler" to get into the shared memory area */
1031 result
= apr_rmm_init(&sp_cache
->rmm
, NULL
,
1032 apr_shm_baseaddr_get(sp_cache
->shm
), retrieved_size
,
1034 if (result
!= APR_SUCCESS
) {
1035 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
, "Unable to initialize rmm handler for (shared) memory");
1039 void *local_memory
= apr_palloc(pool
, sp_cache
->cache_bytes
);
1040 if (local_memory
== NULL
)
1042 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
,
1043 "Unable to obtain %" APR_SIZE_T_FMT
" bytes of memory", requested_size
);
1046 /* This will create a rmm "handler" to get into the memory area */
1047 result
= apr_rmm_init(&sp_cache
->rmm
, NULL
,
1048 local_memory
, sp_cache
->cache_bytes
,
1050 if (result
!= APR_SUCCESS
) {
1051 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
, "Unable to initialize rmm handler for (shared) memory");
1057 apr_pool_cleanup_register(pool
, sp_cache
, similar_page_cache_kill
, apr_pool_cleanup_null
);
1059 sp_cache
->similar_pages_per_host
= rmm_hash_make(sp_cache
->rmm
);
1060 if (sp_cache
->similar_pages_per_host
== RMM_OFF_NULL
) {
1061 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, s
, "Unable to allocate memory for similar pages info cache");
1062 return APR_EGENERAL
;
1065 sp_cache
->lock_is_available
= apr_rmm_calloc(sp_cache
->rmm
, sizeof(int));
1066 if (sp_cache
->lock_is_available
== RMM_OFF_NULL
) {
1067 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, s
, "Unable to allocate memory for similar pages info cache");
1068 return APR_EGENERAL
;
1071 sp_cache
->vary_headers_cache
= rmm_hash_make(sp_cache
->rmm
);
1072 if (sp_cache
->vary_headers_cache
== RMM_OFF_NULL
) {
1073 ap_log_error(APLOG_MARK
, APLOG_ERR
, APR_EGENERAL
, s
, "Unable to allocate memory for similar pages info cache");
1074 return APR_EGENERAL
;
/**
 * Build (or look up) the list of 'Vary' request headers that belongs to a response.
 *
 * *vary_headers_p is first reset to RMM_OFF_NULL. The header names listed in the
 * response's Vary header are then combined, together with the value each of them
 * has in req_hdrs, into a key string; Accept-Encoding is left out. That key is
 * looked up in sp_cache->vary_headers_cache. When it is not present, a linked
 * list of vary_headers_t records (name plus request value) is allocated in the
 * rmm area and stored in the hash under a copy of the key. The resulting list is
 * handed back through *vary_headers_p.
 *
 * p          pool used for tokenizing and for building the key string
 * s          server record, used for logging
 * req_hdrs   request headers the values are taken from
 * resp_hdrs  response headers holding the Vary header
 *
 * Returns: 1 when an rmm allocation failed
 * NOTE(review): the declared return type is apr_status_t while the visible
 * returns use the plain value 1; callers treat non-zero as failure.
 */
1080 static apr_status_t
make_vary_headers(apr_pool_t
*p
, server_rec
*s
, similar_page_cache_t
*sp_cache
,
1081 apr_table_t
*req_hdrs
, apr_table_t
*resp_hdrs
, RMM_OFF_T(vary_headers_t
) *vary_headers_p
)
// Start from "no vary headers" so the caller always gets a defined value
1083 *vary_headers_p
= RMM_OFF_NULL
;
1084 apr_rmm_t
*rmm
= sp_cache
->rmm
;
1085 const char *vary
= apr_table_get(resp_hdrs
, VARY_HEADER
);
// First pass over the Vary header: build the lookup key for the vary headers hash
1089 char *vary_cache_key
= "";
1091 while ((headername
= ap_get_token(p
, &vary
, 1)) != NULL
&& strlen(headername
) != 0)
1093 // Ignore 'Accept-Encoding' vary header; we transform anything anyway to identity coding before storing it in the cache
1094 // so it does not matter what the server has done with respect to the content-encoding.
1095 if (strcmp(headername
, ACCEPT_ENCODING_HEADER
) != 0) {
// NOTE(review): apr_table_get() yields NULL for a header that is absent from the
// request, which ends the apr_pstrcat() argument list right after the "=".
1096 vary_cache_key
= apr_pstrcat(p
, vary_cache_key
, separator
, headername
, "=", apr_table_get(req_hdrs
, headername
), NULL
);
// An empty key means no header other than Accept-Encoding was listed
1100 if (*vary_cache_key
== 0) {
1101 // Apparently the content only varies based on the 'Accept-Encoding', which we ignore.
// Identical header/value combinations share a single list, found through the hash
1104 RMM_OFF_T(vary_headers_t
) vary_headers
= rmm_hash_get(sp_cache
->rmm
, sp_cache
->vary_headers_cache
, vary_cache_key
, APR_HASH_KEY_STRING
);
1105 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
, "Vary cache key: %s, found in cache?: %s",
1106 vary_cache_key
, (vary_headers
== RMM_OFF_NULL
) ? "No" : "Yes");
1107 if (vary_headers
== RMM_OFF_NULL
) {
1108 // This vary headers combination is not yet cached. Make the structure and cache it
// Second pass over the Vary header: one vary_headers_t record per relevant header
1109 vary
= apr_table_get(resp_hdrs
, VARY_HEADER
); // Get again the vary header
1110 while ((headername
= ap_get_token(p
, &vary
, 1)) != NULL
&& strlen(headername
) != 0)
1112 // Ignore 'Accept-Encoding' vary header; we transform anything anyway to identity coding before storing it in the cache
1113 // so it does not matter what the server has done with respect to the content-encoding.
1114 if (strcmp(headername
, ACCEPT_ENCODING_HEADER
) != 0) {
1115 // Allocate the new entry
1116 RMM_OFF_T(vary_headers_t
) new_vary_header
= apr_rmm_malloc(rmm
, sizeof(vary_headers_t
));
1117 if (new_vary_header
== RMM_OFF_NULL
) {
// NOTE(review): records allocated in earlier iterations are not released here
1118 return 1; // Could not allocate memory
1120 vary_headers_t
*new_vh_physical
= APR_RMM_ADDR_GET(vary_headers_t
, rmm
, new_vary_header
);
1122 // Put the new vary header at the head of the list of entries
1123 new_vh_physical
->next
= vary_headers
;
1124 vary_headers
= new_vary_header
;
// Copy the header name into the rmm area
1126 if ((new_vh_physical
->name
= rmm_strdup(rmm
, headername
)) == RMM_OFF_NULL
) {
// The value stays RMM_OFF_NULL unless it is overwritten by the copy made below
1130 new_vh_physical
->value
= RMM_OFF_NULL
;
1131 const char *value
= apr_table_get(req_hdrs
, headername
);
1134 if ((new_vh_physical
->value
= rmm_strdup(rmm
, value
)) == RMM_OFF_NULL
) {
// Remember the freshly built list under a copy of the key that lives in the rmm area
// NOTE(review): the result of rmm_strdup() for the key is not checked before use
1140 rmm_hash_set(sp_cache
->rmm
, sp_cache
->vary_headers_cache
, rmm_strdup(rmm
, vary_cache_key
), APR_HASH_KEY_STRING
, vary_headers
);
// Hand the (found or newly built) list back to the caller
1143 *vary_headers_p
= vary_headers
;
1149 * Load the info from the file-cache into the 'find similar page' cache
1151 static apr_status_t
similar_page_cache_load(apr_pool_t
*ptemp
, server_rec
*s
, const char *abs_dirname
, const char *rel_dirname
, similar_page_cache_t
*sp_cache
)
1153 apr_status_t result
;
1154 apr_dir_t
*dirinfo
; // structure for referencing directories
1155 apr_finfo_t fileinfo
; // file information structure
1157 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Opening directory %s", abs_dirname);
1158 result
= apr_dir_open(&dirinfo
, abs_dirname
, ptemp
);
1159 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Result: %d", result);
1160 if (result
!= APR_SUCCESS
)
1162 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
, "Unable to open directory %s", abs_dirname
);
1165 while (apr_dir_read(&fileinfo
, 0, dirinfo
) == APR_SUCCESS
)
1167 if (!strcmp(fileinfo
.name
, ".") || !strcmp(fileinfo
.name
, ".."))
1169 // Do not recursively go into current or parent directory!
1172 if (fileinfo
.filetype
== APR_DIR
)
1174 const char *sub_abs_dirname
= apr_pstrcat(ptemp
, abs_dirname
, "/", fileinfo
.name
, NULL
);
1175 const char *sub_rel_dirname
= (*rel_dirname
== 0) ? apr_pstrdup(ptemp
, fileinfo
.name
) :
1176 apr_pstrcat(ptemp
, rel_dirname
, "/", fileinfo
.name
, NULL
);
1177 if (similar_page_cache_load(ptemp
, s
, sub_abs_dirname
, sub_rel_dirname
, sp_cache
) != APR_SUCCESS
)
1179 continue; // skip this sub directory and process the next one
1182 else if (fileinfo
.filetype
== APR_REG
)
1184 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "About to open file: %s", fileinfo.name);
1185 if (strstr(fileinfo
.name
, CACHE_HEADER_SUFFIX
) != NULL
)
1187 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Its a header file");
1188 // Build the key (basepath) for the cache.
1189 // It consists of the relative path name exluding the .header extension
1190 char *basepath
= apr_pstrdup(ptemp
, fileinfo
.name
);
1191 *strstr(basepath
, CACHE_HEADER_SUFFIX
)=0;
1192 basepath
= apr_pstrcat(ptemp
, rel_dirname
, "/", basepath
, NULL
);
1193 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Basepath: %s", basepath);
1195 char *full_filepath
= apr_pstrcat(ptemp
, abs_dirname
, "/", fileinfo
.name
, NULL
);
1196 // ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, s, "Full_filepath: %s", full_filepath);
1199 result
= apr_file_open(&fd
, full_filepath
, APR_READ
|APR_BINARY
|APR_BUFFERED
, 0, ptemp
);
1200 if (result
!= APR_SUCCESS
)
1202 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
, "Failed to open file %s", full_filepath
);
1203 continue; // Skip this file
1206 apr_uint32_t format
;
1209 /* Read and evaluate the format from the cache file */
1210 len
= sizeof(format
);
1211 apr_file_read_full(fd
, &format
, len
, &len
);
1212 if (format
== VARY_FORMAT_VERSION
) {
1213 // TODO: Smartly handle "vary" header files. But skip them for the time being.
1214 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
, "Skipping vary header file %s", full_filepath
);
1216 continue; // Skip this file
1218 if (format
!= DISK_FORMAT_VERSION
) {
1219 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, s
,
1220 "File %s has a version mismatch. File had version %d, but expected version is %d",
1221 full_filepath
, format
, DISK_FORMAT_VERSION
);
1223 continue; // Skip this file
1225 // Format OK, rewind to file begin
1226 apr_off_t offset
= 0;
1227 apr_file_seek(fd
, APR_SET
, &offset
);
1229 // Read metadata from file
1230 cache_object_t
*obj
= apr_pcalloc(ptemp
, sizeof(cache_object_t
));;
1231 disk_cache_object_t
*dobj
= apr_pcalloc(ptemp
, sizeof(disk_cache_object_t
));;
1232 cache_info_t
*cache_info
= &(obj
->info
);
1233 result
= file_cache_recall_mydata(ptemp
, fd
, cache_info
, dobj
, 0);
1234 if (result
!= APR_SUCCESS
)
1236 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
,
1237 "Problem encountered reading meta data from %s", full_filepath
);
1239 continue; // Skip this file
1242 // Read request and response headers
1243 apr_table_t
*req_hdrs
= apr_table_make(ptemp
, 20);
1244 apr_table_t
*resp_hdrs
= apr_table_make(ptemp
, 20);
1245 result
= read_table(s
, resp_hdrs
, fd
);
1246 if (result
!= APR_SUCCESS
)
1248 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
, "Failed to read response headers from file %s", full_filepath
);
1250 continue; // Skip this file
1252 result
= read_table(s
, req_hdrs
, fd
);
1254 if (result
!= APR_SUCCESS
)
1256 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
, "Failed to read request headers from file %s", full_filepath
);
1257 continue; // Skip this file
1260 // Add file to 'similar pages' cache if host, crcsync_similar and content_type headers are present
1261 const char *hostname
= apr_table_get(req_hdrs
, HOST_HEADER
);
1262 const char *crcsync_similar
= apr_table_get(resp_hdrs
, CRCSYNC_SIMILAR_HEADER
);
1263 const char *content_type
= apr_table_get(resp_hdrs
, CONTENT_TYPE_HEADER
);
1264 if (hostname
!= NULL
&& crcsync_similar
!= NULL
&& content_type
!= NULL
)
1266 RMM_OFF_T(vary_headers_t
) vary_headers
;
1267 result
= make_vary_headers(ptemp
, s
, sp_cache
, req_hdrs
, resp_hdrs
, &vary_headers
);
1269 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, s
,
1270 "Could not allocate memory to cache vary headers");
1271 continue; // Skip this file
1273 result
= add_cached_page(sp_cache
, crcsync_similar
, hostname
, basepath
, cache_info
->uri
, content_type
, vary_headers
);
1276 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
,
1277 "Successfully added file %s to 'find similar page' cache (host: %s, content-type: %s, regex: %s, uri: %s)",
1278 basepath
, hostname
, content_type
, crcsync_similar
, cache_info
->uri
);
1282 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, s
,
1283 "Failed to add file %s with regex %s for host %s, content-type %s, uri %s to 'find similar page' cache, result: %d",
1284 basepath
, crcsync_similar
, hostname
, content_type
, cache_info
->uri
, result
);
1291 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, s
, "Unknown file type %d for file %s/%s",
1292 fileinfo
.filetype
, abs_dirname
, fileinfo
.name
);
1296 apr_dir_close(dirinfo
);
1300 const char *crccache_client_fsp_set_cache_bytes(cmd_parms
*parms
, void *in_struct_ptr
,
1301 const char *arg
, similar_page_cache_t
*sp_cache
)
1303 apr_size_t val
= atol(arg
);
1305 return "CRCClientSharedCacheSize value must be an integer greater than or equal to 0";
1306 sp_cache
->cache_bytes
= val
;
1311 similar_page_cache_t
*create_similar_page_cache(apr_pool_t
*p
)
1313 similar_page_cache_t
*sp_cache
= apr_pcalloc(p
, sizeof(similar_page_cache_t
));
1314 if (sp_cache
!= NULL
) {
1315 sp_cache
->cache_bytes
= 10*1024*1024; // Default to 10 MB
1320 static void create_global_mutex(similar_page_cache_t
*sp_cache
, apr_pool_t
*p
, apr_pool_t
*ptemp
, server_rec
*s
)
1322 apr_status_t result
;
1323 result
= apr_global_mutex_create(&sp_cache
->fs_cache_lock
,
1324 sp_cache
->lock_file
, APR_LOCK_DEFAULT
,
1326 if (result
!= APR_SUCCESS
) {
1327 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
,
1328 "Failed to allocate mutex on vhost %s. Similar page cache will only be loaded on start-up but not maintained for new pages cached while the server is running",
1329 format_hostinfo(ptemp
, s
));
1330 sp_cache
->fs_cache_lock
= NULL
;
1334 #ifdef AP_NEED_SET_MUTEX_PERMS
1335 result
= unixd_set_global_mutex_perms(sp_cache
->fs_cache_lock
);
1336 if (result
!= APR_SUCCESS
) {
1337 ap_log_error(APLOG_MARK
, APLOG_WARNING
, result
, s
,
1338 "Failed to set mutex permissions on vhost %s. Similar page cache will only be loaded on start-up but not maintained for new pages cached while the server is running",
1339 format_hostinfo(ptemp
, s
));
1340 apr_global_mutex_destroy(sp_cache
->fs_cache_lock
);
1341 sp_cache
->fs_cache_lock
= NULL
;
1346 // Lock is available for all threads/subprocesses
1347 *APR_RMM_ADDR_GET(int, sp_cache
->rmm
, sp_cache
->lock_is_available
)=1;
/**
 * Per-virtual-host post_config step for the 'find similar page' cache.
 *
 * Derives the names of the shared memory backing file and of the mutex lock
 * file from cache_root and, when sp_cache->cache_bytes is larger than zero,
 * initializes the in-memory cache, creates the global mutex and loads the
 * cache from the files found below cache_root.
 *
 * p           pool for data that is kept in sp_cache
 * plog        not referenced in the visible code
 * ptemp       pool for temporary strings
 * s           server record of the virtual host
 * cache_root  root directory of the disk cache
 *
 * NOTE(review): the conditions that separate the first from the second
 * post_config invocation and the values returned on each path are not part
 * of the lines shown here; confirm them against the full function.
 */
1350 int crccache_client_fsp_post_config_per_virtual_host(apr_pool_t
*p
, apr_pool_t
*plog
,
1351 apr_pool_t
*ptemp
, server_rec
*s
, similar_page_cache_t
*sp_cache
, const char *cache_root
)
1353 apr_status_t result
;
1356 * Set-up the shared memory block and the mutex for the 'find similar page' memory cache
1359 // Need to know the CacheRootClient value in order to make the SHM
1360 // cache backing file and the mutex lock backing file
// Both file names are built in ptemp; they are copied into p further down
1362 const char *cache_file_tmp
= apr_pstrcat(ptemp
, cache_root
, "/crccache_client_shm", NULL
);
1363 const char *lock_file_tmp
= apr_pstrcat(ptemp
, cache_file_tmp
, ".lck", NULL
);
// The userdata key contains the cache root, so it differs per cache root
1365 const char *userdata_key
= apr_pstrcat(p
, "crccache_client_init:", cache_root
, NULL
);
1367 /* util_crccache_client_post_config() will be called twice. Don't bother
1368 * going through all of the initialization on the first call
1369 * because it will just be thrown away.*/
1370 apr_pool_userdata_get(&data
, userdata_key
, s
->process
->pool
);
1371 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
,"vhost %s, data=%s",
1372 format_hostinfo(ptemp
, s
),
1373 data
== NULL
? "null" : "not null");
1375 // This code-block is only executed on first invocation of post_config
// Leave a marker in the process pool so that the next invocation can be recognized
1376 apr_pool_userdata_set((const void *)1, userdata_key
,
1377 apr_pool_cleanup_null
, s
->process
->pool
);
1378 #if APR_HAS_SHARED_MEMORY
1379 /* If the lock file already exists then delete it. Otherwise we are
1380 * going to run into problems creating the shared memory mutex. */
1381 if (lock_file_tmp
) {
1382 apr_file_remove(lock_file_tmp
, ptemp
);
1389 // Below code-block is only executed on second invocation of post_config
1390 sp_cache
->cache_root
= cache_root
;
1391 sp_cache
->cache_root_len
= strlen(cache_root
);
// The file names must outlive ptemp, hence the copies in p
1392 sp_cache
->cache_file
= apr_pstrdup(p
, cache_file_tmp
);
1393 sp_cache
->lock_file
= apr_pstrdup(p
, lock_file_tmp
);
1395 #if APR_HAS_SHARED_MEMORY
1396 /* initializing cache if we don't have shm address
1398 if (!sp_cache
->shm
) {
1400 /* initializing cache if shared memory size or entries is not zero
1402 if (sp_cache
->cache_bytes
> 0) {
// Step 1: set up the (shared) memory area and the structures inside it
1403 result
= similar_page_cache_init(p
, s
, sp_cache
);
1404 if (result
!= APR_SUCCESS
) {
1405 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
,
1406 "Could not initialize in-memory cache to efficiently find similar pages on vhost %s. Find similar page functionality is disabled",
1407 format_hostinfo(ptemp
, s
));
// Step 2: create the mutex that guards updates made while the server is running
1411 create_global_mutex(sp_cache
, p
, ptemp
, s
);
// Step 3: fill the cache from the header files found below the cache root
1413 result
= similar_page_cache_load(ptemp
, s
, sp_cache
->cache_root
, "", sp_cache
);
1414 if (result
!= APR_SUCCESS
) {
1415 ap_log_error(APLOG_MARK
, APLOG_ERR
, result
, s
,
1416 "Failed to load data into in-memory cache to efficiently find similar pages on vhost %s. Find similar page functionality is disabled",
1417 format_hostinfo(ptemp
, s
));
1421 sp_cache
->similar_pages_regexs
= apr_hash_make(p
); // Set-up cache for compiled regular expressions for similar page lookup
1422 sp_cache
->similar_pages_cache_initialized
= 1; // Similar page cache has finally been successfully set-up and is ready to be used
1424 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
,
1425 "Successfully initialized shared memory cache for this context (%s)",
1426 format_hostinfo(ptemp
, s
));
// A cache size of zero switches the functionality off for this virtual host
1429 ap_log_error(APLOG_MARK
, APLOG_INFO
, APR_SUCCESS
, s
,
1430 "CRCCacheClientSharedCacheSize is zero on vhost %s. Find similar page functionality is disabled",
1431 format_hostinfo(ptemp
, s
));
1433 #if APR_HAS_SHARED_MEMORY
// Reported when sp_cache->shm turns out to be set already
1437 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, 0, s
,
1438 "vhost (%s): Weird. Shared memory cache is already initialized for this context",
1439 format_hostinfo(ptemp
, s
));
1445 void crccache_client_fsp_child_init_per_virtual_host(apr_pool_t
*p
, server_rec
*s
, similar_page_cache_t
*sp_cache
)
1449 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, s
,
1450 "mod_crccache_client.child_init_per_vhost (%s): cache_lock: %s",
1451 format_hostinfo(p
, s
),
1452 sp_cache
->fs_cache_lock
? "defined" : "empty");
1454 if (sp_cache
->fs_cache_lock
)
1456 sts
= apr_global_mutex_child_init(&sp_cache
->fs_cache_lock
,
1457 sp_cache
->lock_file
, p
);
1458 if (sts
!= APR_SUCCESS
) {
1459 ap_log_error(APLOG_MARK
, APLOG_WARNING
, sts
, s
,
1460 "Failed to initialise global mutex %s in child process %" APR_PID_T_FMT
". The similar page cache will not be maintained for newly cached pages",
1461 sp_cache
->lock_file
, getpid());
1462 sp_cache
->fs_cache_lock
= NULL
; // Disable the global mutex in this child process
1463 *APR_RMM_ADDR_GET(int, sp_cache
->rmm
, sp_cache
->lock_is_available
) = 0; // Disable global mutex in all child processes
1467 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, sts
, s
,
1468 "Successfully initialized global mutex %s in child process %" APR_PID_T_FMT
".",
1469 sp_cache
->lock_file
, getpid());
1474 void update_or_add_similar_page(disk_cache_object_t
*dobj
, request_rec
*r
, similar_page_cache_t
*sp_cache
)
1476 if (!is_lock_available(sp_cache
)) {
1477 return; // Lock is not available. Can't start doing updates
1480 if (strlen(dobj
->hdrsfile
)+1 < sp_cache
->cache_root_len
||
1481 memcmp(dobj
->hdrsfile
, sp_cache
->cache_root
, sp_cache
->cache_root_len
) ||
1482 dobj
->hdrsfile
[sp_cache
->cache_root_len
] != '/') {
1483 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_EGENERAL
, r
->server
,
1484 "FIXME: Header file name %s does not start with cache root path %s while it should",
1485 dobj
->hdrsfile
, sp_cache
->cache_root
);
1488 char *basepath
= apr_pstrdup(r
->pool
, dobj
->hdrsfile
+sp_cache
->cache_root_len
+1);
1489 apr_size_t suffix_len
=strlen(CACHE_HEADER_SUFFIX
);
1490 apr_size_t basepath_len
= strlen(basepath
);
1491 if (basepath_len
< suffix_len
|| memcmp(basepath
+(basepath_len
-suffix_len
), CACHE_HEADER_SUFFIX
, suffix_len
)) {
1492 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_EGENERAL
, r
->server
,
1493 "FIXME: Header file name %s does not end on %s suffix",
1494 dobj
->hdrsfile
, CACHE_HEADER_SUFFIX
);
1498 *(basepath
+(basepath_len
-suffix_len
)) = 0; // Terminate the suffix location
1500 const char *hostname
= apr_table_get(r
->headers_in
, HOST_HEADER
);
1501 const char *crcsync_similar
= apr_table_get(r
->headers_out
, CRCSYNC_SIMILAR_HEADER
);
1502 const char *content_type
= apr_table_get(r
->headers_out
, CONTENT_TYPE_HEADER
);
1503 if (hostname
!= NULL
&& crcsync_similar
!= NULL
&& content_type
!= NULL
)
1505 apr_status_t lockrslt
= apr_global_mutex_lock(sp_cache
->fs_cache_lock
);
1506 if (lockrslt
!= APR_SUCCESS
)
1508 ap_log_error(APLOG_MARK
, APLOG_WARNING
, lockrslt
, r
->server
, "Can't obtain the lock");
1511 RMM_OFF_T(vary_headers_t
) vary_headers
;
1512 int addrslt
= make_vary_headers(r
->pool
, r
->server
, sp_cache
, r
->headers_in
, r
->headers_out
, &vary_headers
);
1515 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
,
1516 "Could not allocate memory to cache vary headers");
1520 addrslt
= add_cached_page(sp_cache
, crcsync_similar
, hostname
, basepath
, dobj
->name
, content_type
, vary_headers
);
1523 ap_log_error(APLOG_MARK
, APLOG_DEBUG
, APR_SUCCESS
, r
->server
,
1524 "Successfully added file %s to 'find similar page' cache (host: %s, content-type: %s, regex: %s, uri: %s)",
1525 basepath
, hostname
, content_type
, crcsync_similar
, dobj
->name
);
1529 ap_log_error(APLOG_MARK
, APLOG_WARNING
, APR_SUCCESS
, r
->server
,
1530 "Failed to add file %s with regex %s for host %s, content-type %s, uri %s to 'find similar page' cache, result: %d",
1531 basepath
, crcsync_similar
, hostname
, content_type
, dobj
->name
, addrslt
);
1534 lockrslt
= apr_global_mutex_unlock(sp_cache
->fs_cache_lock
);
1535 if (lockrslt
!= APR_SUCCESS
)
1537 ap_log_error(APLOG_MARK
, APLOG_WARNING
, lockrslt
, r
->server
, "Can't release the lock");