2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 typedef struct _Domain
{
32 } DomainRec
, *DomainPtr
;
/* Configuration variable registered with the description
   "File specifying forbidden URLs."  It is handed to parseDomainFile
   together with forbiddenDomains and forbiddenRegex. */
34 AtomPtr forbiddenFile
= NULL
;
/* Configuration variable: "URL to which forbidden requests should be
   redirected."  Its string is used to build the Location header in
   urlForbidden. */
35 AtomPtr forbiddenUrl
= NULL
;
/* Configuration variable: "Redirect code, 301 or 302."  Initialised
   to 302. */
36 int forbiddenRedirectCode
= 302;
/* Configuration variable: "Squid-style redirector."  Its string is
   passed to execlp as the program to run. */
38 AtomPtr redirector
= NULL
;
/* Configuration variable: "Redirect code to use with redirector."
   Initialised to 302. */
39 int redirectorRedirectCode
= 302;
/* Domain list filled in by parseDomainFile from forbiddenFile and
   passed to urlIsMatched by urlForbidden.  NULL until a file has been
   parsed. */
41 DomainPtr
*forbiddenDomains
= NULL
;
/* Compiled regex filled in by parseDomainFile from forbiddenFile and
   passed to urlIsMatched by urlForbidden.  NULL until a file has been
   parsed. */
42 regex_t
*forbiddenRegex
= NULL
;
/* Configuration variable: "File specifying uncachable URLs."  It is
   handed to parseDomainFile together with uncachableDomains and
   uncachableRegex. */
44 AtomPtr uncachableFile
= NULL
;
/* Domain list filled in by parseDomainFile from uncachableFile and
   passed to urlIsMatched by urlIsUncachable. */
45 DomainPtr
*uncachableDomains
= NULL
;
/* Compiled regex filled in by parseDomainFile from uncachableFile and
   passed to urlIsMatched by urlIsUncachable. */
46 regex_t
*uncachableRegex
= NULL
;
48 /* these three are only used internally by {parse,read}DomainFile */
49 /* to avoid having to pass it all as parameters */
/* Domain list under construction: allocated in parseDomainFile,
   appended to (and grown with realloc) in readDomainFile. */
50 static DomainPtr
*domains
;
/* Text of the regular expression under construction: allocated in
   parseDomainFile, appended to in readDomainFile, and finally handed
   to regcomp. */
51 static char *regexbuf
;
/* rlen/rsize: current length and allocated size of regexbuf.
   dlen/dsize: number of entries used and allocated in domains.
   Both buffers are grown to (size * 2 + 1) when they fill up. */
52 static int rlen
, rsize
, dlen
, dsize
;
/* Process id of the redirector child; used with waitpid and kill. */
55 static pid_t redirector_pid
= 0;
/* Descriptors used to talk to the redirector: requests are written to
   redirector_write_fd and replies are read from redirector_read_fd.
   A negative redirector_read_fd makes redirectorTrigger call
   runRedirector; both are reset to -1 after being closed. */
56 static int redirector_read_fd
= -1, redirector_write_fd
= -1;
/* Size in bytes of redirector_buffer, i.e. the largest reply that can
   be read from the redirector in one go. */
57 #define REDIRECTOR_BUFFER_SIZE 1024
/* Reply buffer of REDIRECTOR_BUFFER_SIZE bytes; allocated by
   runRedirector when it is still NULL. */
58 static char *redirector_buffer
= NULL
;
/* Singly linked queue of pending redirect requests.  urlForbidden
   appends at redirector_request_last; redirectorDestroyRequest removes
   the entry at redirector_request_first.  Both are NULL when the queue
   is empty. */
59 RedirectRequestPtr redirector_request_first
= NULL
,
60 redirector_request_last
= NULL
;
63 static int atomSetterForbidden(ConfigVariablePtr
, void*);
66 preinitForbidden(void)
68 CONFIG_VARIABLE_SETTABLE(forbiddenUrl
, CONFIG_ATOM
, configAtomSetter
,
69 "URL to which forbidden requests "
70 "should be redirected.");
71 CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode
, CONFIG_INT
,
73 "Redirect code, 301 or 302.");
74 CONFIG_VARIABLE_SETTABLE(forbiddenFile
, CONFIG_ATOM
, atomSetterForbidden
,
75 "File specifying forbidden URLs.");
77 CONFIG_VARIABLE_SETTABLE(redirector
, CONFIG_ATOM
, atomSetterForbidden
,
78 "Squid-style redirector.");
79 CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode
, CONFIG_INT
,
81 "Redirect code to use with redirector.");
83 CONFIG_VARIABLE_SETTABLE(uncachableFile
, CONFIG_ATOM
, atomSetterForbidden
,
84 "File specifying uncachable URLs.");
88 atomSetterForbidden(ConfigVariablePtr var
, void *value
)
91 return configAtomSetter(var
, value
);
95 readDomainFile(char *filename
)
100 int i
, j
, is_regex
, start
;
102 in
= fopen(filename
, "r");
105 do_log_error(L_ERROR
, errno
, "Couldn't open file %s", filename
);
110 rs
= fgets(buf
, 512, in
);
113 for(i
= 0; i
< 512; i
++) {
114 if(buf
[i
] != ' ' && buf
[i
] != '\t')
118 for(i
= start
; i
< 512; i
++) {
119 if(buf
[i
] == '#' || buf
[i
] == '\r' || buf
[i
] == '\n')
123 if(buf
[i
- 1] != ' ' && buf
[i
- 1] != '\t')
131 /* The significant part of the line is now between start and i */
134 for(j
= start
; j
< i
; j
++) {
135 if(buf
[j
] == '\\' || buf
[j
] == '*' || buf
[j
] == '/') {
142 while(rlen
+ i
- start
+ 8 >= rsize
) {
144 new_regexbuf
= realloc(regexbuf
, rsize
* 2 + 1);
145 if(new_regexbuf
== NULL
) {
146 do_log(L_ERROR
, "Couldn't reallocate regex.\n");
150 regexbuf
= new_regexbuf
;
151 rsize
= rsize
* 2 + 1;
154 rlen
= snnprintf(regexbuf
, rlen
, rsize
, "|");
155 rlen
= snnprintf(regexbuf
, rlen
, rsize
, "(");
156 rlen
= snnprint_n(regexbuf
, rlen
, rsize
, buf
+ start
, i
- start
);
157 rlen
= snnprintf(regexbuf
, rlen
, rsize
, ")");
159 DomainPtr new_domain
;
160 if(dlen
>= dsize
- 1) {
161 DomainPtr
*new_domains
;
162 new_domains
= realloc(domains
, (dsize
* 2 + 1) *
164 if(new_domains
== NULL
) {
166 "Couldn't reallocate domain list.\n");
170 domains
= new_domains
;
171 dsize
= dsize
* 2 + 1;
173 new_domain
= malloc(sizeof(DomainRec
) - 1 + i
- start
);
174 if(new_domain
== NULL
) {
175 do_log(L_ERROR
, "Couldn't allocate domain.\n");
179 new_domain
->length
= i
- start
;
180 memcpy(new_domain
->domain
, buf
+ start
, i
- start
);
181 domains
[dlen
++] = new_domain
;
189 parseDomainFile(AtomPtr file
,
190 DomainPtr
**domains_return
, regex_t
**regex_return
)
195 if(*domains_return
) {
196 DomainPtr
*domain
= *domains_return
;
201 free(*domains_return
);
202 *domains_return
= NULL
;
206 regfree(*regex_return
);
207 *regex_return
= NULL
;
210 if(!file
|| file
->length
== 0)
213 domains
= malloc(64 * sizeof(DomainPtr
));
214 if(domains
== NULL
) {
215 do_log(L_ERROR
, "Couldn't allocate domain list.\n");
221 regexbuf
= malloc(512);
222 if(regexbuf
== NULL
) {
223 do_log(L_ERROR
, "Couldn't allocate regex.\n");
230 rc
= stat(file
->string
, &ss
);
233 do_log_error(L_WARN
, errno
, "Couldn't stat file %s", file
->string
);
235 if(!S_ISDIR(ss
.st_mode
))
236 readDomainFile(file
->string
);
241 fts_argv
[0] = file
->string
;
243 fts
= fts_open(fts_argv
, FTS_LOGICAL
, NULL
);
248 if(fe
->fts_info
!= FTS_D
&& fe
->fts_info
!= FTS_DP
&&
249 fe
->fts_info
!= FTS_DC
&& fe
->fts_info
!= FTS_DNR
)
250 readDomainFile(fe
->fts_accpath
);
254 do_log_error(L_ERROR
, errno
,
255 "Couldn't scan directory %s", file
->string
);
261 domains
[dlen
] = NULL
;
270 regex
= malloc(sizeof(regex_t
));
271 rc
= regcomp(regex
, regexbuf
, REG_EXTENDED
| REG_NOSUB
);
273 do_log(L_ERROR
, "Couldn't compile regex: %d.\n", rc
);
282 *domains_return
= domains
;
283 *regex_return
= regex
;
294 forbiddenFile
= expandTilde(forbiddenFile
);
296 if(forbiddenFile
== NULL
) {
297 forbiddenFile
= expandTilde(internAtom("~/.polipo-forbidden"));
299 if(access(forbiddenFile
->string
, F_OK
) < 0) {
300 releaseAtom(forbiddenFile
);
301 forbiddenFile
= NULL
;
306 if(forbiddenFile
== NULL
) {
307 if(access("/etc/polipo/forbidden", F_OK
) >= 0)
308 forbiddenFile
= internAtom("/etc/polipo/forbidden");
311 parseDomainFile(forbiddenFile
, &forbiddenDomains
, &forbiddenRegex
);
315 uncachableFile
= expandTilde(uncachableFile
);
317 if(uncachableFile
== NULL
) {
318 uncachableFile
= expandTilde(internAtom("~/.polipo-uncachable"));
320 if(access(uncachableFile
->string
, F_OK
) < 0) {
321 releaseAtom(uncachableFile
);
322 uncachableFile
= NULL
;
327 if(uncachableFile
== NULL
) {
328 if(access("/etc/polipo/uncachable", F_OK
) >= 0)
329 uncachableFile
= internAtom("/etc/polipo/uncachable");
332 parseDomainFile(uncachableFile
, &uncachableDomains
, &uncachableRegex
);
338 urlIsMatched(char *url
, int length
, DomainPtr
*domains
, regex_t
*regex
)
343 if(memcmp(url
, "http://", 7) != 0)
349 for(i
= 8; i
< length
; i
++) {
355 if((*domain
)->length
<= (i
- 7) &&
356 (url
[i
- (*domain
)->length
- 1] == '.' ||
357 url
[i
- (*domain
)->length
- 1] == '/') &&
358 memcmp(url
+ i
- (*domain
)->length
,
360 (*domain
)->length
) == 0)
367 /* url is not necessarily 0-terminated */
375 urlcopy
= malloc(length
+ 1);
379 memcpy(urlcopy
, url
, length
);
380 urlcopy
[length
] = '\0';
382 rc
= regexec(regex
, urlcopy
, 0, NULL
, 0);
384 if(urlcopy
!= smallcopy
)
393 urlIsUncachable(char *url
, int length
)
395 return urlIsMatched(url
, length
, uncachableDomains
, uncachableRegex
);
399 urlForbidden(AtomPtr url
,
400 int (*handler
)(int, AtomPtr
, AtomPtr
, AtomPtr
, void*),
403 int forbidden
= urlIsMatched(url
->string
, url
->length
,
404 forbiddenDomains
, forbiddenRegex
);
406 AtomPtr message
= NULL
, headers
= NULL
;
410 message
= internAtomF("Forbidden URL %s", url
->string
);
412 code
= forbiddenRedirectCode
;
413 headers
= internAtomF("\r\nLocation: %s", forbiddenUrl
->string
);
419 #ifndef NO_REDIRECTOR
420 if(code
== 0 && redirector
) {
421 RedirectRequestPtr request
;
422 request
= malloc(sizeof(RedirectRequestRec
));
423 if(request
== NULL
) {
424 do_log(L_ERROR
, "Couldn't allocate redirect request.\n");
428 request
->handler
= handler
;
429 request
->data
= closure
;
430 if(redirector_request_first
== NULL
)
431 redirector_request_first
= request
;
433 redirector_request_last
->next
= request
;
434 redirector_request_last
= request
;
435 request
->next
= NULL
;
436 if(request
== redirector_request_first
)
444 handler(code
, url
, message
, headers
, closure
);
448 #ifndef NO_REDIRECTOR
450 logExitStatus(int status
)
452 if(WIFEXITED(status
) && WEXITSTATUS(status
) == 142)
453 /* See child code in runRedirector */
454 do_log(L_ERROR
, "Couldn't start redirector.\n");
457 WIFEXITED(status
) ? "with status" :
458 WIFSIGNALED(status
) ? "on signal" :
459 "with unknown status";
461 WIFEXITED(status
) ? WEXITSTATUS(status
) :
462 WIFSIGNALED(status
) ? WTERMSIG(status
) :
465 "Redirector exited %s %d.\n", reason
, value
);
472 int rc
, status
, dead
;
474 if(redirector_read_fd
>= 0) {
475 rc
= waitpid(redirector_pid
, &status
, WNOHANG
);
477 close(redirector_read_fd
);
478 redirector_read_fd
= -1;
479 close(redirector_write_fd
);
480 redirector_write_fd
= -1;
482 rc
= kill(redirector_pid
, SIGTERM
);
483 if(rc
< 0 && errno
!= ESRCH
) {
484 do_log_error(L_ERROR
, errno
, "Couldn't kill redirector");
489 rc
= waitpid(redirector_pid
, &status
, 0);
490 } while(rc
< 0 && errno
== EINTR
);
492 do_log_error(L_ERROR
, errno
,
493 "Couldn't wait for redirector's death");
495 logExitStatus(status
);
501 redirectorDestroyRequest(RedirectRequestPtr request
)
503 assert(redirector_request_first
== request
);
504 redirector_request_first
= request
->next
;
505 if(redirector_request_first
== NULL
)
506 redirector_request_last
= NULL
;
511 redirectorTrigger(void)
513 RedirectRequestPtr request
= redirector_request_first
;
519 if(redirector_read_fd
< 0) {
520 rc
= runRedirector(&redirector_pid
,
521 &redirector_read_fd
, &redirector_write_fd
);
523 request
->handler(rc
, request
->url
, NULL
, NULL
, request
->data
);
524 redirectorDestroyRequest(request
);
528 do_stream_2(IO_WRITE
, redirector_write_fd
, 0,
529 request
->url
->string
, request
->url
->length
,
531 redirectorStreamHandler1
, request
);
535 redirectorStreamHandler1(int status
,
536 FdEventHandlerPtr event
,
537 StreamRequestPtr srequest
)
539 RedirectRequestPtr request
= (RedirectRequestPtr
)srequest
->data
;
544 do_log_error(L_ERROR
, -status
, "Write to redirector failed");
548 if(!streamRequestDone(srequest
))
551 do_stream(IO_READ
, redirector_read_fd
, 0,
552 redirector_buffer
, REDIRECTOR_BUFFER_SIZE
,
553 redirectorStreamHandler2
, request
);
557 request
->handler(status
< 0 ? status
: -EPIPE
,
558 request
->url
, NULL
, NULL
, request
->data
);
559 redirectorDestroyRequest(request
);
565 redirectorStreamHandler2(int status
,
566 FdEventHandlerPtr event
,
567 StreamRequestPtr srequest
)
569 RedirectRequestPtr request
= (RedirectRequestPtr
)srequest
->data
;
576 do_log_error(L_ERROR
, -status
, "Read from redirector failed");
577 request
->handler(status
, request
->url
, NULL
, NULL
, request
->data
);
580 c
= memchr(redirector_buffer
, '\n', srequest
->offset
);
582 if(!status
&& srequest
->offset
< REDIRECTOR_BUFFER_SIZE
)
584 do_log(L_ERROR
, "Redirector returned incomplete reply.\n");
585 request
->handler(-EREDIRECTOR
, request
->url
, NULL
, NULL
, request
->data
);
590 if(srequest
->offset
> c
+ 1 - redirector_buffer
)
591 do_log(L_WARN
, "Stray bytes in redirector output.\n");
593 if(c
> redirector_buffer
+ 1 &&
594 (c
- redirector_buffer
!= request
->url
->length
||
595 memcmp(redirector_buffer
, request
->url
->string
,
596 request
->url
->length
) != 0)) {
597 code
= redirectorRedirectCode
;
598 message
= internAtom("Redirected by external redirector");
599 if(message
== NULL
) {
600 request
->handler(-ENOMEM
, request
->url
, NULL
, NULL
, request
->data
);
604 headers
= internAtomF("\r\nLocation: %s", redirector_buffer
);
605 if(headers
== NULL
) {
606 releaseAtom(message
);
607 request
->handler(-ENOMEM
, request
->url
, NULL
, NULL
, request
->data
);
615 request
->handler(code
, request
->url
,
616 message
, headers
, request
->data
);
620 redirectorDestroyRequest(request
);
630 runRedirector(pid_t
*pid_return
, int *read_fd_return
, int *write_fd_return
)
634 int filedes1
[2], filedes2
[2];
635 sigset_t ss
, old_mask
;
639 if(redirector_buffer
== NULL
) {
640 redirector_buffer
= malloc(REDIRECTOR_BUFFER_SIZE
);
641 if(redirector_buffer
== NULL
)
662 interestingSignals(&ss
);
664 rc
= sigprocmask(SIG_BLOCK
, &ss
, &old_mask
);
665 } while (rc
< 0 && errno
== EINTR
);
679 rc
= sigprocmask(SIG_SETMASK
, &old_mask
, NULL
);
680 } while(rc
< 0 && errno
== EINTR
);
687 rc
= setNonblocking(filedes1
[1], 1);
689 rc
= setNonblocking(filedes2
[0], 1);
695 /* This is completely unnecessary -- if the redirector cannot be
696 started, redirectorStreamHandler1 will get EPIPE straight away --,
697 but it improves error messages somewhat. */
698 rc
= waitpid(pid
, &status
, WNOHANG
);
700 logExitStatus(status
);
708 *read_fd_return
= filedes2
[0];
709 *write_fd_return
= filedes1
[1];
712 /* This comes at the end so that the fail* labels can work */
720 rc
= sigprocmask(SIG_SETMASK
, &old_mask
, NULL
);
721 } while (rc
< 0 && errno
== EINTR
);
726 dup2(filedes1
[0], 0);
728 dup2(filedes2
[1], 1);
730 execlp(redirector
->string
, redirector
->string
, NULL
);
738 rc2
= sigprocmask(SIG_SETMASK
, &old_mask
, NULL
);
739 } while(rc2
< 0 && errno
== EINTR
);
747 free(redirector_buffer
);
748 redirector_buffer
= NULL
;
777 urlIsUncachable(char *url
, int length
)
783 urlForbidden(AtomPtr url
,
784 int (*handler
)(int, AtomPtr
, AtomPtr
, AtomPtr
, void*),
787 handler(0, url
, NULL
, NULL
, closure
);