/* Updated CHANGES for 1.0.2.
   [polipo.git] / forbidden.c
   blob 04976974f02214d9671a1e750d36c08a4946c4da */
/*
Copyright (c) 2003-2006 by Juliusz Chroboczek

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include "polipo.h"

#ifndef NO_FORBIDDEN

#include <regex.h>
/* One literal domain entry read from a forbidden/uncachable file.
   Records are allocated with room for `length' bytes in `domain'
   (see readDomainFile); the bytes are copied with memcpy and are NOT
   NUL-terminated, so always use `length' when comparing. */
typedef struct _Domain {
    int length;                 /* number of valid bytes in domain[] */
    char domain[1];             /* first byte of the variable-length name */
} DomainRec, *DomainPtr;
34 AtomPtr forbiddenFile = NULL;
35 AtomPtr forbiddenUrl = NULL;
36 int forbiddenRedirectCode = 302;
38 AtomPtr redirector = NULL;
39 int redirectorRedirectCode = 302;
41 DomainPtr *forbiddenDomains = NULL;
42 regex_t *forbiddenRegex = NULL;
44 AtomPtr uncachableFile = NULL;
45 DomainPtr *uncachableDomains = NULL;
46 regex_t *uncachableRegex = NULL;
48 /* these three are only used internally by {parse,read}DomainFile */
49 /* to avoid having to pass it all as parameters */
50 static DomainPtr *domains;
51 static char *regexbuf;
52 static int rlen, rsize, dlen, dsize;
54 #ifndef NO_REDIRECTOR
55 static pid_t redirector_pid = 0;
56 static int redirector_read_fd = -1, redirector_write_fd = -1;
57 #define REDIRECTOR_BUFFER_SIZE 1024
58 static char *redirector_buffer = NULL;
59 RedirectRequestPtr redirector_request_first = NULL,
60 redirector_request_last = NULL;
61 #endif
63 static int atomSetterForbidden(ConfigVariablePtr, void*);
65 void
66 preinitForbidden(void)
68 CONFIG_VARIABLE_SETTABLE(forbiddenUrl, CONFIG_ATOM, configAtomSetter,
69 "URL to which forbidden requests "
70 "should be redirected.");
71 CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode, CONFIG_INT,
72 configIntSetter,
73 "Redirect code, 301 or 302.");
74 CONFIG_VARIABLE_SETTABLE(forbiddenFile, CONFIG_ATOM, atomSetterForbidden,
75 "File specifying forbidden URLs.");
76 #ifndef NO_REDIRECTOR
77 CONFIG_VARIABLE_SETTABLE(redirector, CONFIG_ATOM, atomSetterForbidden,
78 "Squid-style redirector.");
79 CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode, CONFIG_INT,
80 configIntSetter,
81 "Redirect code to use with redirector.");
82 #endif
83 CONFIG_VARIABLE_SETTABLE(uncachableFile, CONFIG_ATOM, atomSetterForbidden,
84 "File specifying uncachable URLs.");
87 static int
88 atomSetterForbidden(ConfigVariablePtr var, void *value)
90 initForbidden();
91 return configAtomSetter(var, value);
94 int
95 readDomainFile(char *filename)
97 FILE *in;
98 char buf[512];
99 char *rs;
100 int i, j, is_regex, start;
102 in = fopen(filename, "r");
103 if(in == NULL) {
104 if(errno != ENOENT)
105 do_log_error(L_ERROR, errno, "Couldn't open file %s", filename);
106 return -1;
109 while(1) {
110 rs = fgets(buf, 512, in);
111 if(rs == NULL)
112 break;
113 for(i = 0; i < 512; i++) {
114 if(buf[i] != ' ' && buf[i] != '\t')
115 break;
117 start = i;
118 for(i = start; i < 512; i++) {
119 if(buf[i] == '#' || buf[i] == '\r' || buf[i] == '\n')
120 break;
122 while(i > start) {
123 if(buf[i - 1] != ' ' && buf[i - 1] != '\t')
124 break;
125 i--;
128 if(i <= start)
129 continue;
131 /* The significant part of the line is now between start and i */
133 is_regex = 0;
134 for(j = start; j < i; j++) {
135 if(buf[j] == '\\' || buf[j] == '*' || buf[j] == '/') {
136 is_regex = 1;
137 break;
141 if(is_regex) {
142 while(rlen + i - start + 8 >= rsize) {
143 char *new_regexbuf;
144 new_regexbuf = realloc(regexbuf, rsize * 2 + 1);
145 if(new_regexbuf == NULL) {
146 do_log(L_ERROR, "Couldn't reallocate regex.\n");
147 fclose(in);
148 return -1;
150 regexbuf = new_regexbuf;
151 rsize = rsize * 2 + 1;
153 if(rlen != 0)
154 rlen = snnprintf(regexbuf, rlen, rsize, "|");
155 rlen = snnprintf(regexbuf, rlen, rsize, "(");
156 rlen = snnprint_n(regexbuf, rlen, rsize, buf + start, i - start);
157 rlen = snnprintf(regexbuf, rlen, rsize, ")");
158 } else {
159 DomainPtr new_domain;
160 if(dlen >= dsize - 1) {
161 DomainPtr *new_domains;
162 new_domains = realloc(domains, (dsize * 2 + 1) *
163 sizeof(DomainPtr));
164 if(new_domains == NULL) {
165 do_log(L_ERROR,
166 "Couldn't reallocate domain list.\n");
167 fclose(in);
168 return -1;
170 domains = new_domains;
171 dsize = dsize * 2 + 1;
173 new_domain = malloc(sizeof(DomainRec) - 1 + i - start);
174 if(new_domain == NULL) {
175 do_log(L_ERROR, "Couldn't allocate domain.\n");
176 fclose(in);
177 return -1;
179 new_domain->length = i - start;
180 memcpy(new_domain->domain, buf + start, i - start);
181 domains[dlen++] = new_domain;
184 fclose(in);
185 return 1;
188 void
189 parseDomainFile(AtomPtr file,
190 DomainPtr **domains_return, regex_t **regex_return)
192 struct stat ss;
193 int rc;
195 if(*domains_return) {
196 DomainPtr *domain = *domains_return;
197 while(*domain) {
198 free(*domain);
199 domain++;
201 free(*domains_return);
202 *domains_return = NULL;
205 if(*regex_return) {
206 regfree(*regex_return);
207 *regex_return = NULL;
210 if(!file || file->length == 0)
211 return;
213 domains = malloc(64 * sizeof(DomainPtr));
214 if(domains == NULL) {
215 do_log(L_ERROR, "Couldn't allocate domain list.\n");
216 return;
218 dlen = 0;
219 dsize = 64;
221 regexbuf = malloc(512);
222 if(regexbuf == NULL) {
223 do_log(L_ERROR, "Couldn't allocate regex.\n");
224 free(domains);
225 return;
227 rlen = 0;
228 rsize = 512;
230 rc = stat(file->string, &ss);
231 if(rc < 0) {
232 if(errno != ENOENT)
233 do_log_error(L_WARN, errno, "Couldn't stat file %s", file->string);
234 } else {
235 if(!S_ISDIR(ss.st_mode))
236 readDomainFile(file->string);
237 else {
238 char *fts_argv[2];
239 FTS *fts;
240 FTSENT *fe;
241 fts_argv[0] = file->string;
242 fts_argv[1] = NULL;
243 fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
244 if(fts) {
245 while(1) {
246 fe = fts_read(fts);
247 if(!fe) break;
248 if(fe->fts_info != FTS_D && fe->fts_info != FTS_DP &&
249 fe->fts_info != FTS_DC && fe->fts_info != FTS_DNR)
250 readDomainFile(fe->fts_accpath);
252 fts_close(fts);
253 } else {
254 do_log_error(L_ERROR, errno,
255 "Couldn't scan directory %s", file->string);
260 if(dlen > 0) {
261 domains[dlen] = NULL;
262 } else {
263 free(domains);
264 domains = NULL;
267 regex_t *regex;
269 if(rlen > 0) {
270 regex = malloc(sizeof(regex_t));
271 rc = regcomp(regex, regexbuf, REG_EXTENDED | REG_NOSUB);
272 if(rc != 0) {
273 do_log(L_ERROR, "Couldn't compile regex: %d.\n", rc);
274 free(regex);
275 regex = NULL;
277 } else {
278 regex = NULL;
280 free(regexbuf);
282 *domains_return = domains;
283 *regex_return = regex;
285 return;
288 void
289 initForbidden(void)
291 redirectorKill();
293 if(forbiddenFile)
294 forbiddenFile = expandTilde(forbiddenFile);
296 if(forbiddenFile == NULL) {
297 forbiddenFile = expandTilde(internAtom("~/.polipo-forbidden"));
298 if(forbiddenFile) {
299 if(access(forbiddenFile->string, F_OK) < 0) {
300 releaseAtom(forbiddenFile);
301 forbiddenFile = NULL;
306 if(forbiddenFile == NULL) {
307 if(access("/etc/polipo/forbidden", F_OK) >= 0)
308 forbiddenFile = internAtom("/etc/polipo/forbidden");
311 parseDomainFile(forbiddenFile, &forbiddenDomains, &forbiddenRegex);
314 if(uncachableFile)
315 uncachableFile = expandTilde(uncachableFile);
317 if(uncachableFile == NULL) {
318 uncachableFile = expandTilde(internAtom("~/.polipo-uncachable"));
319 if(uncachableFile) {
320 if(access(uncachableFile->string, F_OK) < 0) {
321 releaseAtom(uncachableFile);
322 uncachableFile = NULL;
327 if(uncachableFile == NULL) {
328 if(access("/etc/polipo/uncachable", F_OK) >= 0)
329 uncachableFile = internAtom("/etc/polipo/uncachable");
332 parseDomainFile(uncachableFile, &uncachableDomains, &uncachableRegex);
334 return;
338 urlIsMatched(char *url, int length, DomainPtr *domains, regex_t *regex)
340 if(length < 8)
341 return 0;
343 if(memcmp(url, "http://", 7) != 0)
344 return 0;
346 if(domains) {
347 int i;
348 DomainPtr *domain;
349 for(i = 8; i < length; i++) {
350 if(url[i] == '/')
351 break;
353 domain = domains;
354 while(*domain) {
355 if((*domain)->length <= (i - 7) &&
356 (url[i - (*domain)->length - 1] == '.' ||
357 url[i - (*domain)->length - 1] == '/') &&
358 memcmp(url + i - (*domain)->length,
359 (*domain)->domain,
360 (*domain)->length) == 0)
361 return 1;
362 domain++;
365 if(regex) {
366 if(!regexec(regex, url, 0, NULL, 0))
367 return 1;
369 return 0;
373 urlIsUncachable(char *url, int length)
375 return urlIsMatched(url, length, uncachableDomains, uncachableRegex);
379 urlForbidden(AtomPtr url,
380 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
381 void *closure)
383 int forbidden = urlIsMatched(url->string, url->length,
384 forbiddenDomains, forbiddenRegex);
385 int code = 0;
386 AtomPtr message = NULL, headers = NULL;
389 if(forbidden) {
390 message = internAtomF("Forbidden URL %s", url->string);
391 if(forbiddenUrl) {
392 code = forbiddenRedirectCode;
393 headers = internAtomF("\r\nLocation: %s", forbiddenUrl->string);
394 } else {
395 code = 403;
399 #ifndef NO_REDIRECTOR
400 if(code == 0 && redirector) {
401 RedirectRequestPtr request;
402 request = malloc(sizeof(RedirectRequestRec));
403 if(request == NULL) {
404 do_log(L_ERROR, "Couldn't allocate redirect request.\n");
405 goto done;
407 request->url = url;
408 request->handler = handler;
409 request->data = closure;
410 if(redirector_request_first == NULL)
411 redirector_request_first = request;
412 else
413 redirector_request_last->next = request;
414 redirector_request_last = request;
415 request->next = NULL;
416 if(request == redirector_request_first)
417 redirectorTrigger();
418 return 1;
421 #endif
423 done:
424 handler(code, url, message, headers, closure);
425 return 1;
428 #ifndef NO_REDIRECTOR
429 static void
430 logExitStatus(int status)
432 if(WIFEXITED(status) && WEXITSTATUS(status) == 142)
433 /* See child code in runRedirector */
434 do_log(L_ERROR, "Couldn't start redirector.\n");
435 else {
436 char *reason =
437 WIFEXITED(status) ? "with status" :
438 WIFSIGNALED(status) ? "on signal" :
439 "with unknown status";
440 int value =
441 WIFEXITED(status) ? WEXITSTATUS(status) :
442 WIFSIGNALED(status) ? WTERMSIG(status) :
443 status;
444 do_log(L_ERROR,
445 "Redirector exited %s %d.\n", reason, value);
449 void
450 redirectorKill(void)
452 int rc, status, dead;
454 if(redirector_read_fd >= 0) {
455 rc = waitpid(redirector_pid, &status, WNOHANG);
456 dead = (rc > 0);
457 close(redirector_read_fd);
458 redirector_read_fd = -1;
459 close(redirector_write_fd);
460 redirector_write_fd = -1;
461 if(!dead) {
462 rc = kill(redirector_pid, SIGTERM);
463 if(rc < 0 && errno != ESRCH) {
464 do_log_error(L_ERROR, errno, "Couldn't kill redirector");
465 redirector_pid = -1;
466 return;
468 do {
469 rc = waitpid(redirector_pid, &status, 0);
470 } while(rc < 0 && errno == EINTR);
471 if(rc < 0)
472 do_log_error(L_ERROR, errno,
473 "Couldn't wait for redirector's death");
474 } else
475 logExitStatus(status);
476 redirector_pid = -1;
480 static void
481 redirectorDestroyRequest(RedirectRequestPtr request)
483 assert(redirector_request_first == request);
484 redirector_request_first = request->next;
485 if(redirector_request_first == NULL)
486 redirector_request_last = NULL;
487 free(request);
490 void
491 redirectorTrigger(void)
493 RedirectRequestPtr request = redirector_request_first;
494 int rc;
496 if(!request)
497 return;
499 if(redirector_read_fd < 0) {
500 rc = runRedirector(&redirector_pid,
501 &redirector_read_fd, &redirector_write_fd);
502 if(rc < 0) {
503 request->handler(rc, request->url, NULL, NULL, request->data);
504 redirectorDestroyRequest(request);
505 return;
508 do_stream_2(IO_WRITE, redirector_write_fd, 0,
509 request->url->string, request->url->length,
510 "\n", 1,
511 redirectorStreamHandler1, request);
515 redirectorStreamHandler1(int status,
516 FdEventHandlerPtr event,
517 StreamRequestPtr srequest)
519 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
521 if(status) {
522 if(status >= 0)
523 status = -EPIPE;
524 do_log_error(L_ERROR, -status, "Write to redirector failed");
525 goto fail;
528 if(!streamRequestDone(srequest))
529 return 0;
531 do_stream(IO_READ, redirector_read_fd, 0,
532 redirector_buffer, REDIRECTOR_BUFFER_SIZE,
533 redirectorStreamHandler2, request);
534 return 1;
536 fail:
537 request->handler(status < 0 ? status : -EPIPE,
538 request->url, NULL, NULL, request->data);
539 redirectorDestroyRequest(request);
540 redirectorKill();
541 return 1;
545 redirectorStreamHandler2(int status,
546 FdEventHandlerPtr event,
547 StreamRequestPtr srequest)
549 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
550 char *c;
551 AtomPtr message;
552 AtomPtr headers;
553 int code;
555 if(status < 0) {
556 do_log_error(L_ERROR, -status, "Read from redirector failed");
557 request->handler(status, request->url, NULL, NULL, request->data);
558 goto kill;
560 c = memchr(redirector_buffer, '\n', srequest->offset);
561 if(!c) {
562 if(!status && srequest->offset < REDIRECTOR_BUFFER_SIZE)
563 return 0;
564 do_log(L_ERROR, "Redirector returned incomplete reply.\n");
565 request->handler(-EREDIRECTOR, request->url, NULL, NULL, request->data);
566 goto kill;
568 *c = '\0';
570 if(srequest->offset > c + 1 - redirector_buffer)
571 do_log(L_WARN, "Stray bytes in redirector output.\n");
573 if(c > redirector_buffer + 1 &&
574 (c - redirector_buffer != request->url->length ||
575 memcmp(redirector_buffer, request->url->string,
576 request->url->length) != 0)) {
577 code = redirectorRedirectCode;
578 message = internAtom("Redirected by external redirector");
579 if(message == NULL) {
580 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
581 goto kill;
584 headers = internAtomF("\r\nLocation: %s", redirector_buffer);
585 if(headers == NULL) {
586 releaseAtom(message);
587 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
588 goto kill;
590 } else {
591 code = 0;
592 message = NULL;
593 headers = NULL;
595 request->handler(code, request->url,
596 message, headers, request->data);
597 goto cont;
599 cont:
600 redirectorDestroyRequest(request);
601 redirectorTrigger();
602 return 1;
604 kill:
605 redirectorKill();
606 goto cont;
610 runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return)
612 int rc, rc2, status;
613 pid_t pid;
614 int filedes1[2], filedes2[2];
615 sigset_t ss, old_mask;
617 assert(redirector);
619 if(redirector_buffer == NULL) {
620 redirector_buffer = malloc(REDIRECTOR_BUFFER_SIZE);
621 if(redirector_buffer == NULL)
622 return -errno;
625 rc = pipe(filedes1);
626 if(rc < 0) {
627 rc = -errno;
628 goto fail1;
632 rc = pipe(filedes2);
633 if(rc < 0) {
634 rc = -errno;
635 goto fail2;
638 fflush(stdout);
639 fflush(stderr);
640 fflush(logF);
642 interestingSignals(&ss);
643 do {
644 rc = sigprocmask(SIG_BLOCK, &ss, &old_mask);
645 } while (rc < 0 && errno == EINTR);
646 if(rc < 0) {
647 rc = -errno;
648 goto fail3;
651 pid = fork();
652 if(pid < 0) {
653 rc = -errno;
654 goto fail4;
657 if(pid > 0) {
658 do {
659 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
660 } while(rc < 0 && errno == EINTR);
662 if(rc < 0) {
663 rc = -errno;
664 goto fail4;
667 rc = setNonblocking(filedes1[1], 1);
668 if(rc >= 0)
669 rc = setNonblocking(filedes2[0], 1);
670 if(rc < 0) {
671 rc = -errno;
672 goto fail4;
675 /* This is completely unnecesary -- if the redirector cannot be
676 started, redirectorStreamHandler1 will get EPIPE straight away --,
677 but it improves error messages somewhat. */
678 rc = waitpid(pid, &status, WNOHANG);
679 if(rc > 0) {
680 logExitStatus(status);
681 rc = -EREDIRECTOR;
682 goto fail4;
683 } else if(rc < 0) {
684 rc = -errno;
685 goto fail4;
688 *read_fd_return = filedes2[0];
689 *write_fd_return = filedes1[1];
691 *pid_return = pid;
692 /* This comes at the end so that the fail* labels can work */
693 close(filedes1[0]);
694 close(filedes2[1]);
695 } else {
696 close(filedes1[1]);
697 close(filedes2[0]);
698 uninitEvents();
699 do {
700 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
701 } while (rc < 0 && errno == EINTR);
702 if(rc < 0)
703 exit(142);
705 if(filedes1[0] != 0)
706 dup2(filedes1[0], 0);
707 if(filedes2[1] != 1)
708 dup2(filedes2[1], 1);
710 execlp(redirector->string, redirector->string, NULL);
711 exit(142);
712 /* NOTREACHED */
714 return 1;
716 fail4:
717 do {
718 rc2 = sigprocmask(SIG_SETMASK, &old_mask, NULL);
719 } while(rc2 < 0 && errno == EINTR);
720 fail3:
721 close(filedes2[0]);
722 close(filedes2[1]);
723 fail2:
724 close(filedes1[0]);
725 close(filedes1[1]);
726 fail1:
727 free(redirector_buffer);
728 redirector_buffer = NULL;
729 return rc;
732 #else
734 void
735 redirectorKill(void)
737 return;
740 #endif
742 #else
744 void
745 preinitForbidden()
747 return;
/* Built with NO_FORBIDDEN: nothing to initialise. */
void
initForbidden(void)
{
    return;
}
/* Built with NO_FORBIDDEN: no URL is ever considered uncachable.
   Both arguments are ignored; always returns 0. */
int
urlIsUncachable(char *url, int length)
{
    return 0;
}
763 urlForbidden(AtomPtr url,
764 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
765 void *closure)
767 handler(0, url, NULL, NULL, closure);
768 return 1;
771 #endif