Update CHANGES; fix a bogus entry, and mention Jake's work.
[polipo.git] / forbidden.c
blob0aa659c48f41bd33d3322d9a9c64ae31d38b89ca
1 /*
2 Copyright (c) 2003-2006 by Juliusz Chroboczek
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in
12 all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 THE SOFTWARE.
23 #include "polipo.h"
25 #ifndef NO_FORBIDDEN
27 #include <regex.h>
/*
 * One literal (non-regex) entry of a forbidden/uncachable list.
 *
 * length is the number of bytes stored in domain.  readDomainFile
 * allocates each record with room for exactly `length` bytes in the
 * trailing array and copies the entry with memcpy, so the bytes are not
 * NUL-terminated; always use length when comparing.
 */
29 typedef struct _Domain {
30 int length;
/* Declared with one element; actually allocated over-size (see above). */
31 char domain[1];
32 } DomainRec, *DomainPtr;
/* Configuration for forbidden URLs: list file, optional redirect target
   and the HTTP code used when redirecting (registered in
   preinitForbidden). */
34 AtomPtr forbiddenFile = NULL;
35 AtomPtr forbiddenUrl = NULL;
36 int forbiddenRedirectCode = 302;
/* External redirector command and the HTTP code used for its redirects. */
38 AtomPtr redirector = NULL;
39 int redirectorRedirectCode = 302;
/* Parsed forbidden list: NULL-terminated array of literal domains and a
   compiled regex; both are (re)built by parseDomainFile. */
41 DomainPtr *forbiddenDomains = NULL;
42 regex_t *forbiddenRegex = NULL;
/* Same pair of structures for the uncachable list, plus its file name. */
44 AtomPtr uncachableFile = NULL;
45 DomainPtr *uncachableDomains = NULL;
46 regex_t *uncachableRegex = NULL;
48 /* the following six statics are only used internally by {parse,read}DomainFile */
49 /* to avoid having to pass them all as parameters */
/* domains/dlen/dsize: growing array of literal entries, used count and
   capacity.  regexbuf/rlen/rsize: growing regex text, used length and
   capacity. */
50 static DomainPtr *domains;
51 static char *regexbuf;
52 static int rlen, rsize, dlen, dsize;
54 #ifndef NO_REDIRECTOR
/* State of the redirector child process: its pid and the parent's ends of
   the two pipes (-1 when no redirector is running). */
55 static pid_t redirector_pid = 0;
56 static int redirector_read_fd = -1, redirector_write_fd = -1;
/* Size of the buffer that receives one reply line from the redirector. */
57 #define REDIRECTOR_BUFFER_SIZE 1024
/* Allocated on demand by runRedirector. */
58 static char *redirector_buffer = NULL;
/* Singly linked FIFO of requests waiting for the redirector; the head is
   the request currently being processed. */
59 RedirectRequestPtr redirector_request_first = NULL,
60 redirector_request_last = NULL;
61 #endif
/* Setter used for the atom-valued variables registered below. */
63 static int atomSetterForbidden(ConfigVariablePtr, void*);
/*
 * Register this module's run-time settable configuration variables.
 *
 * forbiddenUrl and the two redirect codes use the generic setters;
 * forbiddenFile, redirector and uncachableFile use atomSetterForbidden,
 * which additionally calls initForbidden() when they are set.
 * The redirector variables are only registered when NO_REDIRECTOR is
 * not defined.
 */
65 void
66 preinitForbidden(void)
68 CONFIG_VARIABLE_SETTABLE(forbiddenUrl, CONFIG_ATOM, configAtomSetter,
69 "URL to which forbidden requests "
70 "should be redirected.");
71 CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode, CONFIG_INT,
72 configIntSetter,
73 "Redirect code, 301 or 302.");
74 CONFIG_VARIABLE_SETTABLE(forbiddenFile, CONFIG_ATOM, atomSetterForbidden,
75 "File specifying forbidden URLs.");
76 #ifndef NO_REDIRECTOR
77 CONFIG_VARIABLE_SETTABLE(redirector, CONFIG_ATOM, atomSetterForbidden,
78 "Squid-style redirector.");
79 CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode, CONFIG_INT,
80 configIntSetter,
81 "Redirect code to use with redirector.");
82 #endif
83 CONFIG_VARIABLE_SETTABLE(uncachableFile, CONFIG_ATOM, atomSetterForbidden,
84 "File specifying uncachable URLs.");
/*
 * Config setter for the atom variables that affect the forbidden and
 * uncachable lists: re-runs initForbidden(), then stores the value with
 * configAtomSetter and returns that setter's result.
 *
 * NOTE(review): initForbidden() is called *before* the new value is
 * stored, so the re-initialisation appears to run against the previous
 * value of the variable -- confirm whether this ordering is intended.
 */
87 static int
88 atomSetterForbidden(ConfigVariablePtr var, void *value)
90 initForbidden();
91 return configAtomSetter(var, value);
94 int
95 readDomainFile(char *filename)
97 FILE *in;
98 char buf[512];
99 char *rs;
100 int i, j, is_regex, start;
102 in = fopen(filename, "r");
103 if(in == NULL) {
104 if(errno != ENOENT)
105 do_log_error(L_ERROR, errno, "Couldn't open file %s", filename);
106 return -1;
109 while(1) {
110 rs = fgets(buf, 512, in);
111 if(rs == NULL)
112 break;
113 for(i = 0; i < 512; i++) {
114 if(buf[i] != ' ' && buf[i] != '\t')
115 break;
117 start = i;
118 for(i = start; i < 512; i++) {
119 if(buf[i] == '#' || buf[i] == '\r' || buf[i] == '\n')
120 break;
122 while(i > start) {
123 if(buf[i - 1] != ' ' && buf[i - 1] != '\t')
124 break;
125 i--;
128 if(i <= start)
129 continue;
131 /* The significant part of the line is now between start and i */
133 is_regex = 0;
134 for(j = start; j < i; j++) {
135 if(buf[j] == '\\' || buf[j] == '*' || buf[j] == '/') {
136 is_regex = 1;
137 break;
141 if(is_regex) {
142 while(rlen + i - start + 8 >= rsize) {
143 char *new_regexbuf;
144 new_regexbuf = realloc(regexbuf, rsize * 2 + 1);
145 if(new_regexbuf == NULL) {
146 do_log(L_ERROR, "Couldn't reallocate regex.\n");
147 fclose(in);
148 return -1;
150 regexbuf = new_regexbuf;
151 rsize = rsize * 2 + 1;
153 if(rlen != 0)
154 rlen = snnprintf(regexbuf, rlen, rsize, "|");
155 rlen = snnprintf(regexbuf, rlen, rsize, "(");
156 rlen = snnprint_n(regexbuf, rlen, rsize, buf + start, i - start);
157 rlen = snnprintf(regexbuf, rlen, rsize, ")");
158 } else {
159 DomainPtr new_domain;
160 if(dlen >= dsize - 1) {
161 DomainPtr *new_domains;
162 new_domains = realloc(domains, (dsize * 2 + 1) *
163 sizeof(DomainPtr));
164 if(new_domains == NULL) {
165 do_log(L_ERROR,
166 "Couldn't reallocate domain list.\n");
167 fclose(in);
168 return -1;
170 domains = new_domains;
171 dsize = dsize * 2 + 1;
173 new_domain = malloc(sizeof(DomainRec) - 1 + i - start);
174 if(new_domain == NULL) {
175 do_log(L_ERROR, "Couldn't allocate domain.\n");
176 fclose(in);
177 return -1;
179 new_domain->length = i - start;
180 memcpy(new_domain->domain, buf + start, i - start);
181 domains[dlen++] = new_domain;
184 fclose(in);
185 return 1;
188 void
189 parseDomainFile(AtomPtr file,
190 DomainPtr **domains_return, regex_t **regex_return)
192 struct stat ss;
193 int rc;
195 if(*domains_return) {
196 DomainPtr *domain = *domains_return;
197 while(*domain) {
198 free(*domain);
199 domain++;
201 free(*domains_return);
202 *domains_return = NULL;
205 if(*regex_return) {
206 regfree(*regex_return);
207 *regex_return = NULL;
210 if(!file || file->length == 0)
211 return;
213 domains = malloc(64 * sizeof(DomainPtr));
214 if(domains == NULL) {
215 do_log(L_ERROR, "Couldn't allocate domain list.\n");
216 return;
218 dlen = 0;
219 dsize = 64;
221 regexbuf = malloc(512);
222 if(regexbuf == NULL) {
223 do_log(L_ERROR, "Couldn't allocate regex.\n");
224 free(domains);
225 return;
227 rlen = 0;
228 rsize = 512;
230 rc = stat(file->string, &ss);
231 if(rc < 0) {
232 if(errno != ENOENT)
233 do_log_error(L_WARN, errno, "Couldn't stat file %s", file->string);
234 } else {
235 if(!S_ISDIR(ss.st_mode))
236 readDomainFile(file->string);
237 else {
238 char *fts_argv[2];
239 FTS *fts;
240 FTSENT *fe;
241 fts_argv[0] = file->string;
242 fts_argv[1] = NULL;
243 fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
244 if(fts) {
245 while(1) {
246 fe = fts_read(fts);
247 if(!fe) break;
248 if(fe->fts_info != FTS_D && fe->fts_info != FTS_DP &&
249 fe->fts_info != FTS_DC && fe->fts_info != FTS_DNR)
250 readDomainFile(fe->fts_accpath);
252 fts_close(fts);
253 } else {
254 do_log_error(L_ERROR, errno,
255 "Couldn't scan directory %s", file->string);
260 if(dlen > 0) {
261 domains[dlen] = NULL;
262 } else {
263 free(domains);
264 domains = NULL;
267 regex_t *regex;
269 if(rlen > 0) {
270 regex = malloc(sizeof(regex_t));
271 rc = regcomp(regex, regexbuf, REG_EXTENDED | REG_NOSUB);
272 if(rc != 0) {
273 do_log(L_ERROR, "Couldn't compile regex: %d.\n", rc);
274 free(regex);
275 regex = NULL;
277 } else {
278 regex = NULL;
280 free(regexbuf);
282 *domains_return = domains;
283 *regex_return = regex;
285 return;
288 void
289 initForbidden(void)
291 redirectorKill();
293 if(forbiddenFile)
294 forbiddenFile = expandTilde(forbiddenFile);
296 if(forbiddenFile == NULL) {
297 forbiddenFile = expandTilde(internAtom("~/.polipo-forbidden"));
298 if(forbiddenFile) {
299 if(access(forbiddenFile->string, F_OK) < 0) {
300 releaseAtom(forbiddenFile);
301 forbiddenFile = NULL;
306 if(forbiddenFile == NULL) {
307 if(access("/etc/polipo/forbidden", F_OK) >= 0)
308 forbiddenFile = internAtom("/etc/polipo/forbidden");
311 parseDomainFile(forbiddenFile, &forbiddenDomains, &forbiddenRegex);
314 if(uncachableFile)
315 uncachableFile = expandTilde(uncachableFile);
317 if(uncachableFile == NULL) {
318 uncachableFile = expandTilde(internAtom("~/.polipo-uncachable"));
319 if(uncachableFile) {
320 if(access(uncachableFile->string, F_OK) < 0) {
321 releaseAtom(uncachableFile);
322 uncachableFile = NULL;
327 if(uncachableFile == NULL) {
328 if(access("/etc/polipo/uncachable", F_OK) >= 0)
329 uncachableFile = internAtom("/etc/polipo/uncachable");
332 parseDomainFile(uncachableFile, &uncachableDomains, &uncachableRegex);
334 return;
338 urlIsMatched(char *url, int length, DomainPtr *domains, regex_t *regex)
340 if(length < 8)
341 return 0;
343 if(memcmp(url, "http://", 7) != 0)
344 return 0;
346 if(domains) {
347 int i;
348 DomainPtr *domain;
349 for(i = 8; i < length; i++) {
350 if(url[i] == '/')
351 break;
353 domain = domains;
354 while(*domain) {
355 if((*domain)->length <= (i - 7) &&
356 (url[i - (*domain)->length - 1] == '.' ||
357 url[i - (*domain)->length - 1] == '/') &&
358 memcmp(url + i - (*domain)->length,
359 (*domain)->domain,
360 (*domain)->length) == 0)
361 return 1;
362 domain++;
366 if(regex) {
367 /* url is not necessarily 0-terminated */
368 char smallcopy[50];
369 char *urlcopy;
370 int rc;
372 if(length < 50) {
373 urlcopy = smallcopy;
374 } else {
375 urlcopy = malloc(length + 1);
376 if(urlcopy == NULL)
377 return 0;
379 memcpy(urlcopy, url, length);
380 urlcopy[length] = '\0';
382 rc = regexec(regex, urlcopy, 0, NULL, 0);
384 if(urlcopy != smallcopy)
385 free(urlcopy);
387 return !rc;
389 return 0;
/*
 * Return the result of urlIsMatched for the given URL (length bytes)
 * against the uncachable domain list and regex.
 */
393 urlIsUncachable(char *url, int length)
395 return urlIsMatched(url, length, uncachableDomains, uncachableRegex);
399 urlForbidden(AtomPtr url,
400 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
401 void *closure)
403 int forbidden = urlIsMatched(url->string, url->length,
404 forbiddenDomains, forbiddenRegex);
405 int code = 0;
406 AtomPtr message = NULL, headers = NULL;
409 if(forbidden) {
410 message = internAtomF("Forbidden URL %s", url->string);
411 if(forbiddenUrl) {
412 code = forbiddenRedirectCode;
413 headers = internAtomF("\r\nLocation: %s", forbiddenUrl->string);
414 } else {
415 code = 403;
419 #ifndef NO_REDIRECTOR
420 if(code == 0 && redirector) {
421 RedirectRequestPtr request;
422 request = malloc(sizeof(RedirectRequestRec));
423 if(request == NULL) {
424 do_log(L_ERROR, "Couldn't allocate redirect request.\n");
425 goto done;
427 request->url = url;
428 request->handler = handler;
429 request->data = closure;
430 if(redirector_request_first == NULL)
431 redirector_request_first = request;
432 else
433 redirector_request_last->next = request;
434 redirector_request_last = request;
435 request->next = NULL;
436 if(request == redirector_request_first)
437 redirectorTrigger();
438 return 1;
441 #endif
443 done:
444 handler(code, url, message, headers, closure);
445 return 1;
448 #ifndef NO_REDIRECTOR
449 static void
450 logExitStatus(int status)
452 if(WIFEXITED(status) && WEXITSTATUS(status) == 142)
453 /* See child code in runRedirector */
454 do_log(L_ERROR, "Couldn't start redirector.\n");
455 else {
456 char *reason =
457 WIFEXITED(status) ? "with status" :
458 WIFSIGNALED(status) ? "on signal" :
459 "with unknown status";
460 int value =
461 WIFEXITED(status) ? WEXITSTATUS(status) :
462 WIFSIGNALED(status) ? WTERMSIG(status) :
463 status;
464 do_log(L_ERROR,
465 "Redirector exited %s %d.\n", reason, value);
469 void
470 redirectorKill(void)
472 int rc, status, dead;
474 if(redirector_read_fd >= 0) {
475 rc = waitpid(redirector_pid, &status, WNOHANG);
476 dead = (rc > 0);
477 close(redirector_read_fd);
478 redirector_read_fd = -1;
479 close(redirector_write_fd);
480 redirector_write_fd = -1;
481 if(!dead) {
482 rc = kill(redirector_pid, SIGTERM);
483 if(rc < 0 && errno != ESRCH) {
484 do_log_error(L_ERROR, errno, "Couldn't kill redirector");
485 redirector_pid = -1;
486 return;
488 do {
489 rc = waitpid(redirector_pid, &status, 0);
490 } while(rc < 0 && errno == EINTR);
491 if(rc < 0)
492 do_log_error(L_ERROR, errno,
493 "Couldn't wait for redirector's death");
494 } else
495 logExitStatus(status);
496 redirector_pid = -1;
500 static void
501 redirectorDestroyRequest(RedirectRequestPtr request)
503 assert(redirector_request_first == request);
504 redirector_request_first = request->next;
505 if(redirector_request_first == NULL)
506 redirector_request_last = NULL;
507 free(request);
510 void
511 redirectorTrigger(void)
513 RedirectRequestPtr request = redirector_request_first;
514 int rc;
516 if(!request)
517 return;
519 if(redirector_read_fd < 0) {
520 rc = runRedirector(&redirector_pid,
521 &redirector_read_fd, &redirector_write_fd);
522 if(rc < 0) {
523 request->handler(rc, request->url, NULL, NULL, request->data);
524 redirectorDestroyRequest(request);
525 return;
528 do_stream_2(IO_WRITE, redirector_write_fd, 0,
529 request->url->string, request->url->length,
530 "\n", 1,
531 redirectorStreamHandler1, request);
535 redirectorStreamHandler1(int status,
536 FdEventHandlerPtr event,
537 StreamRequestPtr srequest)
539 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
541 if(status) {
542 if(status >= 0)
543 status = -EPIPE;
544 do_log_error(L_ERROR, -status, "Write to redirector failed");
545 goto fail;
548 if(!streamRequestDone(srequest))
549 return 0;
551 do_stream(IO_READ, redirector_read_fd, 0,
552 redirector_buffer, REDIRECTOR_BUFFER_SIZE,
553 redirectorStreamHandler2, request);
554 return 1;
556 fail:
557 request->handler(status < 0 ? status : -EPIPE,
558 request->url, NULL, NULL, request->data);
559 redirectorDestroyRequest(request);
560 redirectorKill();
561 return 1;
/*
 * Completion handler for the read of the redirector's reply.
 *
 * The reply is the text in redirector_buffer up to the first newline
 * within the srequest->offset bytes read so far.  Returns 0 (keep
 * reading) while no newline has arrived, the status is 0 and the buffer
 * is not full.  In every other case the request's handler is called
 * exactly once, the request is removed from the queue, the next queued
 * request is triggered and 1 is returned.  On the error paths (label
 * kill) the redirector is killed before that.
 */
565 redirectorStreamHandler2(int status,
566 FdEventHandlerPtr event,
567 StreamRequestPtr srequest)
569 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
570 char *c;
571 AtomPtr message;
572 AtomPtr headers;
573 int code;
/* Read error: report it to the handler and restart the redirector. */
575 if(status < 0) {
576 do_log_error(L_ERROR, -status, "Read from redirector failed");
577 request->handler(status, request->url, NULL, NULL, request->data);
578 goto kill;
580 c = memchr(redirector_buffer, '\n', srequest->offset);
581 if(!c) {
/* No complete line yet: wait for more unless the stream has ended
   (non-zero status) or the buffer is full. */
582 if(!status && srequest->offset < REDIRECTOR_BUFFER_SIZE)
583 return 0;
584 do_log(L_ERROR, "Redirector returned incomplete reply.\n");
585 request->handler(-EREDIRECTOR, request->url, NULL, NULL, request->data);
586 goto kill;
/* Terminate the reply so it can be used as a C string below. */
588 *c = '\0';
/* Anything after the newline is only warned about. */
590 if(srequest->offset > c + 1 - redirector_buffer)
591 do_log(L_WARN, "Stray bytes in redirector output.\n");
/* A reply of more than one byte that differs from the original URL is a
   redirect; a shorter reply or an identical URL means "no change". */
593 if(c > redirector_buffer + 1 &&
594 (c - redirector_buffer != request->url->length ||
595 memcmp(redirector_buffer, request->url->string,
596 request->url->length) != 0)) {
597 code = redirectorRedirectCode;
598 message = internAtom("Redirected by external redirector");
599 if(message == NULL) {
600 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
601 goto kill;
604 headers = internAtomF("\r\nLocation: %s", redirector_buffer);
605 if(headers == NULL) {
606 releaseAtom(message);
607 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
608 goto kill;
610 } else {
611 code = 0;
612 message = NULL;
613 headers = NULL;
615 request->handler(code, request->url,
616 message, headers, request->data);
617 goto cont;
/* Common tail: drop the finished request and start the next one. */
619 cont:
620 redirectorDestroyRequest(request);
621 redirectorTrigger();
622 return 1;
/* Error tail: stop the redirector, then continue as above. */
624 kill:
625 redirectorKill();
626 goto cont;
/*
 * Start the redirector program as a child process connected through two
 * pipes.
 *
 * filedes1 carries data from the parent to the child's standard input;
 * filedes2 carries the child's standard output back to the parent.  On
 * success the child's pid, the parent's read end (filedes2[0]) and the
 * parent's write end (filedes1[1]) are stored through the three output
 * parameters, both parent ends are non-blocking, and 1 is returned.  On
 * failure a negative error code is returned, all pipe descriptors opened
 * so far are closed and redirector_buffer is freed.
 *
 * NOTE(review): on the fail4 paths taken in the parent after a
 * successful fork (other than the one where waitpid already reaped the
 * child), the child is neither signalled nor waited for here -- confirm
 * whether that is intended.
 */
630 runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return)
632 int rc, rc2, status;
633 pid_t pid;
634 int filedes1[2], filedes2[2];
635 sigset_t ss, old_mask;
637 assert(redirector);
/* The reply buffer is allocated on first use. */
639 if(redirector_buffer == NULL) {
640 redirector_buffer = malloc(REDIRECTOR_BUFFER_SIZE);
641 if(redirector_buffer == NULL)
642 return -errno;
645 rc = pipe(filedes1);
646 if(rc < 0) {
647 rc = -errno;
648 goto fail1;
652 rc = pipe(filedes2);
653 if(rc < 0) {
654 rc = -errno;
655 goto fail2;
/* Flush buffered output before forking so the child does not inherit
   unwritten data. */
658 fflush(stdout);
659 fflush(stderr);
660 flushLog();
/* Block the signals named by interestingSignals around the fork; the
   previous mask is restored in both parent and child. */
662 interestingSignals(&ss);
663 do {
664 rc = sigprocmask(SIG_BLOCK, &ss, &old_mask);
665 } while (rc < 0 && errno == EINTR);
666 if(rc < 0) {
667 rc = -errno;
668 goto fail3;
671 pid = fork();
672 if(pid < 0) {
673 rc = -errno;
674 goto fail4;
/* Parent. */
677 if(pid > 0) {
678 do {
679 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
680 } while(rc < 0 && errno == EINTR);
682 if(rc < 0) {
683 rc = -errno;
684 goto fail4;
687 rc = setNonblocking(filedes1[1], 1);
688 if(rc >= 0)
689 rc = setNonblocking(filedes2[0], 1);
690 if(rc < 0) {
691 rc = -errno;
692 goto fail4;
695 /* This is completely unnecessary -- if the redirector cannot be
696 started, redirectorStreamHandler1 will get EPIPE straight away --,
697 but it improves error messages somewhat. */
698 rc = waitpid(pid, &status, WNOHANG);
699 if(rc > 0) {
700 logExitStatus(status);
701 rc = -EREDIRECTOR;
702 goto fail4;
703 } else if(rc < 0) {
704 rc = -errno;
705 goto fail4;
708 *read_fd_return = filedes2[0];
709 *write_fd_return = filedes1[1];
711 *pid_return = pid;
712 /* This comes at the end so that the fail* labels can work */
713 close(filedes1[0]);
714 close(filedes2[1]);
/* Child: keep only its own pipe ends, make them stdin/stdout and exec
   the redirector.  Exit code 142 signals a start-up failure to
   logExitStatus. */
715 } else {
716 close(filedes1[1]);
717 close(filedes2[0]);
718 uninitEvents();
719 do {
720 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
721 } while (rc < 0 && errno == EINTR);
722 if(rc < 0)
723 exit(142);
725 if(filedes1[0] != 0)
726 dup2(filedes1[0], 0);
727 if(filedes2[1] != 1)
728 dup2(filedes2[1], 1);
730 execlp(redirector->string, redirector->string, NULL);
731 exit(142);
732 /* NOTREACHED */
734 return 1;
/* Cleanup ladder: each label undoes one more step than the next.  rc
   already holds the error to return; rc2 keeps it from being clobbered. */
736 fail4:
737 do {
738 rc2 = sigprocmask(SIG_SETMASK, &old_mask, NULL);
739 } while(rc2 < 0 && errno == EINTR);
740 fail3:
741 close(filedes2[0]);
742 close(filedes2[1]);
743 fail2:
744 close(filedes1[0]);
745 close(filedes1[1]);
746 fail1:
747 free(redirector_buffer);
748 redirector_buffer = NULL;
749 return rc;
752 #else
/* Variant compiled when NO_REDIRECTOR is defined: there is no redirector
   process, so there is nothing to stop. */
754 void
755 redirectorKill(void)
757 return;
760 #endif
762 #else
/* Variant compiled when NO_FORBIDDEN is defined: registers no
   configuration variables. */
764 void
765 preinitForbidden()
767 return;
/* Variant compiled when NO_FORBIDDEN is defined: nothing to initialise. */
770 void
771 initForbidden()
773 return;
/* Variant compiled when NO_FORBIDDEN is defined: always returns 0, so no
   URL is reported as uncachable. */
777 urlIsUncachable(char *url, int length)
779 return 0;
/* Variant compiled when NO_FORBIDDEN is defined: immediately calls the
   handler with code 0 and no message or headers, passing url and closure
   through, and returns 1. */
783 urlForbidden(AtomPtr url,
784 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
785 void *closure)
787 handler(0, url, NULL, NULL, closure);
788 return 1;
791 #endif