Fix indexing inconsistency in manual.
[polipo.git] / forbidden.c
blob536d3d647c26f2ccf73b4edeb13837bb1b43c591
/*
Copyright (c) 2003-2006 by Juliusz Chroboczek

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
23 #include "polipo.h"
25 #ifndef NO_FORBIDDEN
27 #include <regex.h>
/*
 * One literal (non-regex) entry read from a domain file.
 *
 * Records are over-allocated by readDomainFile so that `domain` really
 * holds `length` bytes; the bytes are copied without a terminating NUL.
 * The one-element array must stay as it is: the allocation size is
 * computed from sizeof(DomainRec) - 1.
 */
struct _Domain {
    int length;         /* number of bytes stored in domain[] */
    char domain[1];     /* first byte; the rest follows in the same block */
};
typedef struct _Domain DomainRec;
typedef struct _Domain *DomainPtr;
34 AtomPtr forbiddenFile = NULL;
35 AtomPtr forbiddenUrl = NULL;
36 int forbiddenRedirectCode = 302;
38 AtomPtr redirector = NULL;
39 int redirectorRedirectCode = 302;
41 DomainPtr *forbiddenDomains = NULL;
42 regex_t *forbiddenRegex = NULL;
44 AtomPtr uncachableFile = NULL;
45 DomainPtr *uncachableDomains = NULL;
46 regex_t *uncachableRegex = NULL;
48 /* these three are only used internally by {parse,read}DomainFile */
49 /* to avoid having to pass it all as parameters */
50 static DomainPtr *domains;
51 static char *regexbuf;
52 static int rlen, rsize, dlen, dsize;
54 static pid_t redirector_pid = 0;
55 static int redirector_read_fd = -1, redirector_write_fd = -1;
56 static char redirector_buffer[512];
57 RedirectRequestPtr redirector_request_first = NULL,
58 redirector_request_last = NULL;
60 static int atomSetterForbidden(ConfigVariablePtr, void*);
62 void
63 preinitForbidden(void)
65 CONFIG_VARIABLE_SETTABLE(forbiddenUrl, CONFIG_ATOM, configAtomSetter,
66 "URL to which forbidden requests "
67 "should be redirected.");
68 CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode, CONFIG_INT,
69 configIntSetter,
70 "Redirect code, 301 or 302.");
71 CONFIG_VARIABLE_SETTABLE(forbiddenFile, CONFIG_ATOM, atomSetterForbidden,
72 "File specifying forbidden URLs.");
73 #ifndef NO_REDIRECTOR
74 CONFIG_VARIABLE_SETTABLE(redirector, CONFIG_ATOM, atomSetterForbidden,
75 "Squid-style redirector.");
76 CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode, CONFIG_INT,
77 configIntSetter,
78 "Redirect code to use with redirector.");
79 #endif
80 CONFIG_VARIABLE_SETTABLE(uncachableFile, CONFIG_ATOM, atomSetterForbidden,
81 "File specifying uncachable URLs.");
84 static int
85 atomSetterForbidden(ConfigVariablePtr var, void *value)
87 initForbidden();
88 return configAtomSetter(var, value);
91 int
92 readDomainFile(char *filename)
94 FILE *in;
95 char buf[512];
96 char *rs;
97 int i, j, is_regex, start;
99 in = fopen(filename, "r");
100 if(in == NULL) {
101 if(errno != ENOENT)
102 do_log_error(L_ERROR, errno, "Couldn't open file %s", filename);
103 return -1;
106 while(1) {
107 rs = fgets(buf, 512, in);
108 if(rs == NULL)
109 break;
110 for(i = 0; i < 512; i++) {
111 if(buf[i] != ' ' && buf[i] != '\t')
112 break;
114 start = i;
115 for(i = start; i < 512; i++) {
116 if(buf[i] == '#' || buf[i] == '\r' || buf[i] == '\n')
117 break;
119 while(i > start) {
120 if(buf[i - 1] != ' ' && buf[i - 1] != '\t')
121 break;
122 i--;
125 if(i <= start)
126 continue;
128 /* The significant part of the line is now between start and i */
130 is_regex = 0;
131 for(j = start; j < i; j++) {
132 if(buf[j] == '\\' || buf[j] == '*' || buf[j] == '/') {
133 is_regex = 1;
134 break;
138 if(is_regex) {
139 while(rlen + i - start + 8 >= rsize) {
140 char *new_regexbuf;
141 new_regexbuf = realloc(regexbuf, rsize * 2 + 1);
142 if(new_regexbuf == NULL) {
143 do_log(L_ERROR, "Couldn't reallocate regex.\n");
144 fclose(in);
145 return -1;
147 regexbuf = new_regexbuf;
148 rsize = rsize * 2 + 1;
150 if(rlen != 0)
151 rlen = snnprintf(regexbuf, rlen, rsize, "|");
152 rlen = snnprintf(regexbuf, rlen, rsize, "(");
153 rlen = snnprint_n(regexbuf, rlen, rsize, buf + start, i - start);
154 rlen = snnprintf(regexbuf, rlen, rsize, ")");
155 } else {
156 DomainPtr new_domain;
157 if(dlen >= dsize - 1) {
158 DomainPtr *new_domains;
159 new_domains = realloc(domains, (dsize * 2 + 1) *
160 sizeof(DomainPtr));
161 if(new_domains == NULL) {
162 do_log(L_ERROR,
163 "Couldn't reallocate domain list.\n");
164 fclose(in);
165 return -1;
167 domains = new_domains;
168 dsize = dsize * 2 + 1;
170 new_domain = malloc(sizeof(DomainRec) - 1 + i - start);
171 if(new_domain == NULL) {
172 do_log(L_ERROR, "Couldn't allocate domain.\n");
173 fclose(in);
174 return -1;
176 new_domain->length = i - start;
177 memcpy(new_domain->domain, buf + start, i - start);
178 domains[dlen++] = new_domain;
181 fclose(in);
182 return 1;
185 void
186 parseDomainFile(AtomPtr file,
187 DomainPtr **domains_return, regex_t **regex_return)
189 struct stat ss;
190 int rc;
192 if(*domains_return) {
193 DomainPtr *domain = *domains_return;
194 while(*domain) {
195 free(*domain);
196 domain++;
198 free(*domains_return);
199 *domains_return = NULL;
202 if(*regex_return) {
203 regfree(*regex_return);
204 *regex_return = NULL;
207 if(!file || file->length == 0)
208 return;
210 domains = malloc(64 * sizeof(DomainPtr));
211 if(domains == NULL) {
212 do_log(L_ERROR, "Couldn't allocate domain list.\n");
213 return;
215 dlen = 0;
216 dsize = 64;
218 regexbuf = malloc(512);
219 if(regexbuf == NULL) {
220 do_log(L_ERROR, "Couldn't allocate regex.\n");
221 free(domains);
222 return;
224 rlen = 0;
225 rsize = 512;
227 rc = stat(file->string, &ss);
228 if(rc < 0) {
229 if(errno != ENOENT)
230 do_log_error(L_WARN, errno, "Couldn't stat file %s", file->string);
231 } else {
232 if(!S_ISDIR(ss.st_mode))
233 readDomainFile(file->string);
234 else {
235 char *fts_argv[2];
236 FTS *fts;
237 FTSENT *fe;
238 fts_argv[0] = file->string;
239 fts_argv[1] = NULL;
240 fts = fts_open(fts_argv, FTS_LOGICAL, NULL);
241 if(fts) {
242 while(1) {
243 fe = fts_read(fts);
244 if(!fe) break;
245 if(fe->fts_info != FTS_D && fe->fts_info != FTS_DP &&
246 fe->fts_info != FTS_DC && fe->fts_info != FTS_DNR)
247 readDomainFile(fe->fts_accpath);
249 fts_close(fts);
250 } else {
251 do_log_error(L_ERROR, errno,
252 "Couldn't scan directory %s", file->string);
257 if(dlen > 0) {
258 domains[dlen] = NULL;
259 } else {
260 free(domains);
261 domains = NULL;
264 regex_t *regex;
266 if(rlen > 0) {
267 regex = malloc(sizeof(regex_t));
268 rc = regcomp(regex, regexbuf, REG_EXTENDED | REG_NOSUB);
269 if(rc != 0) {
270 do_log(L_ERROR, "Couldn't compile regex: %d.\n", rc);
271 free(regex);
272 regex = NULL;
274 } else {
275 regex = NULL;
277 free(regexbuf);
279 *domains_return = domains;
280 *regex_return = regex;
282 return;
285 void
286 initForbidden(void)
288 redirectorKill();
290 if(forbiddenFile)
291 forbiddenFile = expandTilde(forbiddenFile);
293 if(forbiddenFile == NULL) {
294 forbiddenFile = expandTilde(internAtom("~/.polipo-forbidden"));
295 if(forbiddenFile) {
296 if(access(forbiddenFile->string, F_OK) < 0) {
297 releaseAtom(forbiddenFile);
298 forbiddenFile = NULL;
303 if(forbiddenFile == NULL) {
304 if(access("/etc/polipo/forbidden", F_OK) >= 0)
305 forbiddenFile = internAtom("/etc/polipo/forbidden");
308 parseDomainFile(forbiddenFile, &forbiddenDomains, &forbiddenRegex);
311 if(uncachableFile)
312 uncachableFile = expandTilde(uncachableFile);
314 if(uncachableFile == NULL) {
315 uncachableFile = expandTilde(internAtom("~/.polipo-uncachable"));
316 if(uncachableFile) {
317 if(access(uncachableFile->string, F_OK) < 0) {
318 releaseAtom(uncachableFile);
319 uncachableFile = NULL;
324 if(uncachableFile == NULL) {
325 if(access("/etc/polipo/uncachable", F_OK) >= 0)
326 uncachableFile = internAtom("/etc/polipo/uncachable");
329 parseDomainFile(uncachableFile, &uncachableDomains, &uncachableRegex);
331 return;
335 urlIsMatched(char *url, int length, DomainPtr *domains, regex_t *regex)
337 if(length < 8)
338 return 0;
340 if(memcmp(url, "http://", 7) != 0)
341 return 0;
343 if(domains) {
344 int i;
345 DomainPtr *domain;
346 for(i = 8; i < length; i++) {
347 if(url[i] == '/')
348 break;
350 domain = domains;
351 while(*domain) {
352 if((*domain)->length <= (i - 7) &&
353 (url[i - (*domain)->length - 1] == '.' ||
354 url[i - (*domain)->length - 1] == '/') &&
355 memcmp(url + i - (*domain)->length,
356 (*domain)->domain,
357 (*domain)->length) == 0)
358 return 1;
359 domain++;
362 if(regex) {
363 if(!regexec(regex, url, 0, NULL, 0))
364 return 1;
366 return 0;
370 urlIsUncachable(char *url, int length)
372 return urlIsMatched(url, length, uncachableDomains, uncachableRegex);
/* Single line-feed byte (no terminating NUL) written after each URL
   sent to the redirector. */
static char lf[1] = { '\n' };
378 urlForbidden(AtomPtr url,
379 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
380 void *closure)
382 int forbidden = urlIsMatched(url->string, url->length,
383 forbiddenDomains, forbiddenRegex);
384 int code = 0;
385 AtomPtr message = NULL, headers = NULL;
388 if(forbidden) {
389 message = internAtomF("Forbidden URL %s", url->string);
390 if(forbiddenUrl) {
391 code = forbiddenRedirectCode;
392 headers = internAtomF("\r\nLocation: %s", forbiddenUrl->string);
393 } else {
394 code = 403;
398 #ifndef NO_REDIRECTOR
399 if(code == 0 && redirector) {
400 RedirectRequestPtr request;
401 request = malloc(sizeof(RedirectRequestRec));
402 if(request == NULL) {
403 do_log(L_ERROR, "Couldn't allocate redirect request.\n");
404 goto done;
406 request->url = url;
407 request->handler = handler;
408 request->data = closure;
409 if(redirector_request_first == NULL)
410 redirector_request_first = request;
411 else
412 redirector_request_last->next = request;
413 redirector_request_last = request;
414 request->next = NULL;
415 if(request == redirector_request_first)
416 redirectorTrigger();
417 return 1;
419 #endif
421 done:
422 handler(code, url, message, headers, closure);
423 return 1;
426 #ifndef NO_REDIRECTOR
427 void
428 redirectorKill(void)
430 int rc;
431 int status;
432 if(redirector_read_fd >= 0) {
433 close(redirector_read_fd);
434 redirector_read_fd = -1;
435 close(redirector_write_fd);
436 redirector_write_fd = -1;
437 kill(redirector_pid, SIGTERM);
438 do {
439 rc = waitpid(redirector_pid, &status, 0);
440 } while(rc < 0 && errno == EINTR);
441 if(rc < 0) {
442 do_log_error(L_ERROR, errno, "Couldn't wait for redirector");
444 redirector_pid = -1;
448 static void
449 redirectorDestroyRequest(RedirectRequestPtr request)
451 assert(redirector_request_first == request);
452 redirector_request_first = request->next;
453 if(redirector_request_first == NULL)
454 redirector_request_last = NULL;
455 free(request);
458 void
459 redirectorTrigger(void)
461 RedirectRequestPtr request = redirector_request_first;
462 int rc;
464 if(!request)
465 return;
467 if(redirector_read_fd < 0) {
468 rc = runRedirector(&redirector_pid,
469 &redirector_read_fd, &redirector_write_fd);
470 if(rc < 0) {
471 do_log_error(L_ERROR, -rc, "Couldn't run redirector");
472 request->handler(rc, request->url, NULL, NULL, request->data);
473 redirectorDestroyRequest(request);
474 return;
477 do_stream_2(IO_WRITE, redirector_write_fd, 0,
478 request->url->string, request->url->length,
479 lf, 1,
480 redirectorStreamHandler1, request);
484 redirectorStreamHandler1(int status,
485 FdEventHandlerPtr event,
486 StreamRequestPtr srequest)
488 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
490 if(status) {
491 if(status >= 0)
492 status = -EPIPE;
493 do_log_error(L_ERROR, -status, "Write to redirector failed");
494 request->handler(status < 0 ? status : -EPIPE,
495 request->url, NULL, NULL, request->data);
496 redirectorDestroyRequest(request);
497 redirectorKill();
498 return 1;
501 if(!streamRequestDone(srequest))
502 return 0;
504 do_stream(IO_READ, redirector_read_fd, 0,
505 redirector_buffer, 512,
506 redirectorStreamHandler2, request);
507 return 1;
511 redirectorStreamHandler2(int status,
512 FdEventHandlerPtr event,
513 StreamRequestPtr srequest)
515 RedirectRequestPtr request = (RedirectRequestPtr)srequest->data;
516 char *c;
517 AtomPtr message;
518 AtomPtr headers;
519 int code;
521 if(status < 0) {
522 do_log_error(L_ERROR, -status, "Read from redirector failed");
523 request->handler(status, request->url, NULL, NULL, request->data);
524 goto kill;
526 c = memchr(redirector_buffer, '\n', srequest->offset);
527 if(!c) {
528 if(!status && c < redirector_buffer + 512)
529 return 0;
530 do_log(L_ERROR, "Redirector returned incomplete reply.\n");
531 request->handler(-EUNKNOWN, request->url, NULL, NULL, request->data);
532 goto kill;
534 *c = '\0';
536 if(srequest->offset > c + 1 - redirector_buffer)
537 do_log(L_WARN, "Stray bytes in redirector output.\n");
539 if(c > redirector_buffer + 1 &&
540 (c - redirector_buffer != request->url->length ||
541 memcmp(redirector_buffer, request->url->string,
542 request->url->length) != 0)) {
543 code = redirectorRedirectCode;
544 message = internAtom("Redirected by external redirector");
545 if(message == NULL) {
546 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
547 goto kill;
550 headers = internAtomF("\r\nLocation: %s", redirector_buffer);
551 if(headers == NULL) {
552 releaseAtom(message);
553 request->handler(-ENOMEM, request->url, NULL, NULL, request->data);
554 goto kill;
556 } else {
557 code = 0;
558 message = NULL;
559 headers = NULL;
561 request->handler(code, request->url,
562 message, headers, request->data);
563 goto cont;
565 cont:
566 redirectorDestroyRequest(request);
567 redirectorTrigger();
568 return 1;
570 kill:
571 redirectorKill();
572 goto cont;
576 runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return)
578 int rc;
579 pid_t pid;
580 int filedes1[2], filedes2[2];
581 sigset_t ss, old_mask;
583 assert(redirector);
585 rc = pipe(filedes1);
586 if(rc < 0)
587 return -errno;
589 rc = pipe(filedes2);
590 if(rc < 0) {
591 close(filedes1[0]);
592 close(filedes1[1]);
593 return -errno;
596 fflush(stdout);
597 fflush(stderr);
598 fflush(logF);
600 interestingSignals(&ss);
601 do {
602 rc = sigprocmask(SIG_BLOCK, &ss, &old_mask);
603 } while (rc < 0 && errno == EINTR);
604 if(rc < 0)
605 return -errno;
607 pid = fork();
608 if(pid < 0)
609 return -errno;
611 if(pid > 0) {
612 close(filedes1[0]);
613 close(filedes2[1]);
614 do {
615 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
616 } while(rc < 0 && errno == EINTR);
618 if(rc < 0) {
619 rc = errno;
620 close(filedes1[1]);
621 close(filedes2[0]);
622 return -rc;
624 rc = setNonblocking(filedes1[1], 1);
625 if(rc >= 0)
626 rc = setNonblocking(filedes2[0], 1);
627 if(rc < 0) {
628 rc = errno;
629 close(filedes1[1]);
630 close(filedes2[0]);
631 return -rc;
633 *read_fd_return = filedes2[0];
634 *write_fd_return = filedes1[1];
635 *pid_return = pid;
636 } else {
637 close(filedes1[1]);
638 close(filedes2[0]);
639 uninitEvents();
640 do {
641 rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
642 } while (rc < 0 && errno == EINTR);
643 if(rc < 0)
644 exit(1);
646 if(filedes1[0] != 0)
647 dup2(filedes1[0], 0);
648 if(filedes2[1] != 1)
649 dup2(filedes2[1], 1);
651 execlp(redirector->string, redirector->string, NULL);
652 exit(1);
653 /* NOTREACHED */
655 return 1;
658 #else
/* Redirector support is compiled out (NO_REDIRECTOR): nothing to stop. */
void
redirectorKill(void)
{
}
666 #endif
668 #else
/* Forbidden-URL support is compiled out (NO_FORBIDDEN): no configuration
   variables to declare.  Uses an explicit (void) parameter list, like the
   full implementation, so the definition is a proper prototype. */
void
preinitForbidden(void)
{
    return;
}
/* Forbidden-URL support is compiled out (NO_FORBIDDEN): nothing to load.
   Uses an explicit (void) parameter list, like the full implementation,
   so the definition is a proper prototype. */
void
initForbidden(void)
{
    return;
}
/* Forbidden-URL support is compiled out (NO_FORBIDDEN): no URL is ever
   reported as uncachable. */
int
urlIsUncachable(char *url, int length)
{
    (void)url;
    (void)length;
    return 0;
}
689 urlForbidden(AtomPtr url,
690 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
691 void *closure)
693 handler(0, url, NULL, NULL, closure);
694 return 1;
697 #endif