fuck! don't perform ssl handshake for blocked hosts!
[mediator.git] / src / hp_re9.c
blob448258db45851622a95e2c9c7d458d2a89777c93
1 ////////////////////////////////////////////////////////////////////////////////
// Strip leading and trailing whitespace from `str`, in place.
// `str` must be a writable NUL-terminated string; NULL is accepted and ignored.
static inline void trim_str (char *str) {
  if (str == NULL) return;
  size_t len = strlen(str);
  // cut the tail first, so the move below has less to copy
  // (ctype functions need an `unsigned char` value: plain `char` may be negative)
  while (len > 0 && isspace((unsigned char)str[len-1])) --len;
  str[len] = 0;
  // count the leading whitespace and shift the remainder down exactly once
  size_t skip = 0;
  while (skip < len && isspace((unsigned char)str[skip])) ++skip;
  if (skip > 0) memmove(str, str+skip, len-skip+1); // +1 moves the terminator too
}
8 ////////////////////////////////////////////////////////////////////////////////
// Returns 1 when `ch` is an ASCII letter, an ASCII digit or '-', else 0.
// The ranges are spelled out by hand, so the answer does not depend on locale.
static inline int isGoodHostChar (char ch) {
  const int digit = (ch >= '0' && ch <= '9');
  const int lower = (ch >= 'a' && ch <= 'z');
  const int upper = (ch >= 'A' && ch <= 'Z');
  return (digit || lower || upper || ch == '-');
}
18 static int isGoodHMPattern (const char *hm) {
19 if (hm != NULL) {
20 if (hm[0] == ':') return 0;
21 if (hm[0] == '*') {
22 while (*hm && *hm == '*') ++hm;
23 if (hm[0] != '.') return 0;
24 //while (*hm && *hm == '.') ++hm;
26 while (*hm) {
27 //if (hm[0] == '*' || hm[0] == '?') return 0;
28 if (hm[0] == ':') return 1;
29 if (hm[0] == '.') {
30 if (!isGoodHostChar(hm[1])) return 0;
31 } else {
32 if (!isGoodHostChar(hm[0])) return 0;
34 ++hm;
36 return 1;
38 return 0;
// Host-mask match: returns 1 when `host` matches `pat`, 0 otherwise.
// NULL or empty arguments never match. Comparison is case-insensitive.
//
// Pattern forms:
//   "name"    -- `host` must equal the pattern;
//   "*.name"  -- `host` is "name" itself or any subdomain of it;
//   ".name"   -- `host` must be a proper subdomain of "name".
static int isHM (const char *pat, const char *host) {
  if (host == NULL || pat == NULL || !host[0] || !pat[0]) return 0;

  int sdmode = 0; // 0: exact match; 1: subdomain optional; -1: subdomain required
  if (pat[0] == '*') {
    sdmode = 1;
    while (*pat == '*') ++pat;
    if (*pat != '.') return 0;
  } else if (pat[0] == '.') {
    sdmode = -1;
  }
  if (sdmode != 0) {
    // both wildcard forms drop the leading dots and need something after them
    while (*pat == '.') ++pat;
    if (!*pat) return 0;
  }

  const int plen = strlen(pat);
  const int hlen = strlen(host);
  const int extra = hlen-plen; // number of host chars in front of the compared tail
  if (extra < 0) return 0; // pattern too long
  if (sdmode == 0 && extra != 0) return 0; // exact form can't match subdomains
  if (strcasecmp(pat, host+extra) != 0) return 0;
  if (sdmode < 0 && extra == 0) return 0; // at least one subdomain is required
  // the tail must start on a label boundary: "badname" is not a subdomain of "name"
  if (sdmode != 0 && extra > 0 && host[extra-1] != '.') return 0;
  return 1;
}
// Logging variant of the matcher: forwards to isHMEx() and records the
// pattern, the host and the outcome through ptlogf().
// NOTE(review): this has the same name as the matcher defined above and calls
// isHMEx(), which is not visible in this part of the file -- presumably only
// one of the two definitions is compiled in; confirm.
static int isHM (const char *pat, const char *host) {
  const int matched = isHMEx(pat, host);
  ptlogf("isHM: [%s] : [%s] : %d\n", pat, host, matched);
  return matched;
}
82 ////////////////////////////////////////////////////////////////////////////////
83 typedef struct {
84 http_method mt;
85 int port; // <1: any
86 char *litstr;
87 re9_prog_t *re;
88 char *newhost; // for host rewriting
89 } hostpattern_t;
92 typedef struct {
93 hostpattern_t *hp;
94 int used;
95 int alloted;
96 } hostlist_t;
99 static hostlist_t host_blocks;
100 static hostlist_t host_allowrefs;
101 static hostlist_t host_blockuris;
102 static hostlist_t host_iframeuris;
103 static hostlist_t host_ipv4;
104 static hostlist_t host_ipv6;
105 static hostlist_t host_force_https;
108 static __attribute__((constructor)) void hp_ctor_ (void) {
109 memset(&host_blocks, 0, sizeof(host_blocks));
110 memset(&host_allowrefs, 0, sizeof(host_allowrefs));
111 memset(&host_blockuris, 0, sizeof(host_blockuris));
112 memset(&host_iframeuris, 0, sizeof(host_iframeuris));
113 memset(&host_ipv4, 0, sizeof(host_ipv4));
114 memset(&host_ipv6, 0, sizeof(host_ipv6));
115 memset(&host_force_https, 0, sizeof(host_force_https));
119 static void hp_clear (hostlist_t *lst) {
120 if (lst != NULL) {
121 for (int f = 0; f < lst->used; ++f) {
122 if (lst->hp[f].newhost != NULL) free(lst->hp[f].newhost);
123 if (lst->hp[f].litstr != NULL) free(lst->hp[f].litstr);
124 if (lst->hp[f].re != NULL) re9_free(lst->hp[f].re);
126 if (lst->hp != NULL) free(lst->hp);
127 lst->hp = NULL;
128 lst->used = lst->alloted = 0;
// Debug helper: walks `lst` and logs one line per entry through ptlogf() --
// the index plus the literal host mask, or the index plus "regexp!" for
// entries without a literal. Does nothing when `lst` is NULL.
// The `unused` attribute keeps the compiler quiet when nothing calls it.
// NOTE(review): some lines of this function are missing from this listing;
// check the full file before relying on the body being active.
133 static __attribute__((unused)) void hp_dump (const hostlist_t *lst) {
135 if (lst != NULL) {
136 for (int f = 0; f < lst->used; ++f) {
137 if (lst->hp[f].litstr) {
138 ptlogf("%4d: [%s]\n", f, lst->hp[f].litstr);
139 } else {
140 ptlogf("%4d: regexp!\n", f);
148 static __attribute__((destructor)) void hp_dtor_ (void) {
149 hp_clear(&host_blocks);
150 hp_clear(&host_allowrefs);
151 hp_clear(&host_blockuris);
152 hp_clear(&host_iframeuris);
153 hp_clear(&host_ipv4);
154 hp_clear(&host_ipv6);
155 hp_clear(&host_force_https);
159 static inline void hp_grow (hostlist_t *lst) {
160 if (lst->used+1 > lst->alloted) {
161 int newsz = ((lst->used+1)|0x7f)+1;
162 lst->hp = realloc(lst->hp, sizeof(lst->hp[0])*newsz); //FIXME: check for errors
163 for (int f = lst->used; f < newsz; ++f) memset(&lst->hp[f], 0, sizeof(lst->hp[f]));
164 lst->alloted = newsz;
166 //ptlogf("hp_add: lst=%p (%p); idx=%d of %d; mt=%d; host=[%s]; port=%d (%p)\n", lst, lst->hp, lst->used, lst->alloted, mt, pat_copy, port, pat_copy);
168 lst->hp[lst->used].mt = mt;
169 lst->hp[lst->used].re = pat_copy;
170 lst->hp[lst->used].port = port;
171 ++lst->used;
176 // starts with 'P:', 'G:', 'C:' -- methods
177 //TODO: add all non-star matches before star matches?
178 static void hp_add (hostlist_t *lst, const char *patstr) {
179 if (patstr != NULL && patstr[0]) {
180 const char *errmsg;
181 char *col, *rephost = NULL;
182 char *pat = alloca(strlen(patstr)+1);
183 http_method mt = MT_ANY;
184 int port = 0;
185 int allowHM = 1;
187 strcpy(pat, patstr);
188 if (pat[1] == ':') {
189 switch (toupper(pat[0])) {
190 case 'P': mt = MT_POST; pat += 2; break;
191 case 'G': mt = MT_GET; pat += 2; break;
192 case 'C': mt = MT_CONNECT; pat += 2; break;
193 case 'A': mt = MT_ANY; pat += 2; break;
196 // process replacement host
197 // host => newhost
198 if ((col = strstr(pat, "=>")) != NULL) {
199 char *cc = col;
200 while (cc > pat && cc[-1] == '=') --cc;
201 while (cc > pat && isspace(cc[-1])) --cc;
202 *cc = 0; // cut it out
203 col += 2;
204 while (*col && isspace(*col)) ++col;
205 while (*col && isspace(col[strlen(col)-1])) col[strlen(col)-1] = 0;
206 if (*col && cc > pat) rephost = col;
207 allowHM = 0;
209 // process port
210 if (pat[1] && (col = strrchr(pat+1, ':')) != NULL) {
211 ++col;
212 port = atoi(col); //FIXME!
213 if (port < 1 || port > 65535) {
214 fprintf(stderr, "WARNING: ignored pattern with invalid port '%s'\n", pat);
215 return;
217 --col;
218 } else {
219 col = pat+strlen(pat);
221 // now add properly escaped host pattern
222 //if (pat[0] == '*') while (*pat && pat[1] == '*') ++pat;
223 if (pat[0] == ':') {
224 // regular expression
225 char *pt;
226 if (*(++pat) == 0) return;
227 pt = strdup(pat);
228 if (col[0]) strrchr(pt, ':')[0] = 0; // simple hack
229 hp_grow(lst);
230 lst->hp[lst->used].mt = mt;
231 lst->hp[lst->used].port = port;
232 lst->hp[lst->used].litstr = NULL;
233 lst->hp[lst->used].re = re9_compile(pt, RE9_FLAG_NONUTF8, &errmsg);
234 free(pt);
235 if (lst->hp[lst->used].re == NULL) {
236 _ptlog(NULL, "FATAL: invalid regexp '%s': %s\n", pat, errmsg);
237 } else {
238 lst->hp[lst->used].newhost = (rephost != NULL ? strdup(rephost) : NULL);
239 ++lst->used;
241 } else if (allowHM && isGoodHMPattern(pat)) {
242 // simple HM
243 int insbef = 0;
244 for (int f = 0; f < lst->used; ++f) if (lst->hp[f].litstr == NULL) { insbef = f; break; }
245 hp_grow(lst);
246 if (insbef < lst->used) for (int f = lst->used; f > insbef; --f) lst->hp[f] = lst->hp[f-1];
247 ++lst->used;
248 lst->hp[insbef].mt = mt;
249 lst->hp[insbef].port = port;
250 lst->hp[insbef].litstr = strdup(pat);
251 lst->hp[insbef].re = NULL;
252 lst->hp[insbef].newhost = (rephost != NULL ? strdup(rephost) : NULL);
253 if (col[0]) strchr(lst->hp[insbef].litstr, ':')[0] = 0; // simple hack
254 //ptlogf("HM: [%s]\n", lst->hp[insbef].litstr);
255 } else {
256 // convert to regexp
257 xstrbuf_t ps;
258 xstrbuf_init(&ps);
259 xstrbuf_addc(&ps, '^');
260 if (pat[0] == '*' && pat[1] == '.') {
261 // special case
262 xstrbuf_addstr(&ps, "(?:.*\\.)?");
263 pat += 2;
264 } else if (pat[0] == '.') {
265 // another special case
266 xstrbuf_addstr(&ps, ".*\\.");
267 pat += 1;
269 // now copy pattern, escaping chars and adding [^.] before '*'
270 while (pat < col) {
271 switch (*pat) {
272 case '.':
273 case '+': case '?':
274 case '[': case ']':
275 case '(': case ')':
276 case '|': case '\\':
277 case '^': case '$':
278 xstrbuf_addc(&ps, '\\');
279 xstrbuf_addc(&ps, *pat);
280 break;
281 case '*':
282 while (pat < col && *pat == '*') ++pat;
283 --pat;
284 xstrbuf_addstr(&ps, ".*"); // maybe use "[^.]" instead of "."?
285 break;
286 default:
287 xstrbuf_addc(&ps, *pat);
288 break;
290 ++pat;
292 xstrbuf_addc(&ps, '$');
293 hp_grow(lst);
294 lst->hp[lst->used].mt = mt;
295 lst->hp[lst->used].port = port;
296 lst->hp[lst->used].litstr = NULL;
297 lst->hp[lst->used].re = re9_compile(ps.data, RE9_FLAG_NONUTF8, &errmsg);
298 if (lst->hp[lst->used].re == NULL) {
299 _ptlog(NULL, "FATAL: invalid regexp '%s': %s\n", ps.data, errmsg);
300 } else {
301 lst->hp[lst->used].newhost = (rephost != NULL ? strdup(rephost) : NULL);
302 ++lst->used;
304 xstrbuf_free(&ps);
310 // NOT PTHREAD-SAFE!
311 static int hp_in_list (const hostlist_t *lst, http_method mt, const char *host, int port) {
312 for (int f = 0; f < lst->used; ++f) {
313 //fprintf(stderr, "checking: mt=%d; host=%s; port=%d : mt=%d; host=%s; port=%d\n", mt, host, port, lst->hp[f].mt, lst->hp[f].re, lst->hp[f].port);
314 if (lst->hp[f].mt != MT_ANY && lst->hp[f].mt != mt) continue;
315 if (lst->hp[f].port != 0 && lst->hp[f].port != port) continue;
316 if (lst->hp[f].litstr != NULL) {
317 if (isHM(lst->hp[f].litstr, host)) return 1;
318 } else {
319 if (re9_execute(lst->hp[f].re, RE9_FLAG_NONUTF8, host, NULL, 0)) return 1;
321 //fprintf(stderr, " HIT!\n");
323 //fprintf(stderr, " NO HIT!\n");
324 return 0;
328 // NOT PTHREAD-SAFE!
329 static const char *hp_in_list_newhost (const hostlist_t *lst, http_method mt, const char *host, int port) {
330 for (int hpnum = 0; hpnum < lst->used; ++hpnum) {
331 //fprintf(stderr, "checking: mt=%d; host=%s; port=%d : mt=%d; host=%s; port=%d\n", mt, host, port, lst->hp[hpnum].mt, lst->hp[hpnum].re, lst->hp[hpnum].port);
332 if (lst->hp[hpnum].mt != MT_ANY && lst->hp[hpnum].mt != mt) continue;
333 if (lst->hp[hpnum].port != 0 && lst->hp[hpnum].port != port) continue;
334 if (lst->hp[hpnum].litstr != NULL) {
335 if (isHM(lst->hp[hpnum].litstr, host)) return (lst->hp[hpnum].newhost != NULL ? lst->hp[hpnum].newhost : host);
336 } else {
337 static re9_sub_t mt[16];
338 if (re9_execute(lst->hp[hpnum].re, RE9_FLAG_NONUTF8, host, mt, 16)) {
339 static char rep[1024];
340 if (lst->hp[hpnum].newhost == NULL) return host;
341 re9_subst(rep, sizeof(rep), lst->hp[hpnum].newhost, mt, re9_nsub(lst->hp[hpnum].re));
342 return rep;
345 //fprintf(stderr, " HIT!\n");
347 //fprintf(stderr, " NO HIT!\n");
348 return NULL;
352 static void hp_load (hostlist_t *lst, const char *fname) {
353 FILE *fl = fopen(fname, "r");
354 hp_clear(lst);
355 if (fl != NULL) {
356 char *str = malloc(8192);
357 //ptlogf("FILE: [%s]\n", fname);
358 while (fgets(str, 8191, fl) != NULL) {
359 trim_str(str);
360 if (!str[0] || str[0] == '#') continue; // skip comments and empty lines
361 hp_add(lst, str);
363 free(str);
364 fclose(fl);
369 static void hp_add_re (hostlist_t *lst, const char *re) {
370 const char *errmsg;
371 http_method mt = MT_ANY;
372 if (re[1] == ':') {
373 switch (toupper(re[0])) {
374 case 'P': mt = MT_POST; re += 2; break;
375 case 'G': mt = MT_GET; re += 2; break;
376 case 'C': mt = MT_CONNECT; re += 2; break;
377 case 'A': mt = MT_ANY; re += 2; break;
380 hp_grow(lst);
381 lst->hp[lst->used].mt = mt;
382 lst->hp[lst->used].port = 0;
383 lst->hp[lst->used].litstr = NULL;
384 lst->hp[lst->used].re = re9_compile(re, RE9_FLAG_NONUTF8, &errmsg);
385 if (lst->hp[lst->used].re == NULL) {
386 _ptlog(NULL, "FATAL: invalid regexp '%s': %s\n", re, errmsg);
387 } else {
388 ++lst->used;
393 static void hp_load_re (hostlist_t *lst, const char *fname) {
394 FILE *fl = fopen(fname, "r");
395 hp_clear(lst);
396 if (fl != NULL) {
397 char *str = malloc(8192);
398 //ptlogf("FILE: [%s]\n", fname);
399 while (fgets(str, 8191, fl) != NULL) {
400 trim_str(str);
401 if (!str[0] || str[0] == '#') continue; // skip comments and empty lines
402 hp_add_re(lst, str);
404 free(str);
405 fclose(fl);
410 static int hp_check_re_uri (hostlist_t *lst, http_method mt, const char *proto, const char *hostname, int port, const char *path) {
411 static char *uri = NULL, pstr[16];
412 static int uri_size = 0;
413 int len;
414 if (path == NULL || !path[0]) path = "/";
415 if ((port == 80 && strcmp(proto, "http") == 0) || (port == 443 && strcmp(proto, "https") == 0)) {
416 pstr[0] = 0;
417 } else {
418 snprintf(pstr, sizeof(pstr), ":%d", port);
420 len = strlen(proto)+strlen(hostname)+strlen(pstr)+strlen(path)+8;
421 if (len > uri_size) {
422 uri = realloc(uri, len); //FIXME: check for errors
423 uri_size = len;
425 //_ptlog(NULL, "\3checking uri: proto=[%s]; hostname=[%s]; pstr=[%s]; path=[%s]\n", proto, hostname, pstr, path);
426 sprintf(uri, "%s://%s%s%s", proto, hostname, pstr, path);
427 //_ptlog(NULL, "\3checking uri: [%s]\n", uri);
428 for (int hpnum = 0; hpnum < lst->used; ++hpnum) {
429 if (lst->hp[hpnum].mt != MT_ANY && lst->hp[hpnum].mt != mt) continue;
430 if (lst->hp[hpnum].litstr != NULL) {
431 if (isHM(lst->hp[hpnum].litstr, uri)) return 1;
432 } else {
433 if (re9_execute(lst->hp[hpnum].re, RE9_FLAG_NONUTF8, uri, NULL, 0)) return 1;
436 return 0;
440 static inline int is_blocked_uri (http_method mt, const char *proto, const char *hostname, int port, const char *path) {
441 return hp_check_re_uri(&host_blockuris, mt, proto, hostname, port, path);
445 static inline int is_iframe_uri (http_method mt, const char *proto, const char *hostname, int port, const char *path) {
446 return hp_check_re_uri(&host_iframeuris, mt, proto, hostname, port, path);
450 static inline int is_v4_host (http_method mt, const char *hostname, int port) {
451 return hp_in_list(&host_ipv4, mt, hostname, port);
455 static inline int is_v6_host (http_method mt, const char *hostname, int port) {
456 return hp_in_list(&host_ipv6, mt, hostname, port);
460 ////////////////////////////////////////////////////////////////////////////////
461 typedef struct {
462 char *repl;
463 http_method mt;
464 re9_prog_t *re;
465 int re_flags;
466 } repattern_t;
469 typedef struct {
470 repattern_t *pat;
471 int used;
472 int alloted;
473 } relist_t;
476 static relist_t re_uareplace;
479 static __attribute__((constructor)) void relist_ctor_ (void) {
480 memset(&re_uareplace, 0, sizeof(re_uareplace));
484 static void relist_clear (relist_t *lst) {
485 if (lst != NULL) {
486 for (int f = 0; f < lst->used; ++f) {
487 if (lst->pat[f].repl != NULL) free(lst->pat[f].repl);
488 if (lst->pat[f].re != NULL) re9_free(lst->pat[f].re);
490 if (lst->pat != NULL) free(lst->pat);
491 lst->pat = NULL;
492 lst->used = lst->alloted = 0;
497 static __attribute__((destructor)) void relist_dtor_ (void) {
498 relist_clear(&re_uareplace);
504 static inline void relist_grow (relist_t *lst) {
505 if (lst->used+1 > lst->alloted) {
506 int newsz = ((lst->used+1)|0x7f)+1;
507 lst->pat = realloc(lst->pat, sizeof(lst->pat[0])*newsz); //FIXME: check for errors
508 lst->alloted = newsz;
513 // if `pat` starts with 'P:', 'G:', 'C:' -- methods
514 static void relist_add (relist_t *lst, const char *pat, const char *repl) {
515 if (pat != NULL && pat[0]) {
516 const char *errmsg;
517 http_method mt = MT_ANY;
518 re9_prog_t *re;
519 int flags = RE9_FLAG_NONUTF8;
521 if (pat[1] == ':') {
522 switch (toupper(pat[0])) {
523 case 'P': mt = MT_POST; pat += 2; break;
524 case 'G': mt = MT_GET; pat += 2; break;
525 case 'C': mt = MT_CONNECT; pat += 2; break;
526 case 'A': mt = MT_ANY; pat += 2; break;
529 if (pat[0] == '/' && strchr(pat+1, '/') != NULL) {
530 /* regexp options */
531 for (++pat; *pat != '/'; ++pat) {
532 switch (*pat) {
533 case 'i': flags |= RE9_FLAG_CASEINSENS; break;
534 case 'I': flags &= ~RE9_FLAG_CASEINSENS; break;
535 case 'u': flags &= ~RE9_FLAG_NONUTF8; break;
536 case 'U': flags |= RE9_FLAG_NONUTF8; break;
537 case 'l': flags |= RE9_FLAG_LITERAL; break;
538 case 'L': flags &= ~RE9_FLAG_LITERAL; break;
541 ++pat;
543 if (!pat[0]) {
544 _ptlog(NULL, "FATAL: empty pattern!\n");
545 return;
547 if ((re = re9_compile(pat, flags, &errmsg)) == NULL) {
548 _ptlog(NULL, "FATAL: invalid regexp '%s': %s\n", pat, errmsg);
549 return;
551 //_ptlog(NULL, "RELIST: flags=0x%04x; re=[%s]; repl=[%s]\n", flags, pat, repl);
552 relist_grow(lst);
553 lst->pat[lst->used].mt = mt;
554 lst->pat[lst->used].repl = (repl != NULL ? strdup(repl) : NULL);
555 lst->pat[lst->used].re = re;
556 lst->pat[lst->used].re_flags = flags;
557 ++lst->used;
562 // NOT PTHREAD-SAFE!
563 static repattern_t *relist_find (relist_t *lst, http_method mt, const char *proto, const char *hostname, int port, const char *path) {
564 static char *uri = NULL, pstr[16];
565 static int uri_size = 0;
566 int len;
567 if (path == NULL || !path[0]) path = "/";
568 if ((port == 80 && strcmp(proto, "http") == 0) || (port == 443 && strcmp(proto, "https") == 0)) {
569 pstr[0] = 0;
570 } else {
571 snprintf(pstr, sizeof(pstr), ":%d", port);
573 len = strlen(proto)+strlen(hostname)+strlen(pstr)+strlen(path)+8;
574 if (len > uri_size) {
575 uri = realloc(uri, len); //FIXME: check for errors
576 uri_size = len;
578 sprintf(uri, "%s://%s%s%s", proto, hostname, pstr, path);
579 //_ptlog(NULL, "RELIST: checking uri: [%s]\n", uri);
580 for (int f = 0; f < lst->used; ++f) {
581 if (lst->pat[f].mt != MT_ANY && lst->pat[f].mt != mt) continue;
582 if (re9_execute(lst->pat[f].re, lst->pat[f].re_flags, uri, NULL, 0)) {
583 //_ptlog(NULL, " RELIST HIT: flags=0x%04x; repl=[%s]\n", lst->pat[f].re_flags, lst->pat[f].repl);
584 return &lst->pat[f];
587 return NULL;
591 static void relist_load_ua (relist_t *lst, const char *fname) {
592 FILE *fl = fopen(fname, "r");
593 relist_clear(lst);
594 if (fl != NULL) {
595 char *str = malloc(8192);
596 char *last_ua = NULL;
597 //ptlogf("FILE: [%s]\n", fname);
598 while (fgets(str, 8191, fl) != NULL) {
599 trim_str(str);
600 if (str[0] == '#' || !str[0]) continue; // skip comments and empty lines
601 if (strncmp(str, "ua=", 3) == 0) {
602 /* new UA */
603 if (last_ua != NULL) free(last_ua);
604 last_ua = NULL;
605 if (!str[3]) continue;
606 last_ua = strdup(str+3);
607 continue;
609 /* new regexp */
610 if (last_ua != NULL) {
611 relist_add(lst, str, (strcmp(last_ua, "-") == 0 ? NULL : last_ua));
614 free(str);
615 if (last_ua != NULL) free(last_ua);
616 fclose(fl);
621 ////////////////////////////////////////////////////////////////////////////////
622 typedef struct {
623 char *repl;
624 int use_location;
625 int dont_stop; // process all list again?
626 re9_prog_t *re;
627 int re_flags;
628 } rewrite_t;
631 typedef struct {
632 rewrite_t *pat;
633 int used;
634 int alloted;
635 } rewrite_list_t;
638 static rewrite_list_t re_rewrites;
641 static __attribute__((constructor)) void rewrite_list_ctor_ (void) {
642 memset(&re_rewrites, 0, sizeof(re_rewrites));
646 static void rewrite_list_clear (rewrite_list_t *lst) {
647 if (lst != NULL) {
648 for (int f = 0; f < lst->used; ++f) {
649 if (lst->pat[f].repl != NULL) free(lst->pat[f].repl);
650 if (lst->pat[f].re != NULL) re9_free(lst->pat[f].re);
652 if (lst->pat != NULL) free(lst->pat);
653 lst->pat = NULL;
654 lst->used = lst->alloted = 0;
659 static __attribute__((destructor)) void rewrite_list_dtor_ (void) {
660 rewrite_list_clear(&re_rewrites);
664 static inline void rewrite_list_grow (rewrite_list_t *lst) {
665 if (lst->used+1 > lst->alloted) {
666 int newsz = ((lst->used+1)|0x7f)+1;
667 lst->pat = realloc(lst->pat, sizeof(lst->pat[0])*newsz); //FIXME: check for errors
668 lst->alloted = newsz;
673 static void rewrite_list_add (rewrite_list_t *lst, const char *pat) {
674 if (pat != NULL && pat[0]) {
675 const char *errmsg;
676 re9_prog_t *re;
677 int flags = RE9_FLAG_NONUTF8;
678 char *pat_str;
679 const char *rep_str, *orig_pat = pat;
680 int use_loc = 0;
681 int dont_stop = 0;
683 if (pat[0] == '<') {
684 dont_stop = 1;
685 ++pat;
687 if (pat[0] == '/' && strchr(pat+1, '/') != NULL) {
688 /* regexp options */
689 for (++pat; *pat != '/'; ++pat) {
690 switch (*pat) {
691 case 'i': flags |= RE9_FLAG_CASEINSENS; break;
692 case 'I': flags &= ~RE9_FLAG_CASEINSENS; break;
693 case 'u': flags &= ~RE9_FLAG_NONUTF8; break;
694 case 'U': flags |= RE9_FLAG_NONUTF8; break;
695 case 'l': flags |= RE9_FLAG_LITERAL; break;
696 case 'L': flags &= ~RE9_FLAG_LITERAL; break;
699 ++pat;
701 if (!pat[0]) {
702 _ptlog(NULL, "FATAL: empty pattern!\n");
703 return;
705 /* find replacement string */
706 rep_str = strchr(pat, '>');
707 use_loc = 0;
708 while (rep_str != NULL) {
709 const char *p = rep_str-1;
710 use_loc = 0;
711 if (p >= pat && *p == ':') { use_loc = 1; --p; }
712 if (p >= pat && *p == '=') {
713 while (p >= pat && *p == '=') --p;
714 while (p > pat && isspace(p[-1])) --p;
715 ++rep_str;
716 while (*rep_str && isspace(*rep_str)) ++rep_str;
717 if (p > pat && *rep_str) {
718 /* found "=>"; p points to first space */
719 pat_str = calloc(1, p-pat+1);
720 memcpy(pat_str, pat, p-pat);
721 break;
723 } else {
724 ++rep_str;
726 rep_str = strchr(rep_str, '>');
728 if (rep_str == NULL) {
729 _ptlog(NULL, "FATAL: pattern without replacement: [%s]\n", orig_pat);
730 return;
732 if ((re = re9_compile(pat_str, flags, &errmsg)) == NULL) {
733 _ptlog(NULL, "FATAL: invalid regexp '%s': %s\n", pat_str, errmsg);
734 free(pat_str);
735 return;
737 free(pat_str);
738 rewrite_list_grow(lst);
739 lst->pat[lst->used].repl = strdup(rep_str);
740 lst->pat[lst->used].use_location = use_loc;
741 lst->pat[lst->used].dont_stop = dont_stop;
742 lst->pat[lst->used].re = re;
743 lst->pat[lst->used].re_flags = flags;
744 ++lst->used;
749 // NOT PTHREAD-SAFE!
750 static const char *rewrite_list_find (rewrite_list_t *lst, const char *proto, const char *hostname, int port, const char *path, int *ret_location, int *again) {
751 static char *uri = NULL, pstr[16];
752 static int uri_size = 0;
753 int len;
754 if (ret_location != NULL) *ret_location = 0;
755 if (again != NULL) *again = 0;
756 if (path == NULL || !path[0]) path = "/";
757 if ((port == 80 && strcmp(proto, "http") == 0) || (port == 443 && strcmp(proto, "https") == 0)) {
758 pstr[0] = 0;
759 } else {
760 snprintf(pstr, sizeof(pstr), ":%d", port);
762 len = strlen(proto)+strlen(hostname)+strlen(pstr)+strlen(path)+8;
763 if (len > uri_size) {
764 uri = realloc(uri, len); //FIXME: check for errors
765 uri_size = len;
767 sprintf(uri, "%s://%s%s%s", proto, hostname, pstr, path);
768 //_ptlog(NULL, "RELIST: checking uri: [%s]\n", uri);
769 for (int hpnum = 0; hpnum < lst->used; ++hpnum) {
770 static re9_sub_t mt[RE9_SUBEXP_MAX];
771 if (re9_execute(lst->pat[hpnum].re, lst->pat[hpnum].re_flags, uri, mt, RE9_SUBEXP_MAX)) {
772 static char rep[1024];
773 re9_subst(rep, sizeof(rep), lst->pat[hpnum].repl, mt, re9_nsub(lst->pat[hpnum].re));
774 if (ret_location != NULL) *ret_location = lst->pat[hpnum].use_location;
775 //_ptlog(NULL, "\x04REWRITE: [%s] ==> [%s]\n", uri, rep);
776 if (again != NULL) *again = lst->pat[hpnum].dont_stop;
777 return rep;
780 return NULL;
784 static void rewrite_list_load (rewrite_list_t *lst, const char *fname) {
785 FILE *fl = fopen(fname, "r");
786 rewrite_list_clear(lst);
787 if (fl != NULL) {
788 char *str = malloc(8192);
789 while (fgets(str, 8191, fl) != NULL) {
790 trim_str(str);
791 if (str[0] == '#' || !str[0]) continue; // skip comments and empty lines
792 rewrite_list_add(lst, str);
794 free(str);
795 fclose(fl);