some tweaks
[mkp224o.git] / filters_main.inc.h
blobf1bd7edee98f9bd68e8196401cd940c7da6f008c
2 #include "filters_common.inc.h"
4 #ifdef INTFILTER
6 # ifdef OMITMASK
8 static inline int filter_compare(const void *p1,const void *p2)
10 if (((const struct intfilter *)p1)->f < ((const struct intfilter *)p2)->f)
11 return -1;
12 if (((const struct intfilter *)p1)->f > ((const struct intfilter *)p2)->f)
13 return 1;
14 return 0;
17 # ifdef EXPANDMASK
20 * for mask expansion, we need to figure out how many bits
21 * we need to fill in with different values.
22 * while in big endian machines this is quite easy,
23 * representation we use for little endian ones may
24 * leave gap of bits we don't want to touch.
26 * initial idea draft:
28 * raw representation -- FF.FF.F0.00
29 * big endian -- 0xFFFFF000
30 * little endian -- 0x00F0FFFF
31 * b: 0xFFffF000 ^ 0xFFff0000 -> 0x0000F000
32 * 0x0000F000 + 1 -> 0x0000F001
33 * 0x0000F000 & 0x0000F001 -> 0x0000F000 <- shifted mask
34 * 0x0000F000 ^ 0x0000F000 -> 0x00000000 <- direct mask
35 * 0x0000F000 ^ 0x00000000 -> 0x0000F000 <- shifted mask
36 * l: 0x00f0FFff ^ 0x0000FFff -> 0x00f00000
37 * 0x00f00000 + 1 -> 0x00f00001
38 * 0x00f00000 & 0x00f00001 -> 0x00f00000 <- shifted mask
39 * 0x00f00000 ^ 0x00f00000 -> 0x00000000 <- direct mask
40 * 0x00f00000 ^ 0x00000000 -> 0x00f00000 <- shifted mask
42 * b: 0xFFffFFff ^ 0xF0000000 -> 0x0FffFFff
43 * 0x0FffFFff + 1 -> 0x10000000
44 * 0x0FffFFff & 0x10000000 -> 0x00000000 <- shifted mask
45 * 0x0FffFFff ^ 0x00000000 -> 0x0FffFFff <- direct mask
46 * 0x0FffFFff ^ 0x0FffFFff -> 0x00000000 <- shifted mask
47 * l: 0xFFffFFff ^ 0x000000f0 -> 0xFFffFF0f
48 * 0xFFffFF0f + 1 -> 0xFFffFF10
49 * 0xFFffFF0f & 0xFFffFF10 -> 0xFFffFF00 <- shifted mask
50 * 0xFFffFF0f ^ 0xFFffFF00 -> 0x0000000f <- direct mask
51 * 0xFFffFF0f ^ 0x0000000f -> 0xFFffFF00 <- shifted mask
53 * essentially, we have to make direct mask + shifted mask bits worth of information
54 * and then split it into 2 parts
55 * we do not need absolute shifted mask shifting value, just relative to direct mask
56 * 0x0sss00dd - shifted & direct mask combo
57 * 0x000sssdd - combined mask
58 * 8 - relshiftval
59 * generate values from 0x00000000 to 0x000sssdd
60 * for each value, realmask <- (val & 0x000000dd) | ((val & 0x000sss00) << relshiftval)
61 * or..
62 * realmask <- (val & 0x000000dd) | ((val << relshiftval) & 0x0sss0000)
63 * ...
65 * above method doesn't work in some cases. better way:
67 * l: 0x80ffFFff ^ 0x00f0FFff -> 0x800f0000
68 * 0x800f0000 >> 16 -> 0x0000800f
69 * 0x0000800f + 1 -> 0x00008010
70 * 0x0000800f & 0x00008010 -> 0x00008000 <- smask
71 * 0x0000800f ^ 0x00008000 -> 0x0000000f <- dmask
73 * cross <- difference between mask we desire and mask we currently have
74 * shift cross to the right a variable amount of times to eliminate trailing zeros
75 * save shift amount as ishift (initial shift)
76 * then, we eliminate first area of ones; if there was no gap, result is already all zeros
77 * save this thing as smask. it's only higher bits.
78 * XOR smask and cross; result is only lower bits.
79 * shift smask to the right a variable amount of times until gap is eliminated.
80 * save resulting mask as cmask;
81 * save resulting shift value as rshift.
// set to 1 by the expansion routines once any filter has been expanded
int flattened = 0;

// builds one expanded filter value out of counter value j:
//  - bits of j selected by dmask are used where they are
//  - j moved left by rshift and selected by smask supplies the bits
//    lying above the gap
//  - the combination is moved left by ishift and OR-ed into init
// note: j is evaluated twice, so it must not have side effects
#define EXPVAL(init,j,dmask,smask,ishift,rshift) \
	((init) | ( \
		( \
			((j) & (dmask)) | \
			(((j) << (rshift)) & (smask)) \
		) << (ishift) \
	))
88 // add expanded set of values
89 // allocates space on its own
90 static void ifilter_addexpanded(
91 struct intfilter *ifltr,
92 IFT dmask,IFT smask,IFT cmask,
93 int ishift,int rshift)
95 flattened = 1;
96 size_t i = VEC_LENGTH(filters);
97 VEC_ADDN(filters,cmask + 1);
98 for (size_t j = 0;;++j) {
99 VEC_BUF(filters,i + j).f =
100 EXPVAL(ifltr->f,j,dmask,smask,ishift,rshift);
101 if (j == cmask)
102 break;
106 // expand existing stuff
107 // allocates needed stuff on its own
108 static void ifilter_expand(IFT dmask,IFT smask,IFT cmask,int ishift,int rshift)
110 flattened = 1;
111 size_t len = VEC_LENGTH(filters);
112 VEC_ADDN(filters,cmask * len);
113 size_t esz = cmask + 1; // size of expanded elements
114 for (size_t i = len - 1;;--i) {
115 for (IFT j = 0;;++j) {
116 VEC_BUF(filters,i * esz + j).f =
117 EXPVAL(VEC_BUF(filters,i).f,j,dmask,smask,ishift,rshift);
118 if (j == cmask)
119 break;
121 if (i == 0)
122 break;
126 static inline void ifilter_addflatten(struct intfilter *ifltr,IFT mask)
128 if (VEC_LENGTH(filters) == 0) {
129 // simple
130 VEC_ADD(filters,*ifltr);
131 ifiltermask = mask;
132 return;
134 if (ifiltermask == mask) {
135 // lucky
136 VEC_ADD(filters,*ifltr);
137 return;
139 IFT cross = ifiltermask ^ mask;
140 int ishift = 0;
141 while ((cross & 1) == 0) {
142 ++ishift;
143 cross >>= 1;
145 IFT smask = cross & (cross + 1); // shift mask
146 IFT dmask = cross ^ smask; // direct mask
147 IFT cmask; // combined mask
148 int rshift = 0; // relative shift
149 while (cmask = (smask >> rshift) | dmask,(cmask & (cmask + 1)) != 0)
150 ++rshift;
151 // preparations done
152 if (ifiltermask > mask) {
153 // already existing stuff has more precise mask than we
154 // so we need to expand our stuff
155 ifilter_addexpanded(ifltr,dmask,smask,cmask,ishift,rshift);
157 else {
158 ifiltermask = mask;
159 ifilter_expand(dmask,smask,cmask,ishift,rshift);
160 VEC_ADD(filters,*ifltr);
164 # endif // EXPANDMASK
166 # else // OMITMASK
169 * struct intfilter layout: filter,mask
170 * stuff is compared in big-endian way, so memcmp
171 * filter needs to be compared first
172 * if its equal, mask needs to be compared
173 * memcmp is applicable there too
174 * due to struct intfilter layout, it all can be stuffed into one memcmp call
176 static inline int filter_compare(const void *p1,const void *p2)
178 return memcmp(p1,p2,sizeof(struct intfilter));
181 # endif // OMITMASK
183 static void filter_sort(void)
185 size_t len = VEC_LENGTH(filters);
186 if (len > 0)
187 qsort(&VEC_BUF(filters,0),len,sizeof(struct intfilter),&filter_compare);
190 #endif // INTFILTER
192 #ifdef BINFILTER
194 static inline int filter_compare(const void *p1,const void *p2)
196 const struct binfilter *b1 = (const struct binfilter *)p1;
197 const struct binfilter *b2 = (const struct binfilter *)p2;
199 size_t l = b1->len <= b2->len ? b1->len : b2->len;
201 int cmp = memcmp(b1->f,b2->f,l);
202 if (cmp != 0)
203 return cmp;
205 if (b1->len < b2->len)
206 return -1;
207 if (b1->len > b2->len)
208 return +1;
210 u8 cmask = b1->mask & b2->mask;
211 if ((b1->f[l] & cmask) < (b2->f[l] & cmask))
212 return -1;
213 if ((b1->f[l] & cmask) > (b2->f[l] & cmask))
214 return +1;
216 if (b1->mask < b2->mask)
217 return -1;
218 if (b1->mask > b2->mask)
219 return +1;
221 return 0;
224 static void filter_sort(void)
226 size_t len = VEC_LENGTH(filters);
227 if (len > 0)
228 qsort(&VEC_BUF(filters,0),len,sizeof(struct binfilter),&filter_compare);
231 #endif // BINFILTER
235 #ifndef PCRE2FILTER
236 static inline int filters_a_includes_b(size_t a,size_t b)
238 # ifdef INTFILTER
239 # ifdef OMITMASK
240 return VEC_BUF(filters,a).f == VEC_BUF(filters,b).f;
241 # else // OMITMASK
242 return VEC_BUF(filters,a).f == (VEC_BUF(filters,b).f & VEC_BUF(filters,a).m);
243 # endif // OMITMASK
244 # else // INTFILTER
245 const struct binfilter *fa = &VEC_BUF(filters,a);
246 const struct binfilter *fb = &VEC_BUF(filters,b);
248 if (fa->len > fb->len)
249 return 0;
250 size_t l = fa->len;
252 int cmp = memcmp(fa->f,fb->f,l);
253 if (cmp != 0)
254 return 0;
256 if (fa->len < fb->len)
257 return 1;
259 if (fa->mask > fb->mask)
260 return 0;
262 return fa->f[l] == (fb->f[l] & fa->mask);
263 # endif // INTFILTER
266 static void filters_dedup(void)
268 size_t last = ~(size_t)0; // index after last matching element
269 size_t chk; // element to compare against
270 size_t st; // start of area to destroy
272 size_t len = VEC_LENGTH(filters);
273 for (size_t i = 1;i < len;++i) {
274 if (last != i) {
275 if (filters_a_includes_b(i - 1,i)) {
276 if (last != ~(size_t)0) {
277 memmove(&VEC_BUF(filters,st),
278 &VEC_BUF(filters,last),
279 (i - last) * VEC_ELSIZE(filters));
280 st += i - last;
282 else
283 st = i;
284 chk = i - 1;
285 last = i + 1;
288 else {
289 if (filters_a_includes_b(chk,i))
290 last = i + 1;
293 if (last != ~(size_t)0) {
294 memmove(&VEC_BUF(filters,st),
295 &VEC_BUF(filters,last),
296 (len - last) * VEC_ELSIZE(filters));
297 st += len - last;
298 VEC_SETLENGTH(filters,st);
301 #endif // !PCRE2FILTER
303 static void filters_clean(void)
305 #ifdef PCRE2FILTER
306 for (size_t i = 0;i < VEC_LENGTH(filters);++i) {
307 pcre2_code_free(VEC_BUF(filters,i).re);
308 free(VEC_BUF(filters,i).str);
310 #endif
311 VEC_FREE(filters);
314 size_t filters_count(void)
316 return VEC_LENGTH(filters);
320 static void filters_print(void)
322 if (quietflag)
323 return;
324 size_t i,l;
325 l = VEC_LENGTH(filters);
326 if (l)
327 fprintf(stderr,"filters:\n");
329 for (i = 0;i < l;++i) {
330 #ifdef NEEDBINFILTER
331 char buf0[256],buf1[256];
332 u8 bufx[128];
333 #endif
335 if (!verboseflag && i >= 20) {
336 size_t notshown = l - i;
337 fprintf(stderr,"[another " FSZ " %s not shown]\n",
338 notshown,notshown == 1 ? "filter" : "filters");
339 break;
342 #ifdef INTFILTER
343 size_t len = 0;
344 u8 *imraw;
346 # ifndef OMITMASK
347 imraw = (u8 *)&VEC_BUF(filters,i).m;
348 # else
349 imraw = (u8 *)&ifiltermask;
350 # endif
351 while (len < sizeof(IFT) && imraw[len] != 0x00) ++len;
352 u8 mask = imraw[len-1];
353 u8 *ifraw = (u8 *)&VEC_BUF(filters,i).f;
354 #endif // INTFILTER
356 #ifdef BINFILTER
357 size_t len = VEC_BUF(filters,i).len + 1;
358 u8 mask = VEC_BUF(filters,i).mask;
359 u8 *ifraw = VEC_BUF(filters,i).f;
360 #endif // BINFILTER
361 #ifdef NEEDBINFILTER
362 base32_to(buf0,ifraw,len);
363 memcpy(bufx,ifraw,len);
364 bufx[len - 1] |= ~mask;
365 base32_to(buf1,bufx,len);
366 char *a = buf0,*b = buf1;
367 while (*a && *a == *b)
368 ++a, ++b;
369 *a = 0;
370 fprintf(stderr,"\t%s\n",buf0);
371 #endif // NEEDBINFILTER
372 #ifdef PCRE2FILTER
373 fprintf(stderr,"\t%s\n",VEC_BUF(filters,i).str);
374 #endif // PCRE2FILTER
376 fprintf(stderr,"in total, " FSZ " %s\n",l,l == 1 ? "filter" : "filters");
#ifdef NEEDBINFILTER
// prints a '^' marker line that points at character `pos` of the filter
// text echoed by a preceding `filter "<text>" ...` diagnostic
static void filters_add_caret(size_t pos)
{
	// the echoed text begins right after this prefix, so the marker has
	// to be moved right by the prefix length in addition to pos
	size_t pad = sizeof("filter \"") - 1 + pos;
	while (pad--)
		fputc(' ',stderr);
	fprintf(stderr,"^\n");
}
#endif // NEEDBINFILTER

// parses one filter string and appends it to the filter list.
//
// base32 builds (NEEDBINFILTER): a leading '^' is skipped; a filter that
// is invalid, empty or too long is not added, and for the invalid and
// too-long cases a diagnostic with a position marker goes to stderr.
// PCRE2 builds (PCRE2FILTER): the string is compiled as an anchored
// pattern; on failure a diagnostic goes to stderr and nothing is added.
// a private copy of the pattern text is stored next to the compiled code
// (both are released in filters_clean()).
void filters_add(const char *filter)
{
#ifdef NEEDBINFILTER
	struct binfilter bf;
	size_t ret;
# ifdef INTFILTER
	union intconv {
		IFT i;
		u8 b[sizeof(IFT)];
	} fc,mc;
# endif

	// skip regex start symbol. we do not support regex tho
	if (*filter == '^')
		++filter;

	memset(&bf,0,sizeof(bf));

	if (!base32_valid(filter,&ret)) {
		// ret is used as the offset of the offending character
		// (presumably what base32_valid stores on failure)
		fprintf(stderr,"filter \"%s\" is invalid\n",filter);
		filters_add_caret(ret);
		return;
	}

	ret = BASE32_FROM_LEN(ret);
	if (!ret)
		return;
# ifdef INTFILTER
	size_t maxsz = sizeof(IFT);
# else
	size_t maxsz = sizeof(bf.f);
# endif
	if (ret > maxsz) {
		fprintf(stderr,"filter \"%s\" is too long\n",filter);
		// mark the first character past what maxsz bytes can hold
		// (5 bits per base32 character)
		filters_add_caret((maxsz * 8) / 5);
		return;
	}
	base32_from(bf.f,&bf.mask,filter);
	bf.len = ret - 1;

# ifdef INTFILTER
	// build the integer mask: 0xFF for every full byte, then the
	// partial mask of the last byte
	mc.i = 0;
	for (size_t i = 0;i < bf.len;++i)
		mc.b[i] = 0xFF;
	mc.b[bf.len] = bf.mask;
	memcpy(fc.b,bf.f,sizeof(fc.b));
	fc.i &= mc.i;

	struct intfilter ifltr = {
		.f = fc.i,
#  ifndef OMITMASK
		.m = mc.i,
#  endif
	};

#  ifdef OMITMASK
	ifilter_addflatten(&ifltr,mc.i);
#  else // OMITMASK
	VEC_ADD(filters,ifltr);
#  endif // OMITMASK
# endif // INTFILTER

# ifdef BINFILTER
	VEC_ADD(filters,bf);
# endif // BINFILTER
#endif // NEEDBINFILTER

#ifdef PCRE2FILTER
	int errornum;
	PCRE2_SIZE erroroffset;
	pcre2_code *re;

	re = pcre2_compile((PCRE2_SPTR8)filter,PCRE2_ZERO_TERMINATED,
		PCRE2_NO_UTF_CHECK | PCRE2_ANCHORED,&errornum,&erroroffset,0);
	if (!re) {
		PCRE2_UCHAR buffer[1024];
		pcre2_get_error_message(errornum,buffer,sizeof(buffer));
		fprintf(stderr,"PCRE2 compilation failed at offset " FSZ ": %s\n",
			(size_t)erroroffset,buffer);
		return;
	}

	// attempt to JIT. ignore error
	(void) pcre2_jit_compile(re,PCRE2_JIT_COMPLETE);

	struct pcre2filter f;
	memset(&f,0,sizeof(f));
	f.re = re;
	size_t fl = strlen(filter) + 1;
	f.str = (char *) malloc(fl);
	if (!f.str)
		abort();
	memcpy(f.str,filter,fl);
	VEC_ADD(filters,f);
#endif // PCRE2FILTER
}
483 static void filters_prepare(void)
485 #ifndef PCRE2FILTER
486 if (!quietflag)
487 fprintf(stderr,"sorting filters...");
488 filter_sort();
489 if (wantdedup) {
490 if (!quietflag)
491 fprintf(stderr," removing duplicates...");
492 filters_dedup();
494 if (!quietflag)
495 fprintf(stderr," done.\n");
496 #endif
// reads filters from text file fname, one per line, handing each to
// filters_add(). empty lines and lines starting with '#' or "//" are
// skipped.
// returns true when the whole file was read, false when it could not be
// opened or a read error occurred (a diagnostic goes to stderr).
static bool loadfilterfile(const char *fname)
{
	char buf[128];
	FILE *f = fopen(fname,"r");
	if (!f) {
		fprintf(stderr,"failed to load filter file \"%s\": %s\n",fname,strerror(errno));
		return false;
	}

	// NOTE(review): a line longer than sizeof(buf) - 1 characters is
	// delivered by fgets() in several pieces, and each piece is handed
	// to filters_add() as if it were a separate line
	int readerrno;
	for (;;) {
		// cleared so that a value left behind by earlier calls
		// is never mistaken for the reason of a read failure
		errno = 0;
		if (!fgets(buf,sizeof(buf),f)) {
			// remember why reading stopped before anything else can touch errno
			readerrno = errno;
			break;
		}

		// cut the line at its newline, if it has one
		for (char *p = buf;*p;++p) {
			if (*p == '\n') {
				*p = 0;
				break;
			}
		}
		if (*buf && *buf != '#' && memcmp(buf,"//",2) != 0)
			filters_add(buf);
	}

	// ferror() only tells whether an error happened; it is a flag, not an
	// error code, so the reason has to come from the errno saved above
	int failed = ferror(f);
	fclose(f);
	if (failed != 0) {
		fprintf(stderr,"failure while reading filter file \"%s\": %s\n",fname,strerror(readerrno));
		return false;
	}
	return true;
}