sd: remove 'ssd' driver support
[unleashed/tickless.git] / usr / src / lib / libast / common / misc / magic.c
blobf4b3037fc063669e28b9aadb6d32a5d08866ae68
1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
24 * Glenn Fowler
25 * AT&T Research
27 * library interface to file
29 * the sum of the hacks {s5,v10,planix} is _____ than the parts
/*
 * embedded identification string for this library (carries an "@(#)" marker
 * and an $Id$ tag); lib is the library/catalog name handed to
 * ERROR_translate() by the T() macro below
 */
32 static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n";
34 static const char lib[] = "libast:magic";
36 #include <ast.h>
37 #include <ctype.h>
38 #include <ccode.h>
39 #include <dt.h>
40 #include <modex.h>
41 #include <error.h>
42 #include <regex.h>
43 #include <swap.h>
/*
 * T(m): translate message m through ERROR_translate() using the lib catalog;
 *       an empty string is returned unchanged
 * match(s,p): strgrpmatch() of s against pattern p with the
 *       STR_LEFT|STR_RIGHT|STR_ICASE flags and no sub-match capture
 */
45 #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m)
47 #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)
/* MAXNEST sizes the keep/cap/msg/ret stacks in _MAGIC_PRIVATE_ */
49 #define MAXNEST 10 /* { ... } nesting limit */
50 #define MINITEM 4 /* magic buffer rounding */
/*
 * one name => value pair; used for both the dict[] keyword table
 * (value is an ID_* code) and the info[] table (value is an INFO_* code)
 */
52 typedef struct /* identifier dictionary entry */
54 const char name[16]; /* identifier name */
55 int value; /* identifier value */
56 Dtlink_t link; /* dictionary link */
57 } Info_t;
/*
 * singly linked list node holding one compiled regex
 * NOTE(review): not referenced in the portion of the file visible here
 */
59 typedef struct Edit /* edit substitution */
61 struct Edit* next; /* next in list */
62 regex_t* from; /* from pattern */
63 } Edit_t;
65 struct Entry;
/*
 * state for an 'l' (loop) operation in ckmagic(): on first entry count is
 * loaded from the datum and the caller offset is saved in offset; each later
 * pass advances the offset by size until count reaches zero
 */
67 typedef struct /* loop info */
69 struct Entry* lab; /* call this function */
70 int start; /* start here */
71 int size; /* increment by this amount */
72 int count; /* dynamic loop count */
73 int offset; /* dynamic offset */
74 } Loop_t;
/*
 * one parsed magic table entry; ckmagic() walks the next-linked list and
 * interprets cont, type, op and nest as single character operation codes
 */
76 typedef struct Entry /* magic file entry */
78 struct Entry* next; /* next in list */
79 char* expr; /* offset expression */
/* which union member is live depends on the entry's type/op codes */
80 union
82 unsigned long num;
83 char* str;
84 struct Entry* lab;
85 regex_t* sub;
86 Loop_t* loop;
87 } value; /* comparison value */
88 char* desc; /* file description */
89 char* mime; /* file mime type */
/* when expr is set, offset holds 0 or an INFO_* selector (see ckmagic) */
90 unsigned long offset; /* offset in bytes */
91 unsigned long mask; /* mask before compare */
92 char cont; /* continuation operation */
93 char type; /* datum type */
94 char op; /* comparison operation */
95 char nest; /* { or } nesting operation */
96 char swap; /* forced swap order */
97 } Entry_t;
/*
 * character code classification: each code set gets CC_BIT bits inside a
 * Cctype_t; cklang() shifts by CC_BIT to step from one code set to the next
 */
99 #define CC_BIT 5
/* pick the narrowest unsigned type that holds CC_MAPS fields of CC_BIT bits */
101 #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
102 typedef unsigned short Cctype_t;
103 #else
104 typedef unsigned long Cctype_t;
105 #endif
107 #define CC_text 0x01
108 #define CC_control 0x02
109 #define CC_latin 0x04
110 #define CC_binary 0x08
111 #define CC_utf_8 0x10
113 #define CC_notext CC_text /* CC_text is flipped before checking */
115 #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text)
/*
 * classify byte value c: above 0240 is binary, 0200..0240 is latin, below 040
 * other than BEL, TAB, NL, VT and CR is control, everything else is text
 */
117 #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
/*
 * identifier classes counted by cklang(): the value field of each dict[]
 * entry is one of these and indexes the identifier[] counter array
 */
119 #define ID_NONE 0
120 #define ID_ASM 1
121 #define ID_C 2
122 #define ID_COBOL 3
123 #define ID_COPYBOOK 4
124 #define ID_CPLUSPLUS 5
125 #define ID_FORTRAN 6
126 #define ID_HTML 7
127 #define ID_INCL1 8
128 #define ID_INCL2 9
129 #define ID_INCL3 10
130 #define ID_MAM1 11
131 #define ID_MAM2 12
132 #define ID_MAM3 13
133 #define ID_NOTEXT 14
134 #define ID_PL1 15
135 #define ID_YACC 16
/* ID_MAX must track the largest ID_* value: it sizes identifier[] */
137 #define ID_MAX ID_YACC
/*
 * stat(2) / file name selectors: the value field of each info[] entry;
 * ckmagic() switches on these when an entry has an offset expression
 */
139 #define INFO_atime 1
140 #define INFO_blocks 2
141 #define INFO_ctime 3
142 #define INFO_fstype 4
143 #define INFO_gid 5
144 #define INFO_mode 6
145 #define INFO_mtime 7
146 #define INFO_name 8
147 #define INFO_nlink 9
148 #define INFO_size 10
149 #define INFO_uid 11
/*
 * private members of Magic_t; defined before <magic.h> is included so the
 * header can splice them into the handle
 * fbuf/xbuf are one byte larger than SF_BUFSIZE: getdata() stores a
 * terminating 0 after the data it reads into xbuf
 */
151 #define _MAGIC_PRIVATE_ \
152 Magicdisc_t* disc; /* discipline */ \
153 Vmalloc_t* vm; /* vmalloc region */ \
154 Entry_t* magic; /* parsed magic table */ \
155 Entry_t* magiclast; /* last entry in magic */ \
156 char* mime; /* MIME type */ \
157 unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \
158 char fbuf[SF_BUFSIZE + 1]; /* file data */ \
159 char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \
160 char nbuf[256]; /* !CC_NATIVE data */ \
161 char mbuf[64]; /* mime string */ \
162 char sbuf[64]; /* type suffix string */ \
163 char tbuf[2 * PATH_MAX]; /* type string */ \
164 Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \
165 unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \
166 unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \
167 int keep[MAXNEST]; /* ckmagic nest stack */ \
168 char* cap[MAXNEST]; /* ckmagic mime stack */ \
169 char* msg[MAXNEST]; /* ckmagic text stack */ \
170 Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \
171 int fbsz; /* fbuf size */ \
172 int fbmx; /* fbuf max size */ \
173 int xbsz; /* xbuf size */ \
174 int swap; /* swap() operation */ \
175 unsigned long flags; /* disc+open flags */ \
176 long xoff; /* xbuf offset */ \
177 int identifier[ID_MAX + 1]; /* Info_t identifier */ \
178 Sfio_t* fp; /* fbuf fp */ \
179 Sfio_t* tmp; /* tmp string */ \
180 regdisc_t redisc; /* regex discipline */ \
181 Dtdisc_t dtdisc; /* dict discipline */ \
182 Dt_t* idtab; /* identifier dict */ \
183 Dt_t* infotab; /* info keyword dict */
185 #include <magic.h>
/*
 * identifier => ID_* class table; cklang() inserts every entry into
 * mp->idtab on first use and bumps mp->identifier[value] for each
 * identifier in the file data that matches a name here
 * names must fit Info_t.name[16] including the terminating 0
 */
187 static Info_t dict[] = /* keyword dictionary */
189 { "COMMON", ID_FORTRAN },
190 { "COMPUTE", ID_COBOL },
191 { "COMP", ID_COPYBOOK },
192 { "COMPUTATIONAL",ID_COPYBOOK },
193 { "DCL", ID_PL1 },
194 { "DEFINED", ID_PL1 },
195 { "DIMENSION", ID_FORTRAN },
196 { "DIVISION", ID_COBOL },
197 { "FILLER", ID_COPYBOOK },
198 { "FIXED", ID_PL1 },
199 { "FUNCTION", ID_FORTRAN },
200 { "HTML", ID_HTML },
201 { "INTEGER", ID_FORTRAN },
202 { "MAIN", ID_PL1 },
203 { "OPTIONS", ID_PL1 },
204 { "PERFORM", ID_COBOL },
205 { "PIC", ID_COPYBOOK },
206 { "REAL", ID_FORTRAN },
207 { "REDEFINES", ID_COPYBOOK },
208 { "S9", ID_COPYBOOK },
209 { "SECTION", ID_COBOL },
210 { "SELECT", ID_COBOL },
211 { "SUBROUTINE", ID_FORTRAN },
212 { "TEXT", ID_ASM },
213 { "VALUE", ID_COPYBOOK },
214 { "attr", ID_MAM3 },
215 { "binary", ID_YACC },
216 { "block", ID_FORTRAN },
217 { "bss", ID_ASM },
218 { "byte", ID_ASM },
219 { "char", ID_C },
220 { "class", ID_CPLUSPLUS },
221 { "clr", ID_NOTEXT },
222 { "comm", ID_ASM },
223 { "common", ID_FORTRAN },
224 { "data", ID_ASM },
225 { "dimension", ID_FORTRAN },
226 { "done", ID_MAM2 },
227 { "double", ID_C },
228 { "even", ID_ASM },
229 { "exec", ID_MAM3 },
230 { "extern", ID_C },
231 { "float", ID_C },
232 { "function", ID_FORTRAN },
233 { "globl", ID_ASM },
234 { "h", ID_INCL3 },
235 { "html", ID_HTML },
236 { "include", ID_INCL1 },
237 { "int", ID_C },
238 { "integer", ID_FORTRAN },
239 { "jmp", ID_NOTEXT },
240 { "left", ID_YACC },
241 { "libc", ID_INCL2 },
242 { "long", ID_C },
243 { "make", ID_MAM1 },
244 { "mov", ID_NOTEXT },
245 { "private", ID_CPLUSPLUS },
246 { "public", ID_CPLUSPLUS },
247 { "real", ID_FORTRAN },
248 { "register", ID_C },
249 { "right", ID_YACC },
250 { "sfio", ID_INCL2 },
251 { "static", ID_C },
252 { "stdio", ID_INCL2 },
253 { "struct", ID_C },
254 { "subroutine", ID_FORTRAN },
255 { "sys", ID_NOTEXT },
256 { "term", ID_YACC },
257 { "text", ID_ASM },
258 { "tst", ID_NOTEXT },
259 { "type", ID_YACC },
260 { "typedef", ID_C },
261 { "u", ID_INCL2 },
262 { "union", ID_YACC },
263 { "void", ID_C },
/*
 * keyword => INFO_* selector table
 * NOTE(review): presumably loaded into mp->infotab by the magic file
 * parser, which is outside the portion of the file visible here
 */
266 static Info_t info[] =
268 { "atime", INFO_atime },
269 { "blocks", INFO_blocks },
270 { "ctime", INFO_ctime },
271 { "fstype", INFO_fstype },
272 { "gid", INFO_gid },
273 { "mode", INFO_mode },
274 { "mtime", INFO_mtime },
275 { "name", INFO_name },
276 { "nlink", INFO_nlink },
277 { "size", INFO_size },
278 { "uid", INFO_uid },
282 * return pointer to data at offset off and size siz
285 static char*
286 getdata(register Magic_t* mp, register long off, register int siz)
288 register long n;
290 if (off < 0)
291 return 0;
292 if (off + siz <= mp->fbsz)
293 return mp->fbuf + off;
294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
296 if (off + siz > mp->fbmx)
297 return 0;
298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299 if (sfseek(mp->fp, n, SEEK_SET) != n)
300 return 0;
301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
303 mp->xoff = 0;
304 mp->xbsz = 0;
305 return 0;
307 mp->xbuf[mp->xbsz] = 0;
308 mp->xoff = n;
309 if (off + siz > mp->xoff + mp->xbsz)
310 return 0;
312 return mp->xbuf + off - mp->xoff;
316 * @... evaluator for strexpr()
319 static long
320 indirect(const char* cs, char** e, void* handle)
322 register char* s = (char*)cs;
323 register Magic_t* mp = (Magic_t*)handle;
324 register long n = 0;
325 register char* p;
327 if (s)
329 if (*s == '@')
331 n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332 switch (*(s = *e))
334 case 'b':
335 case 'B':
336 s++;
337 if (p = getdata(mp, n, 1))
338 n = *(unsigned char*)p;
339 else
340 s = (char*)cs;
341 break;
342 case 'h':
343 case 'H':
344 s++;
345 if (p = getdata(mp, n, 2))
346 n = swapget(mp->swap, p, 2);
347 else
348 s = (char*)cs;
349 break;
350 case 'q':
351 case 'Q':
352 s++;
353 if (p = getdata(mp, n, 8))
354 n = swapget(mp->swap, p, 8);
355 else
356 s = (char*)cs;
357 break;
358 default:
359 if (isalnum(*s))
360 s++;
361 if (p = getdata(mp, n, 4))
362 n = swapget(mp->swap, p, 4);
363 else
364 s = (char*)cs;
365 break;
368 *e = s;
370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372 return n;
376 * emit regex error message
379 static void
380 regmessage(Magic_t* mp, regex_t* re, int code)
382 char buf[128];
384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
386 regerror(code, re, buf, sizeof(buf));
387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
392 * decompose vcodex(3) method composition
395 static char*
396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
398 unsigned char* map;
399 const char* o;
400 int c;
401 int n;
402 int i;
403 int a;
405 map = CCMAP(CC_ASCII, CC_NATIVE);
406 a = 0;
407 i = 1;
408 for (;;)
410 if (i)
411 i = 0;
412 else
413 *b++ = '^';
414 if (m < (x - 1) && !*(m + 1))
417 * obsolete indices
420 if (!a)
422 a = 1;
423 o = "old, ";
424 while (b < e && (c = *o++))
425 *b++ = c;
427 switch (*m)
429 case 0: o = "delta"; break;
430 case 1: o = "huffman"; break;
431 case 2: o = "huffgroup"; break;
432 case 3: o = "arith"; break;
433 case 4: o = "bwt"; break;
434 case 5: o = "rle"; break;
435 case 6: o = "mtf"; break;
436 case 7: o = "transpose"; break;
437 case 8: o = "table"; break;
438 case 9: o = "huffpart"; break;
439 case 50: o = "map"; break;
440 case 100: o = "recfm"; break;
441 case 101: o = "ss7"; break;
442 default: o = "UNKNOWN"; break;
444 m += 2;
445 while (b < e && (c = *o++))
446 *b++ = c;
448 else
449 while (b < e && m < x && (c = *m++))
451 if (map)
452 c = map[c];
453 *b++ = c;
455 if (b >= e)
456 break;
457 n = 0;
458 while (m < x)
460 n = (n<<7) | (*m & 0x7f);
461 if (!(*m++ & 0x80))
462 break;
464 if (n >= (x - m))
465 break;
466 m += n;
468 return b;
472 * check for magic table match in buf
475 static char*
476 ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
478 register Entry_t* ep;
479 register char* p;
480 register char* b;
481 register int level = 0;
482 int call = -1;
483 int c;
484 char* q;
485 char* t;
486 char* base = 0;
487 unsigned long num;
488 unsigned long mask;
489 regmatch_t matches[10];
491 mp->swap = 0;
492 b = mp->msg[0] = buf;
493 mp->mime = mp->cap[0] = 0;
494 mp->keep[0] = 0;
495 for (ep = mp->magic; ep; ep = ep->next)
497 fun:
498 if (ep->nest == '{')
500 if (++level >= MAXNEST)
502 call = -1;
503 level = 0;
504 mp->keep[0] = 0;
505 b = mp->msg[0];
506 mp->mime = mp->cap[0];
507 continue;
509 mp->keep[level] = mp->keep[level - 1] != 0;
510 mp->msg[level] = b;
511 mp->cap[level] = mp->mime;
513 switch (ep->cont)
515 case '#':
516 if (mp->keep[level] && b > buf)
518 *b = 0;
519 return buf;
521 mp->swap = 0;
522 b = mp->msg[0] = buf;
523 mp->mime = mp->cap[0] = 0;
524 if (ep->type == ' ')
525 continue;
526 break;
527 case '$':
528 if (mp->keep[level] && call < (MAXNEST - 1))
530 mp->ret[++call] = ep;
531 ep = ep->value.lab;
532 goto fun;
534 continue;
535 case ':':
536 ep = mp->ret[call--];
537 if (ep->op == 'l')
538 goto fun;
539 continue;
540 case '|':
541 if (mp->keep[level] > 1)
542 goto checknest;
543 /*FALLTHROUGH*/
544 default:
545 if (!mp->keep[level])
547 b = mp->msg[level];
548 mp->mime = mp->cap[level];
549 goto checknest;
551 break;
553 p = "";
554 num = 0;
555 if (!ep->expr)
556 num = ep->offset + off;
557 else
558 switch (ep->offset)
560 case 0:
561 num = strexpr(ep->expr, NiL, indirect, mp) + off;
562 break;
563 case INFO_atime:
564 num = st->st_atime;
565 ep->type = 'D';
566 break;
567 case INFO_blocks:
568 num = iblocks(st);
569 ep->type = 'N';
570 break;
571 case INFO_ctime:
572 num = st->st_ctime;
573 ep->type = 'D';
574 break;
575 case INFO_fstype:
576 p = fmtfs(st);
577 ep->type = toupper(ep->type);
578 break;
579 case INFO_gid:
580 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
582 p = fmtgid(st->st_gid);
583 ep->type = toupper(ep->type);
585 else
587 num = st->st_gid;
588 ep->type = 'N';
590 break;
591 case INFO_mode:
592 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
594 p = fmtmode(st->st_mode, 0);
595 ep->type = toupper(ep->type);
597 else
599 num = modex(st->st_mode);
600 ep->type = 'N';
602 break;
603 case INFO_mtime:
604 num = st->st_ctime;
605 ep->type = 'D';
606 break;
607 case INFO_name:
608 if (!base)
610 if (base = strrchr(file, '/'))
611 base++;
612 else
613 base = (char*)file;
615 p = base;
616 ep->type = toupper(ep->type);
617 break;
618 case INFO_nlink:
619 num = st->st_nlink;
620 ep->type = 'N';
621 break;
622 case INFO_size:
623 num = st->st_size;
624 ep->type = 'N';
625 break;
626 case INFO_uid:
627 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
629 p = fmtuid(st->st_uid);
630 ep->type = toupper(ep->type);
632 else
634 num = st->st_uid;
635 ep->type = 'N';
637 break;
639 switch (ep->type)
642 case 'b':
643 if (!(p = getdata(mp, num, 1)))
644 goto next;
645 num = *(unsigned char*)p;
646 break;
648 case 'h':
649 if (!(p = getdata(mp, num, 2)))
650 goto next;
651 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
652 break;
654 case 'd':
655 case 'l':
656 case 'v':
657 if (!(p = getdata(mp, num, 4)))
658 goto next;
659 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
660 break;
662 case 'q':
663 if (!(p = getdata(mp, num, 8)))
664 goto next;
665 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
666 break;
668 case 'e':
669 if (!(p = getdata(mp, num, 0)))
670 goto next;
671 /*FALLTHROUGH*/
672 case 'E':
673 if (!ep->value.sub)
674 goto next;
675 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
677 c = mp->fbsz;
678 if (c >= sizeof(mp->nbuf))
679 c = sizeof(mp->nbuf) - 1;
680 p = (char*)memcpy(mp->nbuf, p, c);
681 p[c] = 0;
682 ccmapstr(mp->x2n, p, c);
683 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
685 if (c != REG_NOMATCH)
686 regmessage(mp, ep->value.sub, c);
687 goto next;
690 p = ep->value.sub->re_sub->re_buf;
691 q = T(ep->desc);
692 t = *q ? q : p;
693 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
694 *b++ = ' ';
695 b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
696 if (ep->mime)
697 mp->mime = ep->mime;
698 goto checknest;
700 case 's':
701 if (!(p = getdata(mp, num, ep->mask)))
702 goto next;
703 goto checkstr;
704 case 'm':
705 if (!(p = getdata(mp, num, 0)))
706 goto next;
707 /*FALLTHROUGH*/
708 case 'M':
709 case 'S':
710 checkstr:
711 for (;;)
713 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
714 break;
715 if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
716 break;
717 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
718 goto next;
719 p = (char*)memcpy(mp->nbuf, p, ep->mask);
720 p[ep->mask] = 0;
721 ccmapstr(mp->x2n, p, ep->mask);
723 q = T(ep->desc);
724 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
725 *b++ = ' ';
726 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
727 *t = 0;
728 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
729 *t = c;
730 if (ep->mime)
731 mp->mime = ep->mime;
732 goto checknest;
735 if (mask = ep->mask)
736 num &= mask;
737 switch (ep->op)
740 case '=':
741 case '@':
742 if (num == ep->value.num)
743 break;
744 if (ep->cont != '#')
745 goto next;
746 if (!mask)
747 mask = ~mask;
748 if (ep->type == 'h')
750 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
752 if (!(mp->swap & (mp->swap + 1)))
753 mp->swap = 7;
754 goto swapped;
757 else if (ep->type == 'l')
759 for (c = 1; c < 4; c++)
760 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
762 if (!(mp->swap & (mp->swap + 1)))
763 mp->swap = 7;
764 goto swapped;
767 else if (ep->type == 'q')
769 for (c = 1; c < 8; c++)
770 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
771 goto swapped;
773 goto next;
775 case '!':
776 if (num != ep->value.num)
777 break;
778 goto next;
780 case '^':
781 if (num ^ ep->value.num)
782 break;
783 goto next;
785 case '>':
786 if (num > ep->value.num)
787 break;
788 goto next;
790 case '<':
791 if (num < ep->value.num)
792 break;
793 goto next;
795 case 'l':
796 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
798 if (!ep->value.loop->count)
800 ep->value.loop->count = num;
801 ep->value.loop->offset = off;
802 off = ep->value.loop->start;
804 else if (!--ep->value.loop->count)
806 off = ep->value.loop->offset;
807 goto next;
809 else
810 off += ep->value.loop->size;
811 mp->ret[++call] = ep;
812 ep = ep->value.loop->lab;
813 goto fun;
815 goto next;
817 case 'm':
818 c = mp->swap;
819 t = ckmagic(mp, file, b + (b > buf), st, num);
820 mp->swap = c;
821 if (!t)
822 goto next;
823 if (b > buf)
824 *b = ' ';
825 b += strlen(b);
826 break;
828 case 'r':
829 #if _UWIN
831 char* e;
832 Sfio_t* rp;
833 Sfio_t* gp;
835 if (!(t = strrchr(file, '.')))
836 goto next;
837 sfprintf(mp->tmp, "/reg/classes_root/%s", t);
838 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
839 goto next;
840 *ep->desc = 0;
841 *ep->mime = 0;
842 gp = 0;
843 while (t = sfgetr(rp, '\n', 1))
845 if (strneq(t, "Content Type=", 13))
847 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
848 strcpy(ep->mime, t + 13);
849 if (gp)
850 break;
852 else
854 sfprintf(mp->tmp, "/reg/classes_root/%s", t);
855 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
857 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
858 strcpy(ep->desc, t);
859 if (*ep->mime)
860 break;
864 sfclose(rp);
865 if (!gp)
866 goto next;
867 if (!*ep->mime)
869 t = T(ep->desc);
870 if (!strncasecmp(t, "microsoft", 9))
871 t += 9;
872 while (isspace(*t))
873 t++;
874 e = "application/x-ms-";
875 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
876 e = strcopy(ep->mime, e);
877 while ((c = *t++) && c != '.' && c != ' ')
878 *e++ = isupper(c) ? tolower(c) : c;
879 *e = 0;
881 while (t = sfgetr(gp, '\n', 1))
882 if (*t && !streq(t, "\"\""))
884 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
885 strcpy(ep->desc, t);
886 break;
888 sfclose(gp);
889 if (!*ep->desc)
890 goto next;
891 if (!t)
892 for (t = T(ep->desc); *t; t++)
893 if (*t == '.')
894 *t = ' ';
895 if (!mp->keep[level])
896 mp->keep[level] = 2;
897 mp->mime = ep->mime;
898 break;
900 #else
901 if (ep->cont == '#' && !mp->keep[level])
902 mp->keep[level] = 1;
903 goto next;
904 #endif
906 case 'v':
907 if (!(p = getdata(mp, num, 4)))
908 goto next;
909 c = 0;
912 num++;
913 c = (c<<7) | (*p & 0x7f);
914 } while (*p++ & 0x80);
915 if (!(p = getdata(mp, num, c)))
916 goto next;
917 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
919 *b++ = ',';
920 *b++ = ' ';
922 b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
923 goto checknest;
926 swapped:
927 q = T(ep->desc);
928 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
929 *b++ = ' ';
930 if (ep->type == 'd' || ep->type == 'D')
931 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
932 else if (ep->type == 'v')
933 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
934 else
935 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
936 if (ep->mime && *ep->mime)
937 mp->mime = ep->mime;
938 checknest:
939 if (ep->nest == '}')
941 if (!mp->keep[level])
943 b = mp->msg[level];
944 mp->mime = mp->cap[level];
946 else if (level > 0)
947 mp->keep[level - 1] = mp->keep[level];
948 if (--level < 0)
950 level = 0;
951 mp->keep[0] = 0;
954 continue;
955 next:
956 if (ep->cont == '&')
957 mp->keep[level] = 0;
958 goto checknest;
960 if (mp->keep[level] && b > buf)
962 *b = 0;
963 return buf;
965 return 0;
969 * check english language stats
972 static int
973 ckenglish(register Magic_t* mp, int pun, int badpun)
975 register char* s;
976 register int vowl = 0;
977 register int freq = 0;
978 register int rare = 0;
980 if (5 * badpun > pun)
981 return 0;
982 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
983 return 0;
984 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
985 return 0;
986 for (s = "aeiou"; *s; s++)
987 vowl += mp->count[toupper(*s)] + mp->count[*s];
988 for (s = "etaion"; *s; s++)
989 freq += mp->count[toupper(*s)] + mp->count[*s];
990 for (s = "vjkqxz"; *s; s++)
991 rare += mp->count[toupper(*s)] + mp->count[*s];
992 return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
996 * check programming language stats
/*
 * classify the data in mp->fbuf (mp->fbsz bytes) from character and
 * identifier statistics when no magic table entry applies
 *
 * mp	magic handle; count[], multi[] and identifier[] are recomputed here
 * file	file path; only the base name and suffix are examined
 * buf	scratch buffer used when the result needs a "dos ", "ascii " or
 *	"ebcdicN " qualifier (formatted with a PATH_MAX limit)
 * st	stat info; only the execute bits of st_mode are examined
 *
 * returns the description string and sets mp->mime
 */
999 static char*
1000 cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
1002 register int c;
1003 register unsigned char* b;
1004 register unsigned char* e;
1005 register int q;
1006 register char* s;
1007 char* t;
1008 char* base;
1009 char* suff;
1010 char* t1;
1011 char* t2;
1012 char* t3;
1013 int n;
1014 int badpun;
1015 int code;
1016 int pun;
1017 Cctype_t flags;
1018 Info_t* ip;
1020 b = (unsigned char*)mp->fbuf;
1021 e = b + mp->fbsz;
1022 memzero(mp->count, sizeof(mp->count));
1023 memzero(mp->multi, sizeof(mp->multi));
1024 memzero(mp->identifier, sizeof(mp->identifier));
1027 * check character coding
/*
 * OR the cctype flags of every byte together, then choose the code set
 * index q whose CC_BIT wide field (with CC_text flipped) has the
 * smallest CC_MASK value; flags ends up holding just that field
 */
1030 flags = 0;
1031 while (b < e)
1032 flags |= mp->cctype[*b++];
1033 b = (unsigned char*)mp->fbuf;
1034 code = 0;
1035 q = CC_ASCII;
1036 n = CC_MASK;
1037 for (c = 0; c < CC_MAPS; c++)
1039 flags ^= CC_text;
1040 if ((flags & CC_MASK) < n)
1042 n = flags & CC_MASK;
1043 q = c;
1045 flags >>= CC_BIT;
1047 flags = n;
1048 if (!(flags & (CC_binary|CC_notext)))
/* text in a non-native code set is converted in place to CC_NATIVE */
1050 if (q != CC_NATIVE)
1052 code = q;
1053 ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
/* "#!" interpreter line: report "<interpreter> script" */
1055 if (b[0] == '#' && b[1] == '!')
1057 for (b += 2; b < e && isspace(*b); b++);
1058 for (s = (char*)b; b < e && isprint(*b); b++);
1059 c = *b;
1060 *b = 0;
1061 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1063 if (t = strrchr(s, '/'))
1064 s = t + 1;
1065 for (t = s; *t; t++)
1066 if (isspace(*t))
1068 *t = 0;
1069 break;
1071 sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1072 mp->mime = mp->mbuf;
1073 if (match(s, "*sh"))
1075 t1 = T("command");
1076 if (streq(s, "sh"))
1077 *s = 0;
1078 else
1080 *b++ = ' ';
1081 *b = 0;
1084 else
1086 t1 = T("interpreter");
1087 *b++ = ' ';
1088 *b = 0;
1090 sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1091 s = mp->sbuf;
1092 goto qualify;
/* not accepted as a script: restore the byte that was overwritten */
1094 *b = c;
1095 b = (unsigned char*)mp->fbuf;
/*
 * single pass over the text: count[] gets per-byte frequencies, multi[]
 * counts multi-character constructs, identifier[] counts dict[] hits;
 * q is the pending quote/comment terminator (0 when outside one) and
 * s marks the start of the identifier being scanned
 */
1097 badpun = 0;
1098 pun = 0;
1099 q = 0;
1100 s = 0;
1101 t = 0;
1102 while (b < e)
1104 c = *b++;
1105 mp->count[c]++;
1106 if (c == q && (q != '*' || *b == '/' && b++))
1108 mp->multi[q]++;
1109 q = 0;
1111 else if (c == '\\')
1113 s = 0;
1114 b++;
1116 else if (!q)
1118 if (isalpha(c) || c == '_')
1120 if (!s)
1121 s = (char*)b - 1;
1123 else if (!isdigit(c))
1125 if (s)
/* an identifier just ended: look at the char that preceded it */
1127 if (s > mp->fbuf)
1128 switch (*(s - 1))
1130 case ':':
1131 if (*b == ':')
1132 mp->multi[':']++;
1133 break;
1134 case '.':
1135 if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1136 mp->multi['.']++;
1137 break;
1138 case '\n':
1139 case '\\':
1140 if (*b == '{')
1141 t = (char*)b + 1;
1142 break;
1143 case '{':
1144 if (s == t && *b == '}')
1145 mp->multi['X']++;
1146 break;
/* the identifier dictionary is built from dict[] on first use */
1148 if (!mp->idtab)
1150 if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
1151 for (q = 0; q < elementsof(dict); q++)
1152 dtinsert(mp->idtab, &dict[q]);
1153 else if (mp->disc->errorf)
1154 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1155 q = 0;
1157 if (mp->idtab)
/* terminate the identifier in place for the lookup, then restore */
1159 *(b - 1) = 0;
1160 if (ip = (Info_t*)dtmatch(mp->idtab, s))
1161 mp->identifier[ip->value]++;
1162 *(b - 1) = c;
1164 s = 0;
1166 switch (c)
1168 case '\t':
1169 if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1170 mp->multi['\t']++;
1171 break;
1172 case '"':
1173 case '\'':
1174 q = c;
1175 break;
1176 case '/':
1177 if (*b == '*')
1178 q = *b++;
1179 else if (*b == '/')
1180 q = '\n';
1181 break;
1182 case '$':
1183 if (*b == '(' && *(b + 1) != ' ')
1184 mp->multi['$']++;
1185 break;
1186 case '{':
1187 case '}':
1188 case '[':
1189 case ']':
1190 case '(':
1191 mp->multi[c]++;
1192 break;
1193 case ')':
1194 mp->multi[c]++;
1195 goto punctuation;
1196 case ':':
1197 if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1198 mp->multi[':']++;
1199 goto punctuation;
1200 case '.':
1201 case ',':
1202 case '%':
1203 case ';':
1204 case '?':
1205 punctuation:
/* punctuation not followed by space or newline counts as "bad" */
1206 pun++;
1207 if (*b != ' ' && *b != '\n')
1208 badpun++;
1209 break;
/* binary/non-text data: only the byte frequencies are collected */
1215 else
1216 while (b < e)
1217 mp->count[*b++]++;
1218 base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1219 suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
/*
 * plain text (no flag bits left): the file name and suffix decide first,
 * then the statistics; the id_* labels are shared by both paths
 */
1220 if (!flags)
1222 if (match(suff, "*sh|bat|cmd"))
1223 goto id_sh;
1224 if (match(base, "*@(mkfile)"))
1225 goto id_mk;
1226 if (match(base, "*@(makefile|.mk)"))
1227 goto id_make;
1228 if (match(base, "*@(mamfile|.mam)"))
1229 goto id_mam;
1230 if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1231 goto id_c;
1232 if (match(suff, "f"))
1233 goto id_fortran;
1234 if (match(suff, "htm+(l)"))
1235 goto id_html;
1236 if (match(suff, "cpy"))
1237 goto id_copybook;
1238 if (match(suff, "cob|cbl|cb2"))
1239 goto id_cobol;
1240 if (match(suff, "pl[1i]"))
1241 goto id_pl1;
1242 if (match(suff, "tex"))
1243 goto id_tex;
1244 if (match(suff, "asm|s"))
1245 goto id_asm;
1246 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1248 id_sh:
1249 s = T("command script");
1250 mp->mime = "application/sh";
1251 goto qualify;
1253 if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1255 s = T("mail message");
1256 mp->mime = "message/rfc822";
1257 goto qualify;
1259 if (match(base, "*@(mkfile)"))
1261 id_mk:
1262 s = "mkfile";
1263 mp->mime = "application/mk";
1264 goto qualify;
1266 if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1268 id_make:
1269 s = "makefile";
1270 mp->mime = "application/make";
1271 goto qualify;
1273 if (mp->multi['.'] >= 3)
1275 s = T("nroff input");
1276 mp->mime = "application/x-troff";
1277 goto qualify;
1279 if (mp->multi['X'] >= 3)
1281 s = T("TeX input");
1282 mp->mime = "application/x-tex";
1283 goto qualify;
/*
 * bracket counts must balance exactly when fbsz < SF_BUFSIZE; otherwise
 * openers need only be at least as frequent as closers
 */
1285 if (mp->fbsz < SF_BUFSIZE &&
1286 (mp->multi['('] == mp->multi[')'] &&
1287 mp->multi['{'] == mp->multi['}'] &&
1288 mp->multi['['] == mp->multi[']']) ||
1289 mp->fbsz >= SF_BUFSIZE &&
1290 (mp->multi['('] >= mp->multi[')'] &&
1291 mp->multi['{'] >= mp->multi['}'] &&
1292 mp->multi['['] >= mp->multi[']']))
1294 c = mp->identifier[ID_INCL1];
1295 if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1296 mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1297 mp->count['='] >= 20 && mp->count[';'] >= 20)
1299 id_c:
/* result is "<t1><t2><t3>": optional lex/yacc prefix, c or c++, program or header */
1300 t1 = "";
1301 t2 = "c ";
1302 t3 = T("program");
1303 switch (*suff)
1305 case 'c':
1306 case 'C':
1307 mp->mime = "application/x-cc";
1308 break;
1309 case 'l':
1310 case 'L':
1311 t1 = "lex ";
1312 mp->mime = "application/x-lex";
1313 break;
1314 default:
1315 t3 = T("header");
1316 if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1318 mp->mime = "application/x-cc";
1319 break;
1321 /*FALLTHROUGH*/
1322 case 'y':
1323 case 'Y':
1324 t1 = "yacc ";
1325 mp->mime = "application/x-yacc";
1326 break;
1328 if (mp->identifier[ID_CPLUSPLUS] >= 3)
1330 t2 = "c++ ";
1331 mp->mime = "application/x-c++";
1333 sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1334 s = mp->sbuf;
1335 goto qualify;
1338 if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1339 (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1340 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1342 id_mam:
1343 s = T("mam program");
1344 mp->mime = "application/x-mam";
1345 goto qualify;
1347 if (mp->identifier[ID_FORTRAN] >= 8)
1349 id_fortran:
1350 s = T("fortran program");
1351 mp->mime = "application/x-fortran";
1352 goto qualify;
1354 if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1356 id_html:
1357 s = T("html input");
1358 mp->mime = "text/html";
1359 goto qualify;
1361 if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1363 id_copybook:
1364 s = T("cobol copybook");
1365 mp->mime = "application/x-cobol";
1366 goto qualify;
1368 if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1370 id_cobol:
1371 s = T("cobol program");
1372 mp->mime = "application/x-cobol";
1373 goto qualify;
1375 if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1377 id_pl1:
1378 s = T("pl1 program");
1379 mp->mime = "application/x-pl1";
1380 goto qualify;
1382 if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1384 id_tex:
1385 s = T("TeX input");
1386 mp->mime = "text/tex";
1387 goto qualify;
1389 if (mp->identifier[ID_ASM] >= 4)
1391 id_asm:
1392 s = T("as program");
1393 mp->mime = "application/x-as";
1394 goto qualify;
1396 if (ckenglish(mp, pun, badpun))
1398 s = T("english text");
1399 mp->mime = "text/plain";
1400 goto qualify;
1403 else if (streq(base, "core"))
1405 mp->mime = "x-system/core";
1406 return T("core dump");
/*
 * data flagged binary/non-text: walk the bytes as utf-8; q is the number
 * of leading 1 bits of the lead byte and each continuation byte must be
 * 10xxxxxx; if the whole buffer passes and at least one multi-byte
 * sequence was seen (n) the data is reclassified as utf-8 text
 */
1408 if (flags & (CC_binary|CC_notext))
1410 b = (unsigned char*)mp->fbuf;
1411 e = b + mp->fbsz;
1412 n = 0;
1413 for (;;)
1415 c = *b++;
1416 q = 0;
1417 while (c & 0x80)
1419 c <<= 1;
1420 q++;
1422 switch (q)
1424 case 4:
1425 if (b < e && (*b++ & 0xc0) != 0x80)
1426 break;
1427 case 3:
1428 if (b < e && (*b++ & 0xc0) != 0x80)
1429 break;
1430 case 2:
1431 if (b < e && (*b++ & 0xc0) != 0x80)
1432 break;
1433 n = 1;
1434 case 0:
1435 if (b >= e)
1437 if (n)
1439 flags &= ~(CC_binary|CC_notext);
1440 flags |= CC_utf_8;
1442 break;
1444 continue;
1446 break;
1449 if (flags & (CC_binary|CC_notext))
1451 unsigned long d = 0;
/* q is the mean count per byte value; d accumulates squared deviations */
1453 if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1456 * compression/encryption via standard deviation
1460 for (c = 0; c < UCHAR_MAX; c++)
1462 pun = mp->count[c] - q;
1463 d += pun * pun;
1465 d /= mp->fbsz;
1467 if (d <= 0)
1468 s = T("binary");
1469 else if (d < 4)
1470 s = T("encrypted");
1471 else if (d < 16)
1472 s = T("packed");
1473 else if (d < 64)
1474 s = T("compressed");
1475 else if (d < 256)
1476 s = T("delta");
1477 else
1478 s = T("data");
1479 mp->mime = "application/octet-stream";
1480 return s;
1482 mp->mime = "text/plain";
1483 if (flags & CC_utf_8)
1484 s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1485 else if (flags & CC_latin)
1486 s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1487 else
1488 s = (flags & CC_control) ? T("text with control characters") : T("text");
/*
 * prefix the description s: "dos " when CR is present and the NL count
 * equals the CR count or exceeds it by one, and the code set name when
 * the data was converted from a non-native code set
 */
1489 qualify:
1490 if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1492 t = "dos ";
1493 mp->mime = "text/dos";
1495 else
1496 t = "";
1497 if (code)
1499 if (code == CC_ASCII)
1500 sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
1501 else
1503 sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
1504 mp->mime = "text/ebcdic";
1506 s = buf;
1508 else if (*t)
1510 sfsprintf(buf, PATH_MAX, "%s%s", t, s);
1511 s = buf;
1513 return s;
1517 * return the basic magic string for file,st in buf,size
1520 static char*
1521 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
1523 register char* s;
1524 register char* t;
1526 mp->mime = 0;
1527 if (!S_ISREG(st->st_mode))
1529 if (S_ISDIR(st->st_mode))
1531 mp->mime = "x-system/dir";
1532 return T("directory");
1534 if (S_ISLNK(st->st_mode))
1536 mp->mime = "x-system/lnk";
1537 s = buf;
1538 s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
1539 if (pathgetlink(file, s, size - (s - buf)) < 0)
1540 return T("cannot read symbolic link text");
1541 return buf;
1543 if (S_ISBLK(st->st_mode))
1545 mp->mime = "x-system/blk";
1546 sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1547 return buf;
1549 if (S_ISCHR(st->st_mode))
1551 mp->mime = "x-system/chr";
1552 sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
1553 return buf;
1555 if (S_ISFIFO(st->st_mode))
1557 mp->mime = "x-system/fifo";
1558 return "fifo";
1560 #ifdef S_ISSOCK
1561 if (S_ISSOCK(st->st_mode))
1563 mp->mime = "x-system/sock";
1564 return "socket";
1566 #endif
1568 if (!(mp->fbmx = st->st_size))
1569 s = T("empty");
1570 else if (!mp->fp)
1571 s = T("cannot read");
1572 else
1574 mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1575 if (mp->fbsz < 0)
1576 s = fmterror(errno);
1577 else if (mp->fbsz == 0)
1578 s = T("empty");
1579 else
1581 mp->fbuf[mp->fbsz] = 0;
1582 mp->xoff = 0;
1583 mp->xbsz = 0;
1584 if (!(s = ckmagic(mp, file, buf, st, 0)))
1585 s = cklang(mp, file, buf, st);
1588 if (!mp->mime)
1589 mp->mime = "application/unknown";
1590 else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1592 register char* b;
1593 register char* be;
1594 register char* m;
1595 register char* me;
1597 b = mp->mime;
1598 me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1599 while (m < me && b < t)
1600 *m++ = *b++;
1601 b = t = s;
1602 for (;;)
1604 if (!(be = strchr(t, ' ')))
1606 be = b + strlen(b);
1607 break;
1609 if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1610 break;
1611 b = t;
1612 t = be + 1;
1614 while (m < me && b < be)
1615 if ((*m++ = *b++) == ' ')
1616 *(m - 1) = '-';
1617 *m = 0;
1619 return s;
1623 * low level for magicload()
1626 static int
1627 load(register Magic_t* mp, char* file, register Sfio_t* fp)
1629 register Entry_t* ep;
1630 register char* p;
1631 register char* p2;
1632 char* p3;
1633 char* next;
1634 int n;
1635 int lge;
1636 int lev;
1637 int ent;
1638 int old;
1639 int cont;
1640 Info_t* ip;
1641 Entry_t* ret;
1642 Entry_t* first;
1643 Entry_t* last = 0;
1644 Entry_t* fun['z' - 'a' + 1];
1646 memzero(fun, sizeof(fun));
1647 cont = '$';
1648 ent = 0;
1649 lev = 0;
1650 old = 0;
1651 ret = 0;
1652 error_info.file = file;
1653 error_info.line = 0;
1654 first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1655 while (p = sfgetr(fp, '\n', 1))
1657 error_info.line++;
1658 for (; isspace(*p); p++);
1661 * nesting
1664 switch (*p)
1666 case 0:
1667 case '#':
1668 cont = '#';
1669 continue;
1670 case '{':
1671 if (++lev < MAXNEST)
1672 ep->nest = *p;
1673 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1674 (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1675 continue;
1676 case '}':
1677 if (!last || lev <= 0)
1679 if (mp->disc->errorf)
1680 (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1682 else if (lev-- == ent)
1684 ent = 0;
1685 ep->cont = ':';
1686 ep->offset = ret->offset;
1687 ep->nest = ' ';
1688 ep->type = ' ';
1689 ep->op = ' ';
1690 ep->desc = "[RETURN]";
1691 last = ep;
1692 ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1693 ret = 0;
1695 else
1696 last->nest = *p;
1697 continue;
1698 default:
1699 if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1701 n = *p++;
1702 if (n >= 'a' && n <= 'z')
1703 n -= 'a';
1704 else
1706 if (mp->disc->errorf)
1707 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1708 n = 0;
1710 if (ret && mp->disc->errorf)
1711 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1712 if (*p == '{')
1714 ent = ++lev;
1715 ret = ep;
1716 ep->desc = "[FUNCTION]";
1718 else
1720 if (*(p + 1) != ')' && mp->disc->errorf)
1721 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1722 ep->desc = "[CALL]";
1724 ep->cont = cont;
1725 ep->offset = n;
1726 ep->nest = ' ';
1727 ep->type = ' ';
1728 ep->op = ' ';
1729 last = ep;
1730 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1731 if (ret)
1732 fun[n] = last->value.lab = ep;
1733 else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1734 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1735 continue;
1737 if (!ep->nest)
1738 ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1739 break;
1743 * continuation
1746 cont = '$';
1747 switch (*p)
1749 case '>':
1750 old = 1;
1751 if (*(p + 1) == *p)
1754 * old style nesting push
1757 p++;
1758 old = 2;
1759 if (!lev && last)
1761 lev = 1;
1762 last->nest = '{';
1763 if (last->cont == '>')
1764 last->cont = '&';
1765 ep->nest = '1';
1768 /*FALLTHROUGH*/
1769 case '+':
1770 case '&':
1771 case '|':
1772 ep->cont = *p++;
1773 break;
1774 default:
1775 if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1776 (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1777 /*FALLTHROUGH*/
1778 case '*':
1779 case '0': case '1': case '2': case '3': case '4':
1780 case '5': case '6': case '7': case '8': case '9':
1781 ep->cont = (lev > 0) ? '&' : '#';
1782 break;
1784 switch (old)
1786 case 1:
1787 old = 0;
1788 if (lev)
1791 * old style nesting pop
1794 lev = 0;
1795 if (last)
1796 last->nest = '}';
1797 ep->nest = ' ';
1798 if (ep->cont == '&')
1799 ep->cont = '#';
1801 break;
1802 case 2:
1803 old = 1;
1804 break;
1806 if (isdigit(*p))
1809 * absolute offset
1812 ep->offset = strton(p, &next, NiL, 0);
1813 p2 = next;
1815 else
1817 for (p2 = p; *p2 && !isspace(*p2); p2++);
1818 if (!*p2)
1820 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1821 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1822 continue;
1826 * offset expression
1829 *p2++ = 0;
1830 ep->expr = vmstrdup(mp->vm, p);
1831 if (isalpha(*p))
1832 ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1833 else if (*p == '(' && ep->cont == '>')
1836 * convert old style indirection to @
1839 p = ep->expr + 1;
1840 for (;;)
1842 switch (*p++)
1844 case 0:
1845 case '@':
1846 case '(':
1847 break;
1848 case ')':
1849 break;
1850 default:
1851 continue;
1853 break;
1855 if (*--p == ')')
1857 *p = 0;
1858 *ep->expr = '@';
1862 for (; isspace(*p2); p2++);
1863 for (p = p2; *p2 && !isspace(*p2); p2++);
1864 if (!*p2)
1866 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1867 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1868 continue;
1870 *p2++ = 0;
1873 * type
1876 if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1878 ep->swap = ~(*p == 'l' ? 7 : 0);
1879 p += 2;
1881 if (*p == 's')
1883 if (*(p + 1) == 'h')
1884 ep->type = 'h';
1885 else
1886 ep->type = 's';
1888 else if (*p == 'a')
1889 ep->type = 's';
1890 else
1891 ep->type = *p;
1892 if (p = strchr(p, '&'))
1895 * old style mask
1898 ep->mask = strton(++p, NiL, NiL, 0);
1900 for (; isspace(*p2); p2++);
1901 if (ep->mask)
1902 *--p2 = '=';
1905 * comparison operation
1908 p = p2;
1909 if (p2 = strchr(p, '\t'))
1910 *p2++ = 0;
1911 else
1913 int qe = 0;
1914 int qn = 0;
1917 * assume balanced {}[]()\\""'' field
1920 for (p2 = p;;)
1922 switch (n = *p2++)
1924 case 0:
1925 break;
1926 case '{':
1927 if (!qe)
1928 qe = '}';
1929 if (qe == '}')
1930 qn++;
1931 continue;
1932 case '(':
1933 if (!qe)
1934 qe = ')';
1935 if (qe == ')')
1936 qn++;
1937 continue;
1938 case '[':
1939 if (!qe)
1940 qe = ']';
1941 if (qe == ']')
1942 qn++;
1943 continue;
1944 case '}':
1945 case ')':
1946 case ']':
1947 if (qe == n && qn > 0)
1948 qn--;
1949 continue;
1950 case '"':
1951 case '\'':
1952 if (!qe)
1953 qe = n;
1954 else if (qe == n)
1955 qe = 0;
1956 continue;
1957 case '\\':
1958 if (*p2)
1959 p2++;
1960 continue;
1961 default:
1962 if (!qe && isspace(n))
1963 break;
1964 continue;
1966 if (n)
1967 *(p2 - 1) = 0;
1968 else
1969 p2--;
1970 break;
1973 lge = 0;
1974 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
1975 ep->op = '=';
1976 else
1978 if (*p == '&')
1980 ep->mask = strton(++p, &next, NiL, 0);
1981 p = next;
1983 switch (*p)
1985 case '=':
1986 case '>':
1987 case '<':
1988 case '*':
1989 ep->op = *p++;
1990 if (*p == '=')
1992 p++;
1993 switch (ep->op)
1995 case '>':
1996 lge = -1;
1997 break;
1998 case '<':
1999 lge = 1;
2000 break;
2003 break;
2004 case '!':
2005 case '@':
2006 ep->op = *p++;
2007 if (*p == '=')
2008 p++;
2009 break;
2010 case 'x':
2011 p++;
2012 ep->op = '*';
2013 break;
2014 default:
2015 ep->op = '=';
2016 if (ep->mask)
2017 ep->value.num = ep->mask;
2018 break;
2021 if (ep->op != '*' && !ep->value.num)
2023 if (ep->type == 'e')
2025 if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2027 ep->value.sub->re_disc = &mp->redisc;
2028 if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2030 p += ep->value.sub->re_npat;
2031 if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2032 p += ep->value.sub->re_npat;
2034 if (n)
2036 regmessage(mp, ep->value.sub, n);
2037 ep->value.sub = 0;
2039 else if (*p && mp->disc->errorf)
2040 (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2043 else if (ep->type == 'm')
2045 ep->mask = stresc(p) + 1;
2046 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2047 memcpy(ep->value.str, p, ep->mask);
2048 if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2049 ep->value.str[ep->mask - 1] = '*';
2051 else if (ep->type == 's')
2053 ep->mask = stresc(p);
2054 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2055 memcpy(ep->value.str, p, ep->mask);
2057 else if (*p == '\'')
2059 stresc(p);
2060 ep->value.num = *(unsigned char*)(p + 1) + lge;
2062 else if (strmatch(p, "+([a-z])\\(*\\)"))
2064 char* t;
2066 t = p;
2067 ep->type = 'V';
2068 ep->op = *p;
2069 while (*p && *p++ != '(');
2070 switch (ep->op)
2072 case 'l':
2073 n = *p++;
2074 if (n < 'a' || n > 'z')
2076 if (mp->disc->errorf)
2077 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2079 else if (!fun[n -= 'a'])
2081 if (mp->disc->errorf)
2082 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2084 else
2086 ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2087 ep->value.loop->lab = fun[n];
2088 while (*p && *p++ != ',');
2089 ep->value.loop->start = strton(p, &t, NiL, 0);
2090 while (*t && *t++ != ',');
2091 ep->value.loop->size = strton(t, &t, NiL, 0);
2093 break;
2094 case 'm':
2095 case 'r':
2096 ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2097 ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2098 break;
2099 case 'v':
2100 break;
2101 default:
2102 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2103 (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2104 break;
2107 else
2109 ep->value.num = strton(p, NiL, NiL, 0) + lge;
2110 if (ep->op == '@')
2111 ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2116 * file description
2119 if (p2)
2121 for (; isspace(*p2); p2++);
2122 if (p = strchr(p2, '\t'))
2125 * check for message catalog index
2128 *p++ = 0;
2129 if (isalpha(*p2))
2131 for (p3 = p2; isalnum(*p3); p3++);
2132 if (*p3++ == ':')
2134 for (; isdigit(*p3); p3++);
2135 if (!*p3)
2137 for (p2 = p; isspace(*p2); p2++);
2138 if (p = strchr(p2, '\t'))
2139 *p++ = 0;
2144 stresc(p2);
2145 ep->desc = vmstrdup(mp->vm, p2);
2146 if (p)
2148 for (; isspace(*p); p++);
2149 if (*p)
2150 ep->mime = vmstrdup(mp->vm, p);
2153 else
2154 ep->desc = "";
2157 * get next entry
2160 last = ep;
2161 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2163 if (last)
2165 last->next = 0;
2166 if (mp->magiclast)
2167 mp->magiclast->next = first;
2168 else
2169 mp->magic = first;
2170 mp->magiclast = last;
2172 vmfree(mp->vm, ep);
2173 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2175 if (lev < 0)
2176 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2177 else if (lev > 0)
2178 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2179 if (ret)
2180 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2182 error_info.file = 0;
2183 error_info.line = 0;
2184 return 0;
2188 * load a magic file into mp
2192 magicload(register Magic_t* mp, const char* file, unsigned long flags)
2194 register char* s;
2195 register char* e;
2196 register char* t;
2197 int n;
2198 int found;
2199 int list;
2200 Sfio_t* fp;
2202 mp->flags = mp->disc->flags | flags;
2203 found = 0;
2204 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2206 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2207 s = MAGIC_FILE;
2209 for (;;)
2211 if (!list)
2212 e = 0;
2213 else if (e = strchr(s, ':'))
2216 * ok, so ~ won't work for the last list element
2217 * we do it for MAGIC_FILES_ENV anyway
2220 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2222 sfputr(mp->tmp, t, -1);
2223 s += n - 1;
2225 sfwrite(mp->tmp, s, e - s);
2226 if (!(s = sfstruse(mp->tmp)))
2227 goto nospace;
2229 if (!*s || streq(s, "-"))
2230 s = MAGIC_FILE;
2231 if (!(fp = sfopen(NiL, s, "r")))
2233 if (list)
2235 if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
2237 strcpy(mp->fbuf, s);
2238 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2239 if (!(s = sfstruse(mp->tmp)))
2240 goto nospace;
2241 if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
2242 goto next;
2244 if (!(fp = sfopen(NiL, t, "r")))
2245 goto next;
2247 else
2249 if (mp->disc->errorf)
2250 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2251 return -1;
2254 found = 1;
2255 n = load(mp, s, fp);
2256 sfclose(fp);
2257 if (n && !list)
2258 return -1;
2259 next:
2260 if (!e)
2261 break;
2262 s = e + 1;
2264 if (!found)
2266 if (mp->flags & MAGIC_VERBOSE)
2268 if (mp->disc->errorf)
2269 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2271 return -1;
2273 return 0;
2274 nospace:
2275 if (mp->disc->errorf)
2276 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2277 return -1;
2281 * open a magic session
2284 Magic_t*
2285 magicopen(Magicdisc_t* disc)
2287 register Magic_t* mp;
2288 register int i;
2289 register int n;
2290 register int f;
2291 register int c;
2292 register Vmalloc_t* vm;
2293 unsigned char* map[CC_MAPS + 1];
2295 if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2296 return 0;
2297 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2299 vmclose(vm);
2300 return 0;
2302 mp->id = lib;
2303 mp->disc = disc;
2304 mp->vm = vm;
2305 mp->flags = disc->flags;
2306 mp->redisc.re_version = REG_VERSION;
2307 mp->redisc.re_flags = REG_NOFREE;
2308 mp->redisc.re_errorf = (regerror_t)disc->errorf;
2309 mp->redisc.re_resizef = (regresize_t)vmgetmem;
2310 mp->redisc.re_resizehandle = (void*)mp->vm;
2311 mp->dtdisc.key = offsetof(Info_t, name);
2312 mp->dtdisc.link = offsetof(Info_t, link);
2313 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
2314 goto bad;
2315 for (n = 0; n < elementsof(info); n++)
2316 dtinsert(mp->infotab, &info[n]);
2317 for (i = 0; i < CC_MAPS; i++)
2318 map[i] = ccmap(i, CC_ASCII);
2319 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2320 for (n = 0; n <= UCHAR_MAX; n++)
2322 f = 0;
2323 i = CC_MAPS;
2324 while (--i >= 0)
2326 c = ccmapchr(map[i], n);
2327 f = (f << CC_BIT) | CCTYPE(c);
2329 mp->cctype[n] = f;
2331 return mp;
2332 bad:
2333 magicclose(mp);
2334 return 0;
2338 * close a magicopen() session
2342 magicclose(register Magic_t* mp)
2344 if (!mp)
2345 return -1;
2346 if (mp->tmp)
2347 sfstrclose(mp->tmp);
2348 if (mp->vm)
2349 vmclose(mp->vm);
2350 return 0;
2354 * return the magic string for file with optional stat info st
2357 char*
2358 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2360 off_t off;
2361 char* s;
2363 mp->flags = mp->disc->flags;
2364 mp->mime = 0;
2365 if (!st)
2366 s = T("cannot stat");
2367 else
2369 if (mp->fp = fp)
2370 off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2371 s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
2372 if (mp->fp)
2373 sfseek(mp->fp, off, SEEK_SET);
2374 if (!(mp->flags & MAGIC_MIME))
2376 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2377 sfprintf(mp->tmp, "%s ", T("short"));
2378 sfprintf(mp->tmp, "%s", s);
2379 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2380 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2381 if (st->st_mode & S_ISUID)
2382 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2383 if (st->st_mode & S_ISGID)
2384 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2385 if (st->st_mode & S_ISVTX)
2386 sfprintf(mp->tmp, ", sticky");
2387 if (!(s = sfstruse(mp->tmp)))
2388 s = T("out of space");
2391 if (mp->flags & MAGIC_MIME)
2392 s = mp->mime;
2393 if (!s)
2394 s = T("error");
2395 return s;
2399 * list the magic table in mp on sp
2403 magiclist(register Magic_t* mp, register Sfio_t* sp)
2405 register Entry_t* ep = mp->magic;
2406 register Entry_t* rp = 0;
2408 mp->flags = mp->disc->flags;
2409 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2410 while (ep)
2412 sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2413 if (ep->expr)
2414 sfprintf(sp, "%s", ep->expr);
2415 else
2416 sfprintf(sp, "%ld", ep->offset);
2417 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2418 switch (ep->type)
2420 case 'm':
2421 case 's':
2422 sfputr(sp, fmtesc(ep->value.str), -1);
2423 break;
2424 case 'V':
2425 switch (ep->op)
2427 case 'l':
2428 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2429 break;
2430 case 'v':
2431 sfprintf(sp, "vcodex()");
2432 break;
2433 default:
2434 sfprintf(sp, "%p", ep->value.str);
2435 break;
2437 break;
2438 default:
2439 sfprintf(sp, "%lo", ep->value.num);
2440 break;
2442 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2443 if (ep->cont == '$' && !ep->value.lab->mask)
2445 rp = ep;
2446 ep = ep->value.lab;
2448 else
2450 if (ep->cont == ':')
2452 ep = rp;
2453 ep->value.lab->mask = 1;
2455 ep = ep->next;
2458 return 0;