1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
21 ***********************************************************************/
26 * James A. Woods, Informatics General Corporation,
27 * NASA Ames Research Center, 6/81.
28 * Usenix ;login:, February/March, 1983, p. 8.
30 * discipline/method interface
34 * modified from the original BSD source
36 * 'fastfind' scans a file list for the full pathname of a file
37 * given only a piece of the name. The list is processed with
38 * with "front-compression" and bigram coding. Front compression reduces
39 * space by a factor of 4-5, bigram coding by a further 20-25%.
41 * there are 4 methods:
43 * FF_old original with 7 bit bigram encoding (no magic)
44 * FF_gnu 8 bit clean front compression (FF_gnu_magic)
45 * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
46 * FF_typ FF_dir with (mime) types (FF_typ_magic)
48 * the bigram encoding steals the eighth bit (that's why its FF_old)
49 * maybe one day we'll limit it to readonly:
51 * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative
52 * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows
53 * FF_MIN-FF_MAX ascii residue
54 * >=FF_MAX bigram codes
56 * a two-tiered string search technique is employed
58 * a metacharacter-free subpattern and partial pathname is matched
59 * backwards to avoid full expansion of the pathname list
61 * then the actual shell glob-style regular expression (if in this form)
62 * is matched against the candidate pathnames using the slower regexec()
64 * The original BSD code is covered by the BSD license:
66 * Copyright (c) 1985, 1993, 1999
67 * The Regents of the University of California. All rights reserved.
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 * 3. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
94 static const char id
[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
96 static const char lib
[] = "libast:fastfind";
100 #define FIND_MATCH "*/(find|locate)/*"
103 * this db could be anywhere
104 * findcodes[] directories are checked for findnames[i]
107 static char* findcodes
[] =
112 "/usr/local/share/lib",
123 static char* findnames
[] =
134 * convert t to lower case and drop leading x- and x- after /
135 * converted value copied to b of size n
139 typefix(char* buf
, size_t n
, register const char* t
)
142 register char* b
= buf
;
144 if ((*t
== 'x' || *t
== 'X') && *(t
+ 1) == '-')
150 if ((*b
++ = c
) == '/' && (*t
== 'x' || *t
== 'X') && *(t
+ 1) == '-')
158 * return a fastfind stream handle for pattern
162 findopen(const char* file
, const char* pattern
, const char* type
, Finddisc_t
* disc
)
182 if (!(vm
= vmopen(Vmdcheap
, Vmbest
, 0)))
186 * NOTE: searching for FIND_CODES would be much simpler if we
187 * just stuck with our own, but we also support GNU
188 * locate codes and have to search for the one of a
189 * bazillion possible names for that file
193 findcodes
[1] = getenv(FIND_CODES_ENV
);
194 if (disc
->flags
& FIND_GENERATE
)
196 if (!(fp
= (Find_t
*)vmnewof(vm
, 0, Find_t
, 1, sizeof(Encode_t
) - sizeof(Code_t
))))
202 if (file
&& (!*file
|| streq(file
, "-")))
205 j
= (findcodes
[0] = (char*)file
) && *file
== '/' ? 1 : elementsof(findcodes
);
208 * look for the codes file, but since it may not exist yet,
209 * also look for the containing directory if i<2 or if
210 * it is sufficiently qualified (FIND_MATCH)
213 for (i
= 0; i
< j
; i
++)
214 if (path
= findcodes
[i
])
218 if (!stat(path
, &st
))
220 if (S_ISDIR(st
.st_mode
))
222 for (k
= 0; k
< elementsof(findnames
); k
++)
224 sfsprintf(fp
->encode
.file
, sizeof(fp
->encode
.file
), "%s/%s", path
, findnames
[k
]);
225 if (!eaccess(fp
->encode
.file
, R_OK
|W_OK
))
227 path
= fp
->encode
.file
;
230 if (strchr(findnames
[k
], '/') && (b
= strrchr(fp
->encode
.file
, '/')))
233 if (!stat(fp
->encode
.file
, &st
) && st
.st_uid
== uid
&& (st
.st_mode
& S_IWUSR
))
236 path
= fp
->encode
.file
;
241 if (k
< elementsof(findnames
))
244 else if (st
.st_uid
== uid
&& (st
.st_mode
& S_IWUSR
))
246 sfsprintf(fp
->encode
.file
, sizeof(fp
->encode
.file
), "%s", path
);
247 path
= fp
->encode
.file
;
251 else if (i
< 2 || strmatch(path
, FIND_MATCH
))
253 sfsprintf(fp
->encode
.file
, sizeof(fp
->encode
.file
), "%s", path
);
254 if (b
= strrchr(fp
->encode
.file
, '/'))
257 if (!stat(fp
->encode
.file
, &st
) && st
.st_uid
== uid
&& (st
.st_mode
& S_IWUSR
))
260 path
= fp
->encode
.file
;
266 else if (pathpath(fp
->encode
.file
, path
, "", PATH_REGULAR
|PATH_READ
|PATH_WRITE
))
268 path
= fp
->encode
.file
;
271 else if (b
= strrchr(path
, '/'))
273 sfsprintf(fp
->encode
.file
, sizeof(fp
->encode
.file
), "%-.*s", b
- path
, path
);
274 if (pathpath(fp
->encode
.temp
, fp
->encode
.file
, "", PATH_EXECUTE
|PATH_READ
|PATH_WRITE
) &&
275 !stat(fp
->encode
.temp
, &st
) && st
.st_uid
== uid
&& (st
.st_mode
& S_IWUSR
))
277 sfsprintf(fp
->encode
.file
, sizeof(fp
->encode
.file
), "%s%s", fp
->encode
.temp
, b
);
278 path
= fp
->encode
.file
;
285 if (fp
->disc
->errorf
)
286 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: cannot locate codes", file
? file
: findcodes
[2]);
289 if (fp
->disc
->flags
& FIND_OLD
)
292 * FF_old generates temp data that is read
293 * in a second pass to generate the real codes
297 if (!(fp
->fp
= sftmp(32 * PATH_MAX
)))
299 if (fp
->disc
->errorf
)
300 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "cannot create tmp file");
307 * the rest generate into a temp file that
308 * is simply renamed on completion
311 if (s
= strrchr(path
, '/'))
318 if (!pathtemp(fp
->encode
.temp
, sizeof(fp
->encode
.temp
), p
, "ff", &fd
))
320 if (fp
->disc
->errorf
)
321 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "%s: cannot create tmp file in this directory", p
? p
: ".");
326 if (!(fp
->fp
= sfnew(NiL
, NiL
, (size_t)SF_UNBOUND
, fd
, SF_WRITE
)))
328 if (fp
->disc
->errorf
)
329 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "%s: cannot open tmp file", fp
->encode
.temp
);
333 if (fp
->disc
->flags
& FIND_TYPE
)
336 fp
->encode
.namedisc
.key
= offsetof(Type_t
, name
);
337 fp
->encode
.namedisc
.link
= offsetof(Type_t
, byname
);
338 fp
->encode
.indexdisc
.key
= offsetof(Type_t
, index
);
339 fp
->encode
.indexdisc
.size
= sizeof(unsigned long);
340 fp
->encode
.indexdisc
.link
= offsetof(Type_t
, byindex
);
342 if (!(fp
->encode
.namedict
= dtopen(&fp
->encode
.namedisc
, Dttree
)) || !(fp
->encode
.indexdict
= dtopen(&fp
->encode
.indexdisc
, Dttree
)) || !(tp
= newof(0, Type_t
, 1, strlen(s
) + 1)))
344 if (fp
->encode
.namedict
)
345 dtclose(fp
->encode
.namedict
);
346 if (fp
->disc
->errorf
)
347 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "cannot allocate type table");
352 * type index 1 is always system/dir
355 tp
->index
= ++fp
->types
;
357 dtinsert(fp
->encode
.namedict
, tp
);
358 dtinsert(fp
->encode
.indexdict
, tp
);
360 else if (fp
->disc
->flags
& FIND_GNU
)
364 sfputr(fp
->fp
, FF_gnu_magic
, 0);
370 sfputr(fp
->fp
, FF_dir_magic
, 0);
376 i
= sizeof(Decode_t
) + sizeof(Code_t
);
377 if (!pattern
|| !*pattern
)
379 i
+= (j
= 2 * (strlen(pattern
) + 1));
380 if (!(fp
= (Find_t
*)vmnewof(vm
, 0, Find_t
, 1, i
)))
388 if (disc
->flags
& FIND_ICASE
)
389 fp
->decode
.ignorecase
= 1;
390 j
= (findcodes
[0] = (char*)file
) && *file
== '/' ? 1 : elementsof(findcodes
);
391 for (i
= 0; i
< j
; i
++)
392 if (path
= findcodes
[i
])
396 if (!stat(path
, &st
))
398 if (S_ISDIR(st
.st_mode
))
400 for (k
= 0; k
< elementsof(findnames
); k
++)
402 sfsprintf(fp
->decode
.path
, sizeof(fp
->decode
.path
), "%s/%s", path
, findnames
[k
]);
403 if (fp
->fp
= sfopen(NiL
, fp
->decode
.path
, "r"))
405 path
= fp
->decode
.path
;
412 else if (fp
->fp
= sfopen(NiL
, path
, "r"))
416 else if ((path
= pathpath(fp
->decode
.path
, path
, "", PATH_REGULAR
|PATH_READ
)) && (fp
->fp
= sfopen(NiL
, path
, "r")))
421 if (fp
->disc
->errorf
)
422 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: cannot locate codes", file
? file
: findcodes
[2]);
425 if (fstat(sffileno(fp
->fp
), &st
))
427 if (fp
->disc
->errorf
)
428 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: cannot stat codes", path
);
431 if (fp
->secure
= ((st
.st_mode
& (S_IRGRP
|S_IROTH
)) == S_IRGRP
) && st
.st_gid
== getegid() && getegid() != getgid())
433 fp
->stamp
= st
.st_mtime
;
434 b
= (s
= fp
->decode
.temp
) + 1;
435 for (i
= 0; i
< elementsof(fp
->decode
.bigram1
); i
++)
437 if ((j
= sfgetc(fp
->fp
)) == EOF
)
439 if (!(*s
++ = fp
->decode
.bigram1
[i
] = j
) && i
)
444 if ((j
= sfgetc(fp
->fp
)) == EOF
)
446 if (!(*s
++ = fp
->decode
.bigram2
[i
] = j
) && (i
|| fp
->decode
.bigram1
[0] >= '0' && fp
->decode
.bigram1
[0] <= '1'))
449 if (streq(b
, FF_typ_magic
))
453 type
= (const char*)typefix(fp
->decode
.bigram2
, sizeof(fp
->decode
.bigram2
), type
);
454 memset(fp
->decode
.bigram1
, 0, sizeof(fp
->decode
.bigram1
));
457 for (j
= 0, i
= 1;; i
++)
459 if (!(s
= sfgetr(fp
->fp
, 0, 0)))
463 if (type
&& strmatch(s
, type
))
473 else if (streq(b
, FF_dir_magic
))
475 else if (streq(b
, FF_gnu_magic
))
477 else if (!*b
&& *--b
>= '0' && *b
<= '1')
480 while (j
= sfgetc(fp
->fp
))
482 if (j
== EOF
|| fp
->decode
.count
>= sizeof(fp
->decode
.path
))
484 fp
->decode
.path
[fp
->decode
.count
++] = j
;
492 if ((j
= sfgetc(fp
->fp
)) == EOF
)
494 fp
->decode
.bigram2
[i
= -i
] = j
;
496 while (++i
< elementsof(fp
->decode
.bigram1
))
498 if ((j
= sfgetc(fp
->fp
)) == EOF
)
500 fp
->decode
.bigram1
[i
] = j
;
501 if ((j
= sfgetc(fp
->fp
)) == EOF
)
503 fp
->decode
.bigram2
[i
] = j
;
505 if ((fp
->decode
.peek
= sfgetc(fp
->fp
)) != FF_OFF
)
510 * set up the physical dir table
513 if (disc
->version
>= 19980301L)
515 fp
->verifyf
= disc
->verifyf
;
516 if (disc
->dirs
&& *disc
->dirs
)
518 for (k
= 0; disc
->dirs
[k
]; k
++);
519 if (k
== 1 && streq(disc
->dirs
[0], "/"))
523 if (!(fp
->dirs
= vmnewof(fp
->vm
, 0, char*, 2 * k
+ 1, 0)))
525 if (!(fp
->lens
= vmnewof(fp
->vm
, 0, int, 2 * k
, 0)))
529 j
= fp
->method
== FF_old
|| fp
->method
== FF_gnu
;
532 * fill the dir list with logical and
533 * physical names since we don't know
534 * which way the db was encoded (it
535 * could be *both* ways)
538 for (i
= q
= 0; i
< k
; i
++)
540 if (*(s
= disc
->dirs
[i
]) == '/')
541 sfsprintf(b
, sizeof(fp
->decode
.temp
) - 1, "%s", s
);
542 else if (!p
&& !(p
= getcwd(fp
->decode
.path
, sizeof(fp
->decode
.path
))))
545 sfsprintf(b
, sizeof(fp
->decode
.temp
) - 1, "%s/%s", p
, s
);
549 if (!(fp
->dirs
[q
] = vmstrdup(fp
->vm
, b
)))
552 (fp
->dirs
[q
])[s
- b
] = 0;
555 s
= pathcanon(b
, PATH_PHYSICAL
);
558 if (!strneq(b
, fp
->dirs
[q
- 1], s
- b
))
560 if (!(fp
->dirs
[q
] = vmstrdup(fp
->vm
, b
)))
563 (fp
->dirs
[q
])[s
- b
] = 0;
567 strsort(fp
->dirs
, q
, strcasecmp
);
568 for (i
= 0; i
< q
; i
++)
569 fp
->lens
[i
] = strlen(fp
->dirs
[i
]);
573 if (fp
->verifyf
|| (disc
->flags
& FIND_VERIFY
))
575 if (fp
->method
!= FF_dir
&& fp
->method
!= FF_typ
)
577 if (fp
->disc
->errorf
)
578 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: %s code format does not support directory verification", path
, fp
->method
== FF_gnu
? FF_gnu_magic
: "OLD-BIGRAM");
585 * extract last glob-free subpattern in name for fast pre-match
586 * prepend 0 for backwards match
589 if (p
= s
= (char*)pattern
)
591 b
= fp
->decode
.pattern
;
623 if (!brace
&& paren
> 0 && !--paren
)
628 if (!brace
&& !paren
)
643 if (s
!= pattern
&& !streq(pattern
, "*"))
645 fp
->decode
.match
= 1;
646 if (i
= regcomp(&fp
->decode
.re
, pattern
, REG_SHELL
|REG_AUGMENTED
|(fp
->decode
.ignorecase
?REG_ICASE
:0)))
650 regerror(i
, &fp
->decode
.re
, fp
->decode
.temp
, sizeof(fp
->decode
.temp
));
651 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: %s", pattern
, fp
->decode
.temp
);
663 if (fp
->decode
.ignorecase
)
664 for (s
= fp
->decode
.pattern
; s
<= b
; s
++)
673 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "out of space");
683 if (fp
->disc
->errorf
)
684 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: invalid codes", path
);
686 if (!fp
->generate
&& fp
->decode
.match
)
687 regfree(&fp
->decode
.re
);
695 * return the next fastfind path
696 * 0 returned when list exhausted
700 findread(register Find_t
* fp
)
717 if (fp
->decode
.restore
)
719 *fp
->decode
.restore
= '/';
720 fp
->decode
.restore
= 0;
722 ignorecase
= fp
->decode
.ignorecase
? STR_ICASE
: 0;
734 if ((c
= sfgetc(fp
->fp
)) == EOF
)
738 if ((c
= sfgetc(fp
->fp
)) == EOF
)
741 if ((c
= sfgetc(fp
->fp
)) == EOF
)
745 n
= (n
- 0xffff) - 1;
747 else if ((n
= c
) & 0x80)
755 p
= fp
->decode
.path
+ (fp
->decode
.count
+= n
);
758 if ((c
= sfgetc(fp
->fp
)) == EOF
)
771 if (sfread(fp
->fp
, w
, sizeof(w
)) != sizeof(w
))
773 if (fp
->decode
.swap
>= 0)
775 c
= (int32_t)((w
[0] << 24) | (w
[1] << 16) | (w
[2] << 8) | w
[3]);
776 if (!fp
->decode
.swap
)
779 * the old format uses machine
780 * byte order; this test uses
781 * the smallest magnitude of
782 * both byte orders on the
783 * first encoded path motion
784 * to determine the original
791 n
= (int32_t)((w
[3] << 24) | (w
[2] << 16) | (w
[1] << 8) | w
[0]);
798 fp
->decode
.swap
= -1;
799 c
= (int32_t)((w
[3] << 24) | (w
[2] << 16) | (w
[1] << 8) | w
[0]);
804 c
= (int32_t)((w
[3] << 24) | (w
[2] << 16) | (w
[1] << 8) | w
[0]);
806 fp
->decode
.count
+= c
- FF_OFF
;
807 for (p
= fp
->decode
.path
+ fp
->decode
.count
; (c
= sfgetc(fp
->fp
)) > FF_ESC
;)
808 if (c
& (1<<(CHAR_BIT
-1)))
810 *p
++ = fp
->decode
.bigram1
[c
& ((1<<(CHAR_BIT
-1))-1)];
811 *p
++ = fp
->decode
.bigram2
[c
& ((1<<(CHAR_BIT
-1))-1)];
820 if (fp
->decode
.found
)
821 fp
->decode
.found
= 0;
823 b
+= fp
->decode
.count
;
831 * use the ordering and lengths to prune
832 * comparison function calls
833 * (*fp->dirs)[*fp->lens]=='/' if its
834 * already been matched
837 if ((n
= p
- fp
->decode
.path
+ 1) > (m
= *fp
->lens
))
841 if (!strncasecmp(*fp
->dirs
, fp
->decode
.path
, m
))
848 if (!(n
= strcasecmp(*fp
->dirs
, fp
->decode
.path
)) && (ignorecase
|| !strcmp(*fp
->dirs
, fp
->decode
.path
)))
852 (*fp
->dirs
)[m
] = '/';
853 if ((*fp
->dirs
)[m
- 1] != '/')
854 (*fp
->dirs
)[++(*fp
->lens
)] = '/';
862 else if (!(*fp
->dirs
)[m
])
867 if (fp
->verify
&& (*p
== '/' || t
== 1))
869 if ((n
= p
- fp
->decode
.path
))
874 n
= (*fp
->verifyf
)(fp
, fp
->decode
.path
, n
, fp
->disc
);
875 else if (stat(fp
->decode
.path
, &st
))
877 else if ((unsigned long)st
.st_mtime
> fp
->stamp
)
884 * n<0 skip this subtree
886 * n>0 read this dir now
889 /* NOT IMPLEMENTED YET */
891 if (FF_OK_TYPE(fp
, t
))
897 if (*fp
->decode
.pattern
== '/' && b
> fp
->decode
.path
)
900 if (*s
== *fp
->decode
.end
|| ignorecase
&& tolower(*s
) == *fp
->decode
.end
)
903 for (e
= fp
->decode
.end
- 1, q
= s
- 1; *e
&& (*q
== *e
|| tolower(*q
) == *e
); e
--, q
--);
905 for (e
= fp
->decode
.end
- 1, q
= s
- 1; *e
&& *q
== *e
; e
--, q
--);
908 fp
->decode
.found
= 1;
909 if (!fp
->decode
.match
|| strgrpmatch(fp
->decode
.path
, fp
->decode
.pattern
, NiL
, 0, STR_MAXIMAL
|STR_LEFT
|STR_RIGHT
|ignorecase
))
913 *(fp
->decode
.restore
= p
) = 0;
914 if (!fp
->secure
|| !access(fp
->decode
.path
, F_OK
))
915 return fp
->decode
.path
;
921 else if (!fp
->decode
.match
|| !(n
= regexec(&fp
->decode
.re
, fp
->decode
.path
, 0, NiL
, 0)))
924 if (*p
== '/' && p
> fp
->decode
.path
)
925 *(fp
->decode
.restore
= p
) = 0;
926 if (!fp
->secure
|| !access(fp
->decode
.path
, F_OK
))
927 return fp
->decode
.path
;
929 else if (n
!= REG_NOMATCH
)
931 if (fp
->disc
->errorf
)
933 regerror(n
, &fp
->decode
.re
, fp
->decode
.temp
, sizeof(fp
->decode
.temp
));
934 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: %s", fp
->decode
.pattern
, fp
->decode
.temp
);
943 * add path to the code table
944 * paths are assumed to be in sort order
948 findwrite(register Find_t
* fp
, const char* path
, size_t len
, const char* type
)
950 register unsigned char* s
;
951 register unsigned char* e
;
952 register unsigned char* p
;
956 register unsigned long u
;
960 if (type
&& fp
->method
== FF_dir
)
962 len
= sfsprintf(fp
->encode
.mark
, sizeof(fp
->encode
.mark
), "%-.*s/", len
, path
);
963 path
= fp
->encode
.mark
;
965 s
= (unsigned char*)path
;
968 if (len
< sizeof(fp
->encode
.path
))
972 len
= sizeof(fp
->encode
.path
) - 1;
975 p
= (unsigned char*)fp
->encode
.path
;
982 n
= s
- (unsigned char*)path
;
986 d
= n
- fp
->encode
.prefix
;
987 if (d
>= -127 && d
<= 127)
988 sfputc(fp
->fp
, d
& 0xff);
991 sfputc(fp
->fp
, 0x80);
992 sfputc(fp
->fp
, (d
>> 8) & 0xff);
993 sfputc(fp
->fp
, d
& 0xff);
995 fp
->encode
.prefix
= n
;
996 sfputr(fp
->fp
, (char*)s
, 0);
999 sfprintf(fp
->fp
, "%ld", n
- fp
->encode
.prefix
+ FF_OFF
);
1000 fp
->encode
.prefix
= n
;
1001 sfputc(fp
->fp
, ' ');
1008 fp
->encode
.code
[n
][*s
++]++;
1012 if ((n
= *p
++) < FF_MIN
|| n
>= FF_MAX
)
1021 type
= (const char*)typefix((char*)fp
->encode
.bigram
, sizeof(fp
->encode
.bigram
), type
);
1022 if (x
= (Type_t
*)dtmatch(fp
->encode
.namedict
, type
))
1024 else if (!(x
= newof(0, Type_t
, 1, strlen(type
) + 1)))
1028 u
= x
->index
= ++fp
->types
;
1029 strcpy(x
->name
, type
);
1030 dtinsert(fp
->encode
.namedict
, x
);
1031 dtinsert(fp
->encode
.indexdict
, x
);
1039 d
= n
- fp
->encode
.prefix
;
1041 fp
->encode
.prefix
= n
;
1042 sfputr(fp
->fp
, (char*)s
, 0);
1045 memcpy(fp
->encode
.path
, path
, len
);
1054 finddone(register Find_t
* fp
)
1060 if (fp
->disc
->errorf
)
1061 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: write error [sfsync]", fp
->encode
.file
);
1064 if (sferror(fp
->fp
))
1066 if (fp
->disc
->errorf
)
1067 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: write error [sferror]", fp
->encode
.file
);
1070 r
= sfclose(fp
->fp
);
1074 if (fp
->disc
->errorf
)
1075 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: write error [sfclose]", fp
->encode
.file
);
1082 * finish the code table
1086 findsync(register Find_t
* fp
)
1103 * replace the real file with the temp file
1108 remove(fp
->encode
.file
);
1109 if (rename(fp
->encode
.temp
, fp
->encode
.file
))
1111 if (fp
->disc
->errorf
)
1112 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "%s: cannot rename from tmp file %s", fp
->encode
.file
, fp
->encode
.temp
);
1113 remove(fp
->encode
.temp
);
1119 * determine the top FF_MAX bigrams
1122 for (n
= 0; n
< FF_MAX
; n
++)
1123 for (m
= 0; m
< FF_MAX
; m
++)
1124 fp
->encode
.hits
[fp
->encode
.code
[n
][m
]]++;
1125 fp
->encode
.hits
[0] = 0;
1127 for (n
= USHRT_MAX
; n
>= 0; n
--)
1128 if (d
= fp
->encode
.hits
[n
])
1130 fp
->encode
.hits
[n
] = m
;
1131 if ((m
+= d
) > FF_MAX
)
1135 fp
->encode
.hits
[n
] = 0;
1136 for (n
= FF_MAX
- 1; n
>= 0; n
--)
1137 for (m
= FF_MAX
- 1; m
>= 0; m
--)
1138 if (fp
->encode
.hits
[fp
->encode
.code
[n
][m
]])
1140 d
= fp
->encode
.code
[n
][m
];
1141 b
= fp
->encode
.hits
[d
] - 1;
1142 fp
->encode
.code
[n
][m
] = b
+ FF_MAX
;
1143 if (fp
->encode
.hits
[d
]++ >= FF_MAX
)
1144 fp
->encode
.hits
[d
] = 0;
1145 fp
->encode
.bigram
[b
*= 2] = n
;
1146 fp
->encode
.bigram
[b
+ 1] = m
;
1149 fp
->encode
.code
[n
][m
] = 0;
1152 * commit the real file
1155 if (sfseek(fp
->fp
, (Sfoff_t
)0, SEEK_SET
))
1157 if (fp
->disc
->errorf
)
1158 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "cannot rewind tmp file");
1161 if (!(sp
= sfopen(NiL
, fp
->encode
.file
, "w")))
1168 sfwrite(sp
, fp
->encode
.bigram
, sizeof(fp
->encode
.bigram
));
1171 * encode the massaged paths
1174 while (s
= sfgetr(fp
->fp
, 0, 0))
1176 z
= strtol(s
, &t
, 0);
1178 if (z
< 0 || z
> 2 * FF_OFF
)
1181 sfputc(sp
, (z
>> 24));
1182 sfputc(sp
, (z
>> 16));
1183 sfputc(sp
, (z
>> 8));
1195 if (d
= fp
->encode
.code
[n
][m
])
1212 if (!(fp
->fp
= sfopen(NiL
, fp
->encode
.temp
, "r")))
1214 if (fp
->disc
->errorf
)
1215 (*fp
->disc
->errorf
)(fp
, fp
->disc
, ERROR_SYSTEM
|2, "%s: cannot read tmp file", fp
->encode
.temp
);
1216 remove(fp
->encode
.temp
);
1221 * commit the output file
1224 if (!(sp
= sfopen(NiL
, fp
->encode
.file
, "w")))
1228 * write the header magic
1232 sfputr(sp
, FF_typ_magic
, 0);
1235 * write the type table in index order starting with 1
1238 for (x
= (Type_t
*)dtfirst(fp
->encode
.indexdict
); x
; x
= (Type_t
*)dtnext(fp
->encode
.indexdict
, x
))
1239 sfputr(sp
, x
->name
, 0);
1243 * append the front compressed strings
1246 if (sfmove(fp
->fp
, sp
, SF_UNBOUND
, -1) < 0 || !sfeof(fp
->fp
))
1249 if (fp
->disc
->errorf
)
1250 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: cannot append codes", fp
->encode
.file
);
1257 remove(fp
->encode
.temp
);
1262 if (fp
->disc
->errorf
)
1263 (*fp
->disc
->errorf
)(fp
, fp
->disc
, 2, "%s: cannot write codes", fp
->encode
.file
);
1270 remove(fp
->encode
.temp
);
1275 * close an open fastfind stream
1279 findclose(register Find_t
* fp
)
1288 if (fp
->encode
.indexdict
)
1289 dtclose(fp
->encode
.indexdict
);
1290 if (fp
->encode
.namedict
)
1291 dtclose(fp
->encode
.namedict
);
1295 if (fp
->decode
.match
)
1296 regfree(&fp
->decode
.re
);