From: Apple Date: Thu, 12 Apr 2007 22:08:58 +0000 (+0000) Subject: shell_cmds-81.1.tar.gz X-Git-Tag: mac-os-x-10411x86^0 X-Git-Url: https://git.saurik.com/apple/shell_cmds.git/commitdiff_plain/e407a76b5d5c20d98a7c128a4f9b6499da2be298 shell_cmds-81.1.tar.gz --- diff --git a/locate/bigram/Makefile b/locate/bigram/Makefile index 3916131..21039bf 100644 --- a/locate/bigram/Makefile +++ b/locate/bigram/Makefile @@ -26,6 +26,7 @@ DEBUG_LIBS = $(LIBS) PROF_LIBS = $(LIBS) +HEADER_PATHS = -I../locate NEXTSTEP_BUILD_OUTPUT_DIR = /tmp/$(NAME)/Build diff --git a/locate/bigram/locate.bigram.c b/locate/bigram/locate.bigram.c index 89a87eb..a4ad2b1 100644 --- a/locate/bigram/locate.bigram.c +++ b/locate/bigram/locate.bigram.c @@ -1,6 +1,5 @@ -/* $NetBSD: locate.bigram.c,v 1.6 1997/10/19 04:11:52 lukem Exp $ */ - /* + * Copyright (c) 1995 Wolfram Schneider . Berlin. * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -34,62 +33,78 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD: src/usr.bin/locate/bigram/locate.bigram.c,v 1.12 2005/02/09 09:13:36 stefanf Exp $ */ -#include #ifndef lint -__COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ - The Regents of the University of California. All rights reserved.\n"); +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint -#if 0 -static char sccsid[] = "@(#)locate.bigram.c 8.2 (Berkeley) 4/28/95"; -#endif -__RCSID("$NetBSD: locate.bigram.c,v 1.6 1997/10/19 04:11:52 lukem Exp $"); +static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ /* - * bigram < text > bigrams - * + * bigram < sorted_file_names | sort -nr | + * awk 'NR <= 128 { printf $2 }' > bigrams + * * List bigrams for 'updatedb' script. 
* Use 'code' to encode a file using this output. */ #include +#include #include /* for MAXPATHLEN */ +#include "locate.h" -char buf1[MAXPATHLEN] = " "; -char buf2[MAXPATHLEN]; - -int main __P((int, char **)); +u_char buf1[MAXPATHLEN] = " "; +u_char buf2[MAXPATHLEN]; +u_int bigram[UCHAR_MAX + 1][UCHAR_MAX + 1]; int -main(argc, argv) - int argc; - char *argv[]; +main(void) { - char *cp; - char *oldpath = buf1, *path = buf2; + register u_char *cp; + register u_char *oldpath = buf1, *path = buf2; + register u_int i, j; + + while (fgets(path, sizeof(buf2), stdin) != NULL) { + + /* + * We don't need remove newline character '\n'. + * '\n' is less than ASCII_MIN and will be later + * ignored at output. + */ - while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) { /* skip longest common prefix */ - for ( cp = path; *cp == *oldpath; cp++, oldpath++ ) - if ( *oldpath == '\0' ) + for (cp = path; *cp == *oldpath; cp++, oldpath++) + if (*cp == '\0') break; - /* - * output post-residue bigrams only - */ - while ( *cp != '\0' && *(cp + 1) != '\0' ) { - putchar ( *cp++ ); - putchar ( *cp++ ); - putchar ( '\n' ); + + while (*cp != '\0' && *(cp + 1) != '\0') { + bigram[(u_char)*cp][(u_char)*(cp + 1)]++; + cp += 2; + } + + /* swap pointers */ + if (path == buf1) { + path = buf2; + oldpath = buf1; + } else { + path = buf1; + oldpath = buf2; } - if ( path == buf1 ) /* swap pointers */ - path = buf2, oldpath = buf1; - else - path = buf1, oldpath = buf2; } - return (0); + + /* output, boundary check */ + for (i = ASCII_MIN; i <= ASCII_MAX; i++) + for (j = ASCII_MIN; j <= ASCII_MAX; j++) + if (bigram[i][j] != 0) + (void)printf("%4u %c%c\n", bigram[i][j], i, j); + + exit(0); } diff --git a/locate/code/locate.code.c b/locate/code/locate.code.c index c3a262e..9f7850e 100644 --- a/locate/code/locate.code.c +++ b/locate/code/locate.code.c @@ -1,6 +1,5 @@ -/* $NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $ */ - /* + * Copyright (c) 1995 Wolfram Schneider . Berlin. 
* Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -34,19 +33,18 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. + * + * $FreeBSD: src/usr.bin/locate/code/locate.code.c,v 1.13 2002/03/22 01:22:47 imp Exp $ */ -#include #ifndef lint -__COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ - The Regents of the University of California. All rights reserved.\n"); +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint -#if 0 -static char sccsid[] = "@(#)locate.code.c 8.4 (Berkeley) 5/4/95"; -#endif -__RCSID("$NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $"); +static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ /* @@ -75,48 +73,66 @@ __RCSID("$NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $"); * * 0-28 likeliest differential counts + offset to make nonnegative * 30 switch code for out-of-range count to follow in next word + * 31 an 8 bit char followed * 128-255 bigram codes (128 most common, as determined by 'updatedb') * 32-127 single character (printable) ascii residue (ie, literal) * - * SEE ALSO: updatedb.csh, bigram.c + * The locate database stores any character except newline ('\n') + * and NUL ('\0'). The 8-bit character support doesn't waste extra + * space until you have characters in file names less than 32 + * or greater than 127. + * + * + * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c * * AUTHOR: James A. 
Woods, Informatics General Corp., * NASA Ames Research Center, 10/82 + * 8-bit file names characters: + * Wolfram Schneider, Berlin September 1996 */ #include - #include #include -#include #include #include +#include #include - #include "locate.h" #define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ -char buf1[MAXPATHLEN] = " "; -char buf2[MAXPATHLEN]; -char bigrams[BGBUFSIZE + 1] = { 0 }; +u_char buf1[MAXPATHLEN] = " "; +u_char buf2[MAXPATHLEN]; +u_char bigrams[BGBUFSIZE + 1] = { 0 }; + +#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */ -int bgindex __P((char *)); -int main __P((int, char **)); -void usage __P((void)); +#ifdef LOOKUP +#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)]) +typedef short bg_t; +bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1]; +#else +#define BGINDEX(x) bgindex(x) +typedef int bg_t; +int bgindex(char *); +#endif /* LOOKUP */ + + +void usage(void); int main(argc, argv) int argc; char *argv[]; { - char *cp, *oldpath, *path; + register u_char *cp, *oldpath, *path; int ch, code, count, diffcount, oldcount; FILE *fp; + register int i, j; while ((ch = getopt(argc, argv, "")) != -1) switch(ch) { - case '?': default: usage(); } @@ -131,30 +147,51 @@ main(argc, argv) /* First copy bigram array to stdout. */ (void)fgets(bigrams, BGBUFSIZE + 1, fp); + if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE) err(1, "stdout"); (void)fclose(fp); +#ifdef LOOKUP + /* init lookup table */ + for (i = 0; i < UCHAR_MAX + 1; i++) + for (j = 0; j < UCHAR_MAX + 1; j++) + big[i][j] = (bg_t)-1; + + for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2) + big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i; + +#endif /* LOOKUP */ + oldpath = buf1; path = buf2; oldcount = 0; + while (fgets(path, sizeof(buf2), stdin) != NULL) { - /* Truncate newline. */ - cp = path + strlen(path) - 1; - if (cp > path && *cp == '\n') - *cp = '\0'; - /* Squelch characters that would botch the decoding. 
*/ + /* skip empty lines */ + if (*path == '\n') + continue; + + /* remove newline */ for (cp = path; *cp != '\0'; cp++) { - *cp &= PARITY-1; - if (*cp <= SWITCH) +#ifndef LOCATE_CHAR30 + /* old locate implementations core'd for char 30 */ + if (*cp == SWITCH) *cp = '?'; + else +#endif /* !LOCATE_CHAR30 */ + + /* chop newline */ + if (*cp == '\n') + *cp = '\0'; } /* Skip longest common prefix. */ for (cp = path; *cp == *oldpath; cp++, oldpath++) - if (*oldpath == '\0') + if (*cp == '\0') break; + count = cp - path; diffcount = count - oldcount + OFFSET; oldcount = count; @@ -167,22 +204,42 @@ main(argc, argv) err(1, "stdout"); while (*cp != '\0') { - if (*(cp + 1) == '\0') { - if (putchar(*cp) == EOF) - err(1, "stdout"); - break; - } - if ((code = bgindex(cp)) < 0) { - if (putchar(*cp++) == EOF || - putchar(*cp++) == EOF) - err(1, "stdout"); - } else { - /* Found, so mark byte with parity bit. */ + /* print *two* characters */ + + if ((code = BGINDEX(cp)) != (bg_t)-1) { + /* + * print *one* as bigram + * Found, so mark byte with + * parity bit. + */ if (putchar((code / 2) | PARITY) == EOF) err(1, "stdout"); cp += 2; } + + else { + for (i = 0; i < 2; i++) { + if (*cp == '\0') + break; + + /* print umlauts in file names */ + if (*cp < ASCII_MIN || + *cp > ASCII_MAX) { + if (putchar(UMLAUT) == EOF || + putchar(*cp++) == EOF) + err(1, "stdout"); + } + + else { + /* normal character */ + if(putchar(*cp++) == EOF) + err(1, "stdout"); + } + } + + } } + if (path == buf1) { /* swap pointers */ path = buf2; oldpath = buf1; @@ -197,19 +254,21 @@ main(argc, argv) exit(0); } +#ifndef LOOKUP int bgindex(bg) /* Return location of bg in bigrams or -1. */ char *bg; { - char bg0, bg1, *p; + register char bg0, bg1, *p; bg0 = bg[0]; bg1 = bg[1]; - for (p = bigrams; *p != '\0'; p++) + for (p = bigrams; *p != NULL; p++) if (*p++ == bg0 && *p == bg1) break; - return (*p == '\0' ? -1 : --p - bigrams); + return (*p == NULL ? 
-1 : (--p - bigrams)); } +#endif /* !LOOKUP */ void usage() diff --git a/locate/locate/Makefile b/locate/locate/Makefile index 8682185..7980aca 100644 --- a/locate/locate/Makefile +++ b/locate/locate/Makefile @@ -14,10 +14,12 @@ PROJECT_TYPE = Tool HFILES = locate.h pathnames.h -CFILES = locate.c +CFILES = util.c locate.c -OTHERSRCS = Makefile Makefile.preamble Makefile.postamble locate.1\ - updatedb.csh locate.updatedb.8 +OTHERSRCS = Makefile Makefile.preamble Makefile.postamble \ + fastfind.c \ + locate.1 locate.updatedb.8 locate.rc \ + concatdb.sh mklocatedb.sh updatedb.sh MAKEFILEDIR = $(MAKEFILEPATH)/pb_makefiles diff --git a/locate/locate/Makefile.postamble b/locate/locate/Makefile.postamble index abf00eb..0ea2a99 100644 --- a/locate/locate/Makefile.postamble +++ b/locate/locate/Makefile.postamble @@ -1,5 +1,10 @@ include $(CoreOSMakefiles)/ProjectBuilder/Makefile.Postamble.Common after_install:: - $(CP) updatedb.csh $(DSTROOT)/usr/libexec/locate.updatedb - $(CHMOD) 555 $(DSTROOT)/usr/libexec/locate.updatedb + $(INSTALL_SCRIPT) concatdb.sh $(DSTROOT)/usr/libexec/locate.concatdb + $(INSTALL_SCRIPT) mklocatedb.sh $(DSTROOT)/usr/libexec/locate.mklocatedb + $(INSTALL_SCRIPT) updatedb.sh $(DSTROOT)/usr/libexec/locate.updatedb + $(LN) $(DSTROOT)/usr/share/man/man8/locate.updatedb.8 $(DSTROOT)/usr/share/man/man8/locate.concatdb.8 + $(LN) $(DSTROOT)/usr/share/man/man8/locate.updatedb.8 $(DSTROOT)/usr/share/man/man8/locate.mklocatedb.8 + $(MKDIR) $(DSTROOT)$(ETCDIR) + $(INSTALL_FILE) locate.rc $(DSTROOT)$(ETCDIR) diff --git a/locate/locate/concatdb.sh b/locate/locate/concatdb.sh new file mode 100644 index 0000000..a236332 --- /dev/null +++ b/locate/locate/concatdb.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# concatdb - concatenate locate databases +# +# usage: concatdb database1 ... databaseN > newdb +# +# Sequence of databases is important. +# +# $FreeBSD: src/usr.bin/locate/locate/concatdb.sh,v 1.10 2000/01/12 08:01:00 kris Exp $ + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + +umask 077 # protect temp files + +: ${TMPDIR:=/var/tmp}; export TMPDIR; +test -d "$TMPDIR" || TMPDIR=/var/tmp + +# utilities to built locate database +: ${bigram:=locate.bigram} +: ${code:=locate.code} +: ${sort:=sort} +: ${locate:=locate} + + +case $# in + [01]) echo 'usage: concatdb databases1 ... 
databaseN > newdb' + exit 1 + ;; +esac + + +bigrams=`mktemp ${TMPDIR=/tmp}/_bigrams.XXXXXXXXXX` || exit 1 +trap 'rm -f $bigrams' 0 1 2 3 5 10 15 + +for db +do + $locate -d $db / +done | $bigram | $sort -nr | awk 'NR <= 128 { printf $2 }' > $bigrams + +for db +do + $locate -d $db / +done | $code $bigrams diff --git a/locate/locate/fastfind.c b/locate/locate/fastfind.c new file mode 100644 index 0000000..9383f07 --- /dev/null +++ b/locate/locate/fastfind.c @@ -0,0 +1,330 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.bin/locate/locate/fastfind.c,v 1.14 2005/12/07 12:22:46 des Exp $ + */ + + +#ifndef _LOCATE_STATISTIC_ +#define _LOCATE_STATISTIC_ + +void +statistic (fp, path_fcodes) + FILE *fp; /* open database */ + char *path_fcodes; /* for error message */ +{ + register int lines, chars, size, big, zwerg; + register u_char *p, *s; + register int c; + int count, umlaut; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } + + lines = chars = big = zwerg = umlaut = 0; + size = NBG + NBG; + + for (c = getc(fp), count = 0; c != EOF; size++) { + if (c == SWITCH) { + count += getwf(fp) - OFFSET; + size += sizeof(int); + zwerg++; + } else + count += c - OFFSET; + + for (p = path + count; (c = getc(fp)) > SWITCH; size++) + if (c < PARITY) { + if (c == UMLAUT) { + c = getc(fp); + size++; + umlaut++; + } + p++; + } else { + /* bigram char */ + big++; + p += 2; + } + + p++; + lines++; + chars += (p - path); + } + + (void)printf("\nDatabase: %s\n", path_fcodes); + (void)printf("Compression: Front: %2.2f%%, ", + (size + big - (2 * NBG)) / (chars / (float)100)); + (void)printf("Bigram: %2.2f%%, ", (size - big) / (size / (float)100)); + (void)printf("Total: %2.2f%%\n", + (size - (2 * NBG)) / (chars / (float)100)); + (void)printf("Filenames: %d, ", lines); + (void)printf("Characters: %d, ", chars); + 
(void)printf("Database size: %d\n", size); + (void)printf("Bigram characters: %d, ", big); + (void)printf("Integers: %d, ", zwerg); + (void)printf("8-Bit characters: %d\n", umlaut); + +} +#endif /* _LOCATE_STATISTIC_ */ + +extern char separator; + +void +#ifdef FF_MMAP + + +#ifdef FF_ICASE +fastfind_mmap_icase +#else +fastfind_mmap +#endif /* FF_ICASE */ +(pathpart, paddr, len, database) + char *pathpart; /* search string */ + caddr_t paddr; /* mmap pointer */ + int len; /* length of database */ + char *database; /* for error message */ + + +#else /* MMAP */ + + +#ifdef FF_ICASE +fastfind_icase +#else +fastfind +#endif /* FF_ICASE */ + +(fp, pathpart, database) + FILE *fp; /* open database */ + char *pathpart; /* search string */ + char *database; /* for error message */ + + +#endif /* MMAP */ + +{ + register u_char *p, *s, *patend, *q, *foundchar; + register int c, cc; + int count, found, globflag; + u_char *cutoff; + u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; + +#ifdef FF_ICASE + /* use a lookup table for case insensitive search */ + u_char table[UCHAR_MAX + 1]; + + tolower_word(pathpart); +#endif /* FF_ICASE*/ + + /* init bigram table */ +#ifdef FF_MMAP + if (len < (2*NBG)) + errx(1, "database too small: %s", database); + + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) { + p[c] = check_bigram_char(*paddr++); + s[c] = check_bigram_char(*paddr++); + } +#else + for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) { + p[c] = check_bigram_char(getc(fp)); + s[c] = check_bigram_char(getc(fp)); + } +#endif /* FF_MMAP */ + + /* find optimal (last) char for searching */ + for (p = pathpart; *p != '\0'; p++) + if (index(LOCATE_REG, *p) != NULL) + break; + + if (*p == '\0') + globflag = 0; + else + globflag = 1; + + p = pathpart; + patend = patprep(p); + cc = *patend; + +#ifdef FF_ICASE + /* set patend char to true */ + for (c = 0; c < UCHAR_MAX + 1; c++) + table[c] = 0; + + table[TOLOWER(*patend)] = 1; + table[toupper(*patend)] = 1; +#endif /* 
FF_ICASE */ + + + /* main loop */ + found = count = 0; + foundchar = 0; + +#ifdef FF_MMAP + c = (u_char)*paddr++; len--; + for (; len > 0; ) { +#else + c = getc(fp); + for (; c != EOF; ) { +#endif /* FF_MMAP */ + + /* go forward or backward */ + if (c == SWITCH) { /* big step, an integer */ +#ifdef FF_MMAP + count += getwm(paddr) - OFFSET; + len -= INTSIZE; paddr += INTSIZE; +#else + count += getwf(fp) - OFFSET; +#endif /* FF_MMAP */ + } else { /* slow step, =< 14 chars */ + count += c - OFFSET; + } + + /* overlay old path */ + p = path + count; + foundchar = p - 1; + +#ifdef FF_MMAP + for (; len > 0;) { + c = (u_char)*paddr++; + len--; +#else + for (;;) { + c = getc(fp); +#endif /* FF_MMAP */ + /* + * == UMLAUT: 8 bit char followed + * <= SWITCH: offset + * >= PARITY: bigram + * rest: single ascii char + * + * offset < SWITCH < UMLAUT < ascii < PARITY < bigram + */ + if (c < PARITY) { + if (c <= UMLAUT) { + if (c == UMLAUT) { +#ifdef FF_MMAP + c = (u_char)*paddr++; + len--; +#else + c = getc(fp); +#endif /* FF_MMAP */ + + } else + break; /* SWITCH */ + } +#ifdef FF_ICASE + if (table[c]) +#else + if (c == cc) +#endif /* FF_ICASE */ + foundchar = p; + *p++ = c; + } + else { + /* bigrams are parity-marked */ + TO7BIT(c); + +#ifndef FF_ICASE + if (bigram1[c] == cc || + bigram2[c] == cc) +#else + + if (table[bigram1[c]] || + table[bigram2[c]]) +#endif /* FF_ICASE */ + foundchar = p + 1; + + *p++ = bigram1[c]; + *p++ = bigram2[c]; + } + } + + if (found) { /* previous line matched */ + cutoff = path; + *p-- = '\0'; + foundchar = p; + } else if (foundchar >= path + count) { /* a char matched */ + *p-- = '\0'; + cutoff = path + count; + } else /* nothing to do */ + continue; + + found = 0; + for (s = foundchar; s >= cutoff; s--) { + if (*s == cc +#ifdef FF_ICASE + || TOLOWER(*s) == cc +#endif /* FF_ICASE */ + ) { /* fast first char check */ + for (p = patend - 1, q = s - 1; *p != '\0'; + p--, q--) + if (*q != *p +#ifdef FF_ICASE + && TOLOWER(*q) != *p +#endif /* FF_ICASE 
*/ + ) + break; + if (*p == '\0') { /* fast match success */ + found = 1; + if (!globflag || +#ifndef FF_ICASE + !fnmatch(pathpart, path, 0)) +#else + !fnmatch(pathpart, path, + FNM_CASEFOLD)) +#endif /* !FF_ICASE */ + { + if (f_silent) + counter++; + else if (f_limit) { + counter++; + if (f_limit >= counter) + (void)printf("%s%c",path,separator); + else + errx(0, "[show only %d lines]", counter - 1); + } else + (void)printf("%s%c",path,separator); + } + break; + } + } + } + } +} diff --git a/locate/locate/locate.1 b/locate/locate/locate.1 index 8ebdbae..05c036e 100644 --- a/locate/locate/locate.1 +++ b/locate/locate/locate.1 @@ -1,5 +1,4 @@ -.\" $NetBSD: locate.1,v 1.6 1998/08/08 14:49:23 hubertf Exp $ -.\" +.\" Copyright (c) 1995 Wolfram Schneider . Berlin. .\" Copyright (c) 1990, 1993 .\" The Regents of the University of California. All rights reserved. .\" @@ -32,46 +31,182 @@ .\" SUCH DAMAGE. .\" .\" @(#)locate.1 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD: src/usr.bin/locate/locate/locate.1,v 1.34 2006/09/29 15:20:45 ru Exp $ .\" -.Dd June 6, 1993 +.Dd August 17, 2006 .Dt LOCATE 1 -.Os BSD 4.4 +.Os .Sh NAME .Nm locate -.Nd find files +.Nd find filenames quickly .Sh SYNOPSIS .Nm -.Ar pattern +.Op Fl 0Scims +.Op Fl l Ar limit +.Op Fl d Ar database +.Ar pattern ... .Sh DESCRIPTION +The .Nm -searches a database for all pathnames which match the specified -.Ar pattern . -The database is recomputed periodically, and contains the pathnames +program searches a database for all pathnames which match the specified +.Ar pattern . +The database is recomputed periodically (usually weekly or daily), +and contains the pathnames of all files which are publicly accessible. .Pp -Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``['' -and ``]'') +Shell globbing and quoting characters +.Dq ( * , +.Dq \&? , +.Dq \e , +.Dq \&[ +and +.Dq \&] ) may be used in -.Ar pattern , +.Ar pattern , although they will have to be escaped from the shell. 
-Preceding any character with a backslash (``\e'') eliminates any special +Preceding any character with a backslash +.Pq Dq \e +eliminates any special meaning which it may have. The matching differs in that no characters must be matched explicitly, -including slashes (``/''). +including slashes +.Pq Dq / . +.Pp +As a special case, a pattern containing no globbing characters +.Pq Dq foo +is matched as though it were +.Dq *foo* . +.Pp +Historically, locate only stored characters between 32 and 127. +The +current implementation stores any character except newline +.Pq Sq \en +and +.Dv NUL +.Pq Sq \e0 . +The 8-bit character support does not waste extra space for +plain ASCII file names. +Characters less than 32 or greater than 127 +are stored in 2 bytes. +.Pp +The following options are available: +.Bl -tag -width 10n +.It Fl 0 +Print pathnames separated by an +.Tn ASCII +.Dv NUL +character (character code 0) instead of default NL +(newline, character code 10). +.It Fl S +Print some statistics about the database and exit. +.It Fl c +Suppress normal output; instead print a count of matching file names. +.It Fl d Ar database +Search in +.Ar database +instead of the default file name database. +Multiple +.Fl d +options are allowed. +Each additional +.Fl d +option adds the specified database to the list +of databases to be searched. +.Pp +The option +.Ar database +may be a colon-separated list of databases. +A single colon is a reference +to the default database. +.Bd -literal +$ locate -d $HOME/lib/mydb: foo +.Ed +.Pp +will first search string +.Dq foo +in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database . +.Bd -literal +$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo +.Ed +.Pp +will first search string +.Dq foo +in +.Pa $HOME/lib/mydb +and then in +.Pa /var/db/locate.database +and then in +.Pa /cdrom/locate.database . 
+.Pp +.Dl "$ locate -d db1 -d db2 -d db3 pattern" +.Pp +is the same as +.Pp +.Dl "$ locate -d db1:db2:db3 pattern" +.Pp +or +.Pp +.Dl "$ locate -d db1:db2 -d db3 pattern" +.Pp +If +.Fl +is given as the database name, standard input will be read instead. +For example, you can compress your database +and use: +.Bd -literal +$ zcat database.gz | locate -d - pattern +.Ed .Pp -As a special case, a pattern containing no globbing characters (``foo'') -is matched as though it were ``*foo*''. +This might be useful on machines with a fast CPU and little RAM and slow +I/O. +Note: you can only use +.Em one +pattern for stdin. +.It Fl i +Ignore case distinctions in both the pattern and the database. +.It Fl l Ar number +Limit output to +.Ar number +of file names and exit. +.It Fl m +Use +.Xr mmap 2 +instead of the +.Xr stdio 3 +library. +This is the default behavior +and is faster in most cases. +.It Fl s +Use the +.Xr stdio 3 +library instead of +.Xr mmap 2 . +.El +.Sh ENVIRONMENT +.Bl -tag -width LOCATE_PATH -compact +.It Pa LOCATE_PATH +path to the locate database if set and not empty, ignored if the +.Fl d +option was specified. +.El .Sh FILES -.Bl -tag -width /usr/libexec/locate.updatedb -compact +.Bl -tag -width /etc/periodic/weekly/310.locate -compact .It Pa /var/db/locate.database -Database -.It /usr/libexec/locate.updatedb -Script to update database. +locate database +.It Pa /usr/libexec/locate.updatedb +Script to update the locate database +.It Pa /etc/periodic/weekly/310.locate +Script that starts the database rebuild .El .Sh SEE ALSO .Xr find 1 , +.Xr whereis 1 , +.Xr which 1 , .Xr fnmatch 3 , -.Xr weekly.conf 5 +.Xr locate.updatedb 8 .Rs .%A Woods, James A. .%D 1983 @@ -83,5 +218,59 @@ Script to update database. .Sh HISTORY The .Nm -command appears in +command first appeared in .Bx 4.4 . +Many new features were +added in +.Fx 2.2 . +.Sh BUGS +The +.Nm +program may fail to list some files that are present, or may +list files that have been removed from the system. 
+This is because +locate only reports files that are present in the database, which is +typically only regenerated once a week by the +.Pa /etc/periodic/weekly/310.locate +script. +Use +.Xr find 1 +to locate files that are of a more transitory nature. +.Pp +The +.Nm +database is typically built by user +.Dq nobody +and the +.Xr locate.updatedb 8 +utility skips directories +which are not readable for user +.Dq nobody , +group +.Dq nobody , +or +world. +For example, if your HOME directory is not world-readable, +.Em none +of your files are +in the database. +.Pp +The +.Nm +database is not byte order independent. +It is not possible +to share the databases between machines with different byte order. +The current +.Nm +implementation understands databases in host byte order or +network byte order if both architectures use the same integer size. +So on a +.Fx Ns /i386 +machine +(little endian), you can read +a locate database which was built on SunOS/sparc machine +(big endian, net). +.Pp +The +.Nm +utility does not recognize multibyte characters. diff --git a/locate/locate/locate.c b/locate/locate/locate.c index 67e3567..bad3e34 100644 --- a/locate/locate/locate.c +++ b/locate/locate/locate.c @@ -1,8 +1,7 @@ -/* $NetBSD: locate.c,v 1.8 1997/10/19 04:11:56 lukem Exp $ */ - /* + * Copyright (c) 1995 Wolfram Schneider . Berlin. * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * James A. Woods. @@ -17,8 +16,8 @@ * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. 
+ * This product includes software developed by the University of + * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -36,17 +35,19 @@ * SUCH DAMAGE. */ -#include #ifndef lint -__COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\ - The Regents of the University of California. All rights reserved.\n"); +static const char copyright[] = +"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\ +@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 -static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; +static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; #endif -__RCSID("$NetBSD: locate.c,v 1.8 1997/10/19 04:11:56 lukem Exp $"); +static const char rcsid[] = + "$FreeBSD: src/usr.bin/locate/locate/locate.c,v 1.17 2006/06/11 17:40:25 maxim Exp $"; #endif /* not lint */ /* @@ -59,10 +60,11 @@ __RCSID("$NetBSD: locate.c,v 1.8 1997/10/19 04:11:56 lukem Exp $"); * * The codes are: * - * 0-28 likeliest differential counts + offset to make nonnegative - * 30 switch code for out-of-range count to follow in next word - * 128-255 bigram codes (128 most common, as determined by 'updatedb') - * 32-127 single character (printable) ascii residue (ie, literal) + * 0-28 likeliest differential counts + offset to make nonnegative + * 30 switch code for out-of-range count to follow in next word + * 31 an 8 bit char followed + * 128-255 bigram codes (128 most common, as determined by 'updatedb') + * 32-127 single character (printable) ascii residue (ie, literal) * * A novel two-tiered string search technique is employed: * @@ -77,131 +79,289 @@ __RCSID("$NetBSD: locate.c,v 1.8 1997/10/19 04:11:56 lukem Exp $"); */ #include - +#include +#include #include -#include +#include #include -#include #include 
+#include +#include + +#ifdef MMAP +# include +# include +# include +# include +#endif + #include "locate.h" #include "pathnames.h" -void fastfind __P((char *)); -int main __P((int, char **)); -char *patprep __P((char *)); +#ifdef DEBUG +# include +# include +# include +#endif + +int f_mmap; /* use mmap */ +int f_icase; /* ignore case */ +int f_stdin; /* read database from stdin */ +int f_statistic; /* print statistic */ +int f_silent; /* suppress output, show only count of matches */ +int f_limit; /* limit number of output lines, 0 == infinite */ +u_int counter; /* counter for matches [-c] */ +char separator='\n'; /* line separator */ +#ifdef __APPLE__ +u_char myctype[UCHAR_MAX + 1]; +#endif /* __APPLE__ */ + -FILE *fp; +void usage(void); +void statistic(FILE *, char *); +void fastfind(FILE *, char *, char *); +void fastfind_icase(FILE *, char *, char *); +void fastfind_mmap(char *, caddr_t, int, char *); +void fastfind_mmap_icase(char *, caddr_t, int, char *); +void search_mmap(char *, char **); +void search_fopen(char *, char **); +unsigned long cputime(void); + +extern char **colon(char **, char*, char*); +extern void print_matches(u_int); +extern int getwm(caddr_t); +extern int getwf(FILE *); +extern u_char *tolower_word(u_char *); +extern int check_bigram_char(int); +extern char *patprep(char *); int main(argc, argv) - int argc; - char *argv[]; + int argc; + char **argv; { - if (argc != 2) { - (void)fprintf(stderr, "usage: locate pattern\n"); - exit(1); - } - if (!(fp = fopen(_PATH_FCODES, "r"))) { - (void)fprintf(stderr, "locate: no database file %s.\n", - _PATH_FCODES); - exit(1); - } - while (*++argv) - fastfind(*argv); - exit(0); + register int ch; + char **dbv = NULL; + char *path_fcodes; /* locate database */ +#ifdef MMAP + f_mmap = 1; /* mmap is default */ +#endif + (void) setlocale(LC_ALL, ""); + + while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1) + switch(ch) { + case '0': /* 'find -print0' style */ + separator = '\0'; + break; + case 'S': /* 
statistic lines */ + f_statistic = 1; + break; + case 'l': /* limit number of output lines, 0 == infinite */ + f_limit = atoi(optarg); + break; + case 'd': /* database */ + dbv = colon(dbv, optarg, _PATH_FCODES); + break; + case 'i': /* ignore case */ + f_icase = 1; + break; + case 'm': /* mmap */ +#ifdef MMAP + f_mmap = 1; +#else + warnx("mmap(2) not implemented"); +#endif + break; + case 's': /* stdio lib */ + f_mmap = 0; + break; + case 'c': /* suppress output, show only count of matches */ + f_silent = 1; + break; + default: + usage(); + } + argv += optind; + argc -= optind; + + /* too few arguments: a pattern is required unless -S (statistics) was given */ + if (argc < 1 && !(f_statistic)) + usage(); + + /* no (valid) database as argument */ + if (dbv == NULL || *dbv == NULL) { + /* try to read database from environment */ + if ((path_fcodes = getenv("LOCATE_PATH")) == NULL || + *path_fcodes == '\0') + /* use default database */ + dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES); + else /* $LOCATE_PATH */ + dbv = colon(dbv, path_fcodes, _PATH_FCODES); + } + + if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */ + for (ch = 0; ch < UCHAR_MAX + 1; ch++) + myctype[ch] = tolower(ch); + + /* foreach database ...
*/ + while((path_fcodes = *dbv) != NULL) { + dbv++; + + if (!strcmp(path_fcodes, "-")) + f_stdin = 1; + else + f_stdin = 0; + +#ifndef MMAP + f_mmap = 0; /* be paranoid */ +#endif + if (!f_mmap || f_stdin || f_statistic) + search_fopen(path_fcodes, argv); + else + search_mmap(path_fcodes, argv); + } + + if (f_silent) + print_matches(counter); + exit(0); } + void -fastfind(pathpart) - char *pathpart; +search_fopen(db, s) + char *db; /* database */ + char **s; /* search strings */ { - char *p, *s; - int c; - int count, found, globflag; - char *cutoff, *patend, *q; - char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; - - for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) - p[c] = getc(fp), s[c] = getc(fp); - - p = pathpart; - globflag = strchr(p, '*') || strchr(p, '?') || strchr(p, '['); - patend = patprep(p); - - found = 0; - for (c = getc(fp), count = 0; c != EOF;) { - count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; - /* overlay old path */ - for (p = path + count; (c = getc(fp)) > SWITCH;) - if (c < PARITY) - *p++ = c; - else { /* bigrams are parity-marked */ - c &= PARITY - 1; - *p++ = bigram1[c], *p++ = bigram2[c]; - } - *p-- = '\0'; - cutoff = (found ? 
path : path + count); - for (found = 0, s = p; s >= cutoff; s--) - if (*s == *patend) { /* fast first char check */ - for (p = patend - 1, q = s - 1; *p != '\0'; - p--, q--) - if (*q != *p) - break; - if (*p == '\0') { /* fast match success */ - found = 1; - if (!globflag || - !fnmatch(pathpart, path, 0)) - (void)printf("%s\n", path); - break; - } - } + FILE *fp; +#ifdef DEBUG + long t0; +#endif + + /* can only read stdin once */ + if (f_stdin) { + fp = stdin; + if (*(s+1) != NULL) { + warnx("read database from stdin, use only `%s' as pattern", *s); + *(s+1) = NULL; + } + } + else if ((fp = fopen(db, "r")) == NULL) + err(1, "`%s'", db); + + /* count only chars or lines */ + if (f_statistic) { + statistic(fp, db); + (void)fclose(fp); + return; } -} -/* - * extract last glob-free subpattern in name for fast pre-match; prepend - * '\0' for backwards match; return end of new pattern - */ -static char globfree[100]; + /* foreach search string: rewind the database (not possible for stdin) and search it again */ + while(*s != NULL) { +#ifdef DEBUG + t0 = cputime(); +#endif + if (!f_stdin && + fseek(fp, (long)0, SEEK_SET) == -1) + err(1, "fseek to begin of ``%s''", db); /* no '\n': err() appends the errno text and newline itself */ + + if (f_icase) + fastfind_icase(fp, *s, db); + else + fastfind(fp, *s, db); +#ifdef DEBUG + warnx("fastfind %ld ms", cputime () - t0); +#endif + s++; + } + (void)fclose(fp); +} -char * -patprep(name) - char *name; +#ifdef MMAP +void +search_mmap(db, s) + char *db; /* database */ + char **s; /* search strings */ { - char *endmark, *p, *subp; - - subp = globfree; - *subp++ = '\0'; - p = name + strlen(name) - 1; - /* skip trailing metacharacters (and [] ranges) */ - for (; p >= name; p--) - if (strchr("*?", *p) == 0) - break; - if (p < name) - p = name; - if (*p == ']') - for (p--; p >= name; p--) - if (*p == '[') { - p--; - break; - } - if (p < name) - p = name; - /* - * if pattern has only metacharacters, check every path (force '/' - * search) - */ - if ((p == name) && strchr("?*[]", *p) != 0) - *subp++ = '/'; - else { - for (endmark = p; p >= name; p--) - if
(strchr("]*?", *p) != 0) - break; - for (++p; - (p <= endmark) && subp < (globfree + sizeof(globfree));) - *subp++ = *p++; + struct stat sb; + int fd; + caddr_t p; + off_t len; +#ifdef DEBUG + long t0; +#endif + if ((fd = open(db, O_RDONLY)) == -1 || + fstat(fd, &sb) == -1) + err(1, "`%s'", db); + len = sb.st_size; + + if ((p = mmap((caddr_t)0, (size_t)len, + PROT_READ, MAP_SHARED, + fd, (off_t)0)) == MAP_FAILED) + err(1, "mmap ``%s''", db); + + /* foreach search string: search the same read-only mapping */ + while (*s != NULL) { +#ifdef DEBUG + t0 = cputime(); +#endif + if (f_icase) + fastfind_mmap_icase(*s, p, (int)len, db); + else + fastfind_mmap(*s, p, (int)len, db); +#ifdef DEBUG + warnx("fastfind %ld ms", cputime () - t0); +#endif + s++; } - *subp = '\0'; - return(--subp); + + if (munmap(p, (size_t)len) == -1) + warn("munmap %s", db); /* no '\n': warn() appends the errno text and newline itself */ + + (void)close(fd); +} +#endif /* MMAP */ + +#ifdef DEBUG +unsigned long +cputime () +{ + struct rusage rus; + + getrusage(0, &rus); + return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000); +} +#endif /* DEBUG */ + +void +usage () +{ + (void)fprintf(stderr, + "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n"); + (void)fprintf(stderr, + "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES); + exit(1); } + + +/* load fastfind functions */ + +/* statistic */ +/* fastfind_mmap, fastfind_mmap_icase */ +#ifdef MMAP +#undef FF_MMAP +#undef FF_ICASE + +#define FF_MMAP +#include "fastfind.c" +#define FF_ICASE +#include "fastfind.c" +#endif /* MMAP */ + +/* fopen */ +/* fastfind, fastfind_icase */ +#undef FF_MMAP +#undef FF_ICASE +#include "fastfind.c" +#define FF_ICASE +#include "fastfind.c" diff --git a/locate/locate/locate.h b/locate/locate/locate.h index 467c2cb..d4aef46 100644 --- a/locate/locate/locate.h +++ b/locate/locate/locate.h @@ -1,6 +1,5 @@ -/* $NetBSD: locate.h,v 1.3 1994/12/22 06:17:48 jtc Exp $ */ - /* + * Copyright (c) 1995 Wolfram Schneider . Berlin.
* Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -33,6 +32,7 @@ * SUCH DAMAGE. * * @(#)locate.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD: src/usr.bin/locate/locate/locate.h,v 1.7 1999/08/28 01:02:59 peter Exp $ */ /* Symbolic constants shared by locate.c and code.c */ @@ -41,3 +41,36 @@ #define OFFSET 14 /* abs value of max likely diff */ #define PARITY 0200 /* parity bit */ #define SWITCH 30 /* switch code */ +#define UMLAUT 31 /* an 8 bit char followed */ + +/* 0-28 likeliest differential counts + offset to make nonnegative */ +#define LDC_MIN 0 +#define LDC_MAX 28 + +/* 128-255 bigram codes (128 most common, as determined by 'updatedb') */ +#define BIGRAM_MIN (UCHAR_MAX - CHAR_MAX) +#define BIGRAM_MAX UCHAR_MAX + +/* 32-127 single character (printable) ascii residue (ie, literal) */ +#define ASCII_MIN 32 +#define ASCII_MAX CHAR_MAX + +/* #define TO7BIT(x) (x = ( ((u_char)x) & CHAR_MAX )) */ +#define TO7BIT(x) ((x) = (x) & CHAR_MAX) + + +#if UCHAR_MAX >= 4096 +# define TOLOWER(ch) tolower(ch) +#else + +#ifdef __APPLE__ +extern u_char myctype[UCHAR_MAX + 1]; +#else +u_char myctype[UCHAR_MAX + 1]; +#endif +#define TOLOWER(ch) (myctype[ch]) +#endif + +#define INTSIZE (sizeof(int)) + +#define LOCATE_REG "*?[]\\" /* fnmatch(3) meta characters */ diff --git a/locate/locate/locate.rc b/locate/locate/locate.rc new file mode 100644 index 0000000..fdf099b --- /dev/null +++ b/locate/locate/locate.rc @@ -0,0 +1,26 @@ +# +# /etc/locate.rc - command script for updatedb(8) +# +# $FreeBSD: src/usr.bin/locate/locate/locate.rc,v 1.9 2005/08/22 08:22:48 cperciva Exp $ + +# +# All commented values are the defaults +# +# temp directory +#TMPDIR="/tmp" + +# the actual database +#FCODES="/var/db/locate.database" + +# directories to be put in the database +#SEARCHPATHS="/" + +# directories unwanted in output +#PRUNEPATHS="/tmp /var/tmp" + +# filesystems allowed.
Beware: a non-listed filesystem will be pruned +# and if the SEARCHPATHS starts in such a filesystem locate will build +# an empty database. +# +# be careful if you add 'nfs' +#FILESYSTEMS="hfs ufs" diff --git a/locate/locate/locate.updatedb.8 b/locate/locate/locate.updatedb.8 index 7408d0f..12db435 100755 --- a/locate/locate/locate.updatedb.8 +++ b/locate/locate/locate.updatedb.8 @@ -1,5 +1,3 @@ -.\" $OpenBSD: locate.updatedb.8,v 1.3 1997/01/04 01:36:01 millert Exp $ -.\" .\" Copyright (c) 1996 .\" Mike Pritchard . All rights reserved. .\" @@ -30,46 +28,32 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" +.\" $FreeBSD: src/usr.bin/locate/locate/locate.updatedb.8,v 1.12 2005/01/18 13:43:50 ru Exp $ +.\" .Dd February 11, 1996 .Dt LOCATE.UPDATEDB 8 -.Os BSD 4.4 +.Os .Sh NAME .Nm locate.updatedb .Nd update locate database .Sh SYNOPSIS .Nm /usr/libexec/locate.updatedb -.Op Fl --tmpdir=dir -.Op Fl --fcodes=dbfile -.Op Fl --searchpaths='dir1 dir2...' -.Op Fl --prunepaths='dir1 dir2...' -.Op Fl --filesystems='type1 type2...' .Sh DESCRIPTION -.Nm Locate.updatedb -updates the database used by +The +.Nm +utility updates the database used by .Xr locate 1 . It is typically run once a week by the -.Pa /etc/weekly +.Pa /etc/periodic/weekly/310.locate script. .Pp The contents of the newly built database can be controlled by the .Pa /etc/locate.rc -file as well as the command line arguments. -.Sh OPTIONS -.Bl -tag -width --filesystems -The available options are as follows: -.It Fl --tmpdir -Sets the directory temporary files are stored in. -.It Fl --fcodes -Use the named file as the find codes database. If the file -name ``-'' is given, the database will be sent to standard output. -.It Fl --searchpaths -Sets the list of directories to be put in the database. -.It Fl --prunepaths -Sets the list of parent directories that should not be go in -the database. 
-.It Fl --filesystems -A list of filesystem types to be traversed by -.Xr find 1 . +file. +.Sh ENVIRONMENT +.Bl -tag -width /var/db/locate.database -compact +.It Pa LOCATE_CONFIG +path to the configuration file .El .Sh FILES .Bl -tag -width /var/db/locate.database -compact @@ -79,7 +63,8 @@ the default database the configuration file .El .Sh SEE ALSO -.Xr locate 1 +.Xr locate 1 , +.Xr periodic 8 .Rs .%A Woods, James A. .%D 1983 diff --git a/locate/locate/mklocatedb.sh b/locate/locate/mklocatedb.sh new file mode 100644 index 0000000..7231eba --- /dev/null +++ b/locate/locate/mklocatedb.sh @@ -0,0 +1,92 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# +# mklocatedb - build locate database +# +# usage: mklocatedb [-presort] < filelist > database +# +# $FreeBSD: src/usr.bin/locate/locate/mklocatedb.sh,v 1.13 2002/07/22 05:35:59 tjr Exp $ + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + +umask 077 # protect temp files + +: ${TMPDIR:=/tmp}; export TMPDIR +test -d "$TMPDIR" || TMPDIR=/tmp +if ! TMPDIR=`mktemp -d $TMPDIR/mklocateXXXXXXXXXX`; then + exit 1 +fi + + +# utilities used to build the locate database +: ${bigram:=locate.bigram} +: ${code:=locate.code} +: ${sort:=sort} + + +sortopt="-u -T $TMPDIR" +sortcmd=$sort + + +bigrams=$TMPDIR/_mklocatedb$$.bigrams +filelist=$TMPDIR/_mklocatedb$$.list + +trap 'rm -f $bigrams $filelist; rmdir $TMPDIR' 0 1 2 3 5 10 15 + + +# Input already sorted +if [ X"$1" = "X-presort" ]; then + shift; + + # create an empty file + true > $bigrams + + # Locate database bootstrapping + # 1. first build a temp database without bigram compression + # 2. create the bigram from the temp database + # 3. create the real locate database with bigram compression.
+ # + # This scheme avoid large temporary files in /tmp + + $code $bigrams > $filelist || exit 1 + locate -d $filelist / | $bigram | $sort -nr | head -128 | + awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1 + locate -d $filelist / | $code $bigrams || exit 1 + exit + +else + if $sortcmd $sortopt > $filelist; then + $bigram < $filelist | $sort -nr | + awk '{if (/^[ ]*[0-9]+[ ]+..$/) {printf("%s",$2)} else {exit 1}}' > $bigrams || exit 1 + $code $bigrams < $filelist || exit 1 + else + echo "`basename $0`: cannot build locate database" >&2 + exit 1 + fi +fi diff --git a/locate/locate/pathnames.h b/locate/locate/pathnames.h index 8021ffe..8fb0e8c 100644 --- a/locate/locate/pathnames.h +++ b/locate/locate/pathnames.h @@ -1,5 +1,3 @@ -/* $NetBSD: pathnames.h,v 1.3 1994/12/22 06:17:49 jtc Exp $ */ - /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. diff --git a/locate/locate/updatedb.csh b/locate/locate/updatedb.csh deleted file mode 100644 index 8347169..0000000 --- a/locate/locate/updatedb.csh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/csh -f -# -# $NetBSD: updatedb.csh,v 1.7 1995/08/31 22:36:35 jtc Exp $ -# -# Copyright (c) 1989, 1993 -# The Regents of the University of California. All rights reserved. -# -# This code is derived from software contributed to Berkeley by -# James A. Woods. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. 
All advertising materials mentioning features or use of this software -# must display the following acknowledgement: -# This product includes software developed by the University of -# California, Berkeley and its contributors. -# 4. Neither the name of the University nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. -# -# @(#)updatedb.csh 8.4 (Berkeley) 10/27/94 -# - -set SRCHPATHS = "/" # directories to be put in the database -set LIBDIR = /usr/libexec # for subprograms - # for temp files -if (! $?TMPDIR) setenv TMPDIR /tmp -if (! $?DBDIR) setenv DBDIR /var/db -set FCODES = $DBDIR/locate.database # the database - -set path = ( /bin /usr/bin ) -set bigrams = $TMPDIR/locate.bigrams.$$ -set filelist = $TMPDIR/locate.list.$$ -set errs = $TMPDIR/locate.errs.$$ - -# Make a file list and compute common bigrams. -# Alphabetize '/' before any other char with 'tr'. 
-# If the system is very short of sort space, 'bigram' can be made -# smarter to accumulate common bigrams directly without sorting -# ('awk', with its associative memory capacity, can do this in several -# lines, but is too slow, and runs out of string space on small machines). - -# search locally or everything -# find ${SRCHPATHS} -print | \ -find ${SRCHPATHS} \( ! -fstype local -o -fstype fdesc -o -fstype devfs \) -a \ - -prune -o -print | \ - tr '/' '\001' | \ - (sort -T "$TMPDIR" -f; echo $status > $errs) | tr '\001' '/' > $filelist - -$LIBDIR/locate.bigram < $filelist | \ - (sort -T "$TMPDIR"; echo $status >> $errs) | \ - uniq -c | sort -T "$TMPDIR" -nr | \ - awk '{ if (NR <= 128) print $2 }' | tr -d '\012' > $bigrams - -# code the file list - -if { grep -s -v 0 $errs } then - printf 'locate: updatedb failed\n\n' -else - $LIBDIR/locate.code $bigrams < $filelist > $FCODES - chmod 644 $FCODES - rm $bigrams $filelist $errs -endif diff --git a/locate/locate/updatedb.sh b/locate/locate/updatedb.sh new file mode 100644 index 0000000..22f2f33 --- /dev/null +++ b/locate/locate/updatedb.sh @@ -0,0 +1,93 @@ +#!/bin/sh +# +# Copyright (c) September 1995 Wolfram Schneider . Berlin. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# updatedb - update locate database for local mounted filesystems +# +# $FreeBSD: src/usr.bin/locate/locate/updatedb.sh,v 1.20 2005/11/12 12:45:08 grog Exp $ + +if [ "$(id -u)" = "0" ]; then + echo ">>> WARNING" 1>&2 + echo ">>> Executing updatedb as root. This WILL reveal all filenames" 1>&2 + echo ">>> on your machine to all login users, which is a security risk." 1>&2 +fi +: ${LOCATE_CONFIG="/etc/locate.rc"} +if [ -f "$LOCATE_CONFIG" -a -r "$LOCATE_CONFIG" ]; then + . "$LOCATE_CONFIG" +fi + +# The directory containing locate subprograms +: ${LIBEXECDIR:=/usr/libexec}; export LIBEXECDIR +: ${TMPDIR:=/tmp}; export TMPDIR +if ! TMPDIR=`mktemp -d $TMPDIR/locateXXXXXXXXXX`; then + exit 1 +fi + +PATH=$LIBEXECDIR:/bin:/usr/bin:$PATH; export PATH + + +: ${mklocatedb:=locate.mklocatedb} # make locate database program +: ${FCODES:=/var/db/locate.database} # the database +: ${SEARCHPATHS:="/"} # directories to be put in the database +: ${PRUNEPATHS:="/tmp /var/tmp"} # unwanted directories +: ${FILESYSTEMS:="hfs ufs"} # allowed filesystems +: ${find:=find} + +case X"$SEARCHPATHS" in + X) echo "$0: empty variable SEARCHPATHS"; exit 1;; esac +case X"$FILESYSTEMS" in + X) echo "$0: empty variable FILESYSTEMS"; exit 1;; esac + +# Make a list of paths to exclude in the locate run +excludes="!
(" or="" +for fstype in $FILESYSTEMS +do + excludes="$excludes $or -fstype $fstype" + or="-or" +done +excludes="$excludes ) -prune" + +case X"$PRUNEPATHS" in + X) ;; + *) for path in $PRUNEPATHS + do + excludes="$excludes -or -path $path -prune" + done;; +esac + +tmp=$TMPDIR/_updatedb$$ +trap 'rm -f $tmp; rmdir $TMPDIR; exit' 0 1 2 3 5 10 15 + +# search locally +# echo $find $SEARCHPATHS $excludes -or -print && exit +if $find -s $SEARCHPATHS $excludes -or -print 2>/dev/null | + $mklocatedb -presort > $tmp +then + case X"`$find $tmp -size -257c -print`" in + X) cat $tmp > $FCODES;; + *) echo "updatedb: locate database $tmp is empty" + exit 1 + esac +fi diff --git a/locate/locate/util.c b/locate/locate/util.c new file mode 100644 index 0000000..0a94c9c --- /dev/null +++ b/locate/locate/util.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 1995 Wolfram Schneider . Berlin. + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * James A. Woods. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/usr.bin/locate/locate/util.c,v 1.10 2002/06/24 12:40:11 naddy Exp $ + */ + + +#include +#include +#include +#include +#include +#include + +#include "locate.h" + +char **colon(char **, char*, char*); +char *patprep(char *); +void print_matches(u_int); +u_char *tolower_word(u_char *); +int getwm(caddr_t); +int getwf(FILE *); +int check_bigram_char(int); + +/* + * Validate bigram chars. If the test failed the database is corrupt + * or the database is obviously not a locate database. + */ +int +check_bigram_char(ch) + int ch; +{ + /* legal bigram: 0, ASCII_MIN ... 
ASCII_MAX */ + if (ch == 0 || + (ch >= ASCII_MIN && ch <= ASCII_MAX)) + return(ch); + + errx(1, + "locate database header corrupt, bigram char outside 0, %d-%d: %d", + ASCII_MIN, ASCII_MAX, ch); + exit(1); +} + +/* split a colon separated string and append the parts, in order, to the NULL-terminated vector dbv + * + * "bla:foo" -> {"bla", "foo"} + * "bla:" -> {"bla", dot} + * "bla" -> {"bla"} + * "" -> warning, dbv unchanged + * + */ +char ** +colon(dbv, path, dot) + char **dbv; + char *path; + char *dot; /* default for single ':' */ +{ + int vlen, slen; + char *c, *ch, *p; + char **pv; + + if (dbv == NULL) { + if ((dbv = malloc(sizeof(char **))) == NULL) + err(1, "malloc"); + *dbv = NULL; + } + + /* empty string */ + if (*path == '\0') { + warnx("empty database name, ignored"); + return(dbv); + } + + /* length of string vector */ + for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++); + + for (ch = c = path; ; ch++) { + if (*ch == ':' || + (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) { + /* single colon -> dot */ + if (ch == c) + p = dot; + else { + /* a string */ + slen = ch - c; + if ((p = malloc(sizeof(char) * (slen + 1))) + == NULL) + err(1, "malloc"); + bcopy(c, p, slen); + *(p + slen) = '\0'; + } + /* increase dbv with element p */ + if ((dbv = realloc(dbv, sizeof(char **) * (vlen + 2))) + == NULL) + err(1, "realloc"); + *(dbv + vlen) = p; + *(dbv + ++vlen) = NULL; + c = ch + 1; + } + if (*ch == '\0') + break; + } + return (dbv); +} + +void +print_matches(counter) + u_int counter; +{ + (void)printf("%u\n", counter); /* counter is unsigned: %u, not %d */ +} + + +/* + * extract last glob-free subpattern in name for fast pre-match; prepend + * '\0' for backwards match; return end of new pattern + */ +static char globfree[100]; + +char * +patprep(name) + char *name; +{ + register char *endmark, *p, *subp; + + subp = globfree; + *subp++ = '\0'; /* set first element to '\0' */ + p = name + strlen(name) - 1; + + /* skip trailing metacharacters */ + for (; p >= name; p--) + if (index(LOCATE_REG, *p) == NULL) + break; + + /* + * check if maybe we are in a
character class + * + * 'foo.[ch]' + * |----< p + */ + if (p >= name && + (index(p, '[') != NULL || index(p, ']') != NULL)) { + for (p = name; *p != '\0'; p++) + if (*p == ']' || *p == '[') + break; + p--; + + /* + * cannot find a non-meta character, give up + * '*\*[a-z]' + * |-------< p + */ + if (p >= name && index(LOCATE_REG, *p) != NULL) + p = name - 1; + } + + if (p < name) + /* only meta chars: "???", force '/' search */ + *subp++ = '/'; + + else { + for (endmark = p; p >= name; p--) + if (index(LOCATE_REG, *p) != NULL) + break; + for (++p; + (p <= endmark) && subp < (globfree + sizeof(globfree));) + *subp++ = *p++; + } + *subp = '\0'; + return(--subp); +} + +/* tolower word */ +u_char * +tolower_word(word) + u_char *word; +{ + register u_char *p; + + for(p = word; *p != '\0'; p++) + *p = TOLOWER(*p); + + return(word); +} + + +/* + * Read integer from mmap pointer. + * Essential a simple ``return *(int *)p'' but avoid sigbus + * for integer alignment (SunOS 4.x, 5.x). + * + * Convert network byte order to host byte order if neccessary. + * So we can read on FreeBSD/i386 (little endian) a locate database + * which was built on SunOS/sparc (big endian). + */ + +int +getwm(p) + caddr_t p; +{ + union { + char buf[INTSIZE]; + int i; + } u; + register int i; + + for (i = 0; i < INTSIZE; i++) + u.buf[i] = *p++; + + i = u.i; + + if (i > MAXPATHLEN || i < -(MAXPATHLEN)) { + i = ntohl(i); + if (i > MAXPATHLEN || i < -(MAXPATHLEN)) + errx(1, "integer out of +-MAXPATHLEN (%d): %d", + MAXPATHLEN, abs(i) < abs(htonl(i)) ? i : htonl(i)); + } + return(i); +} + +/* + * Read integer from stream. + * + * Convert network byte order to host byte order if neccessary. + * So we can read on FreeBSD/i386 (little endian) a locate database + * which was built on SunOS/sparc (big endian). 
+ */ + +int +getwf(fp) + FILE *fp; +{ + register int word; + + word = getw(fp); /* NOTE(review): getw(3) returns EOF on end-of-file or error; that case is not distinguished from data here */ + + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) { /* out of range as read: retry with the bytes swapped */ + word = ntohl(word); + if (word > MAXPATHLEN || word < -(MAXPATHLEN)) + errx(1, "integer out of +-MAXPATHLEN (%d): %d", + MAXPATHLEN, abs(word) < abs(htonl(word)) ? word : + htonl(word)); + } + return(word); +}