-/* $NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $ */
-
/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.bin/locate/code/locate.code.c,v 1.13 2002/03/22 01:22:47 imp Exp $
*/
-#include <sys/cdefs.h>
#ifndef lint
-__COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
- The Regents of the University of California. All rights reserved.\n");
+static char copyright[] =
+"@(#) Copyright (c) 1989, 1993\n\
+ The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
-#if 0
-static char sccsid[] = "@(#)locate.code.c 8.4 (Berkeley) 5/4/95";
-#endif
-__RCSID("$NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $");
+static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/*
*
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 switch code for out-of-range count to follow in next word
+ * 31 an 8 bit char followed
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue (ie, literal)
*
- * SEE ALSO: updatedb.csh, bigram.c
+ * The locate database store any character except newline ('\n')
+ * and NUL ('\0'). The 8-bit character support don't wast extra
+ * space until you have characters in file names less than 32
+ * or greather than 127.
+ *
+ *
+ * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c
*
* AUTHOR: James A. Woods, Informatics General Corp.,
* NASA Ames Research Center, 10/82
+ * 8-bit file names characters:
+ * Wolfram Schneider, Berlin September 1996
*/
#include <sys/param.h>
-
#include <err.h>
#include <errno.h>
-#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <stdio.h>
#include <unistd.h>
-
#include "locate.h"
#define BGBUFSIZE (NBG * 2) /* size of bigram buffer */
-char buf1[MAXPATHLEN] = " ";
-char buf2[MAXPATHLEN];
-char bigrams[BGBUFSIZE + 1] = { 0 };
+u_char buf1[MAXPATHLEN] = " ";
+u_char buf2[MAXPATHLEN];
+u_char bigrams[BGBUFSIZE + 1] = { 0 };
+
+#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
-int bgindex __P((char *));
-int main __P((int, char **));
-void usage __P((void));
+#ifdef LOOKUP
+#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
+typedef short bg_t;
+bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
+#else
+#define BGINDEX(x) bgindex(x)
+typedef int bg_t;
+int bgindex(char *);
+#endif /* LOOKUP */
+
+
+void usage(void);
int
main(argc, argv)
int argc;
char *argv[];
{
- char *cp, *oldpath, *path;
+ register u_char *cp, *oldpath, *path;
int ch, code, count, diffcount, oldcount;
FILE *fp;
+ register int i, j;
while ((ch = getopt(argc, argv, "")) != -1)
switch(ch) {
- case '?':
default:
usage();
}
/* First copy bigram array to stdout. */
(void)fgets(bigrams, BGBUFSIZE + 1, fp);
+
if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
err(1, "stdout");
(void)fclose(fp);
+#ifdef LOOKUP
+ /* init lookup table */
+ for (i = 0; i < UCHAR_MAX + 1; i++)
+ for (j = 0; j < UCHAR_MAX + 1; j++)
+ big[i][j] = (bg_t)-1;
+
+ for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
+ big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;
+
+#endif /* LOOKUP */
+
oldpath = buf1;
path = buf2;
oldcount = 0;
+
while (fgets(path, sizeof(buf2), stdin) != NULL) {
- /* Truncate newline. */
- cp = path + strlen(path) - 1;
- if (cp > path && *cp == '\n')
- *cp = '\0';
- /* Squelch characters that would botch the decoding. */
+ /* skip empty lines */
+ if (*path == '\n')
+ continue;
+
+ /* remove newline */
for (cp = path; *cp != '\0'; cp++) {
- *cp &= PARITY-1;
- if (*cp <= SWITCH)
+#ifndef LOCATE_CHAR30
+ /* old locate implementations core'd for char 30 */
+ if (*cp == SWITCH)
*cp = '?';
+ else
+#endif /* !LOCATE_CHAR30 */
+
+ /* chop newline */
+ if (*cp == '\n')
+ *cp = '\0';
}
/* Skip longest common prefix. */
for (cp = path; *cp == *oldpath; cp++, oldpath++)
- if (*oldpath == '\0')
+ if (*cp == '\0')
break;
+
count = cp - path;
diffcount = count - oldcount + OFFSET;
oldcount = count;
err(1, "stdout");
while (*cp != '\0') {
- if (*(cp + 1) == '\0') {
- if (putchar(*cp) == EOF)
- err(1, "stdout");
- break;
- }
- if ((code = bgindex(cp)) < 0) {
- if (putchar(*cp++) == EOF ||
- putchar(*cp++) == EOF)
- err(1, "stdout");
- } else {
- /* Found, so mark byte with parity bit. */
+ /* print *two* characters */
+
+ if ((code = BGINDEX(cp)) != (bg_t)-1) {
+ /*
+ * print *one* as bigram
+ * Found, so mark byte with
+ * parity bit.
+ */
if (putchar((code / 2) | PARITY) == EOF)
err(1, "stdout");
cp += 2;
}
+
+ else {
+ for (i = 0; i < 2; i++) {
+ if (*cp == '\0')
+ break;
+
+ /* print umlauts in file names */
+ if (*cp < ASCII_MIN ||
+ *cp > ASCII_MAX) {
+ if (putchar(UMLAUT) == EOF ||
+ putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+
+ else {
+ /* normal character */
+ if(putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+ }
+
+ }
}
+
if (path == buf1) { /* swap pointers */
path = buf2;
oldpath = buf1;
exit(0);
}
+#ifndef LOOKUP
int
bgindex(bg) /* Return location of bg in bigrams or -1. */
char *bg;
{
- char bg0, bg1, *p;
+ register char bg0, bg1, *p;
bg0 = bg[0];
bg1 = bg[1];
- for (p = bigrams; *p != '\0'; p++)
+ for (p = bigrams; *p != NULL; p++)
if (*p++ == bg0 && *p == bg1)
break;
- return (*p == '\0' ? -1 : --p - bigrams);
+ return (*p == NULL ? -1 : (--p - bigrams));
}
+#endif /* !LOOKUP */
void
usage()