shell_cmds-170.tar.gz

[apple/shell_cmds.git] / locate / code / locate.code.c
diff --git a/locate/code/locate.code.c b/locate/code/locate.code.c

index c3a262e0a68daec555da65834ca70e87e2fae109..9f7850e16028cdb944ae8ba272c4b02ca4ceb410 100644 (file)
--- a/locate/code/locate.code.c
+++ b/locate/code/locate.code.c
@@ -1,6 +1,5 @@
-/*     $NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $    */
-
  /*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
   * Copyright (c) 1989, 1993
   *     The Regents of the University of California.  All rights reserved.
   *
@@ -34,19 +33,18 @@
   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/usr.bin/locate/code/locate.code.c,v 1.13 2002/03/22 01:22:47 imp Exp $
   */
  
-#include <sys/cdefs.h>
  #ifndef lint
-__COPYRIGHT("@(#) Copyright (c) 1989, 1993\n\
-       The Regents of the University of California.  All rights reserved.\n");
+static char copyright[] =
+"@(#) Copyright (c) 1989, 1993\n\
+       The Regents of the University of California.  All rights reserved.\n";
  #endif /* not lint */
  
  #ifndef lint
-#if 0
-static char sccsid[] = "@(#)locate.code.c      8.4 (Berkeley) 5/4/95";
-#endif
-__RCSID("$NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $");
+static char sccsid[] = "@(#)locate.code.c      8.1 (Berkeley) 6/6/93";
  #endif /* not lint */
  
  /*
@@ -75,48 +73,66 @@ __RCSID("$NetBSD: locate.code.c,v 1.6 1997/10/19 04:11:54 lukem Exp $");
   *
   *     0-28    likeliest differential counts + offset to make nonnegative
   *     30      switch code for out-of-range count to follow in next word
+ *      31      switch code: an 8-bit char follows
   *     128-255 bigram codes (128 most common, as determined by 'updatedb')
   *     32-127  single character (printable) ascii residue (ie, literal)
   *
- * SEE ALSO:   updatedb.csh, bigram.c
+ * The locate database stores any character except newline ('\n')
+ * and NUL ('\0'). The 8-bit character support doesn't waste extra
+ * space unless file names contain characters less than 32
+ * or greater than 127.
+ * 
+ *
+ * SEE ALSO:   updatedb.sh, ../bigram/locate.bigram.c
   *
   * AUTHOR:     James A. Woods, Informatics General Corp.,
   *             NASA Ames Research Center, 10/82
+ *              8-bit file name characters:
+ *                     Wolfram Schneider, Berlin September 1996
   */
  
  #include <sys/param.h>
-
  #include <err.h>
  #include <errno.h>
-#include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+#include <stdio.h>
  #include <unistd.h>
-
  #include "locate.h"
  
  #define        BGBUFSIZE       (NBG * 2)       /* size of bigram buffer */
  
-char buf1[MAXPATHLEN] = " ";   
-char buf2[MAXPATHLEN];
-char bigrams[BGBUFSIZE + 1] = { 0 };
+u_char buf1[MAXPATHLEN] = " "; 
+u_char buf2[MAXPATHLEN];
+u_char bigrams[BGBUFSIZE + 1] = { 0 };
+
+#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
  
-int    bgindex __P((char *));
-int    main __P((int, char **));
-void   usage __P((void));
+#ifdef LOOKUP
+#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
+typedef short bg_t;
+bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
+#else
+#define BGINDEX(x) bgindex(x)
+typedef int bg_t;
+int    bgindex(char *);
+#endif /* LOOKUP */
+
+
+void   usage(void);
  
  int
  main(argc, argv)
         int argc;
         char *argv[];
  {
-       char *cp, *oldpath, *path;
+       register u_char *cp, *oldpath, *path;
         int ch, code, count, diffcount, oldcount;
         FILE *fp;
+       register int i, j;
  
         while ((ch = getopt(argc, argv, "")) != -1)
                 switch(ch) {
-               case '?':
                 default:
                         usage();
                 }
@@ -131,30 +147,51 @@ main(argc, argv)
  
         /* First copy bigram array to stdout. */
         (void)fgets(bigrams, BGBUFSIZE + 1, fp);
+
         if (fwrite(bigrams, 1, BGBUFSIZE, stdout) != BGBUFSIZE)
                 err(1, "stdout");
         (void)fclose(fp);
  
+#ifdef LOOKUP
+       /* init lookup table */
+       for (i = 0; i < UCHAR_MAX + 1; i++)
+               for (j = 0; j < UCHAR_MAX + 1; j++) 
+                       big[i][j] = (bg_t)-1;
+
+       for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
+               big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;
+
+#endif /* LOOKUP */
+
         oldpath = buf1;
         path = buf2;
         oldcount = 0;
+
         while (fgets(path, sizeof(buf2), stdin) != NULL) {
-               /* Truncate newline. */
-               cp = path + strlen(path) - 1;
-               if (cp > path && *cp == '\n')
-                       *cp = '\0';
  
-               /* Squelch characters that would botch the decoding. */
+               /* skip empty lines */
+               if (*path == '\n')
+                       continue;
+
+               /* remove newline */
                 for (cp = path; *cp != '\0'; cp++) {
-                       *cp &= PARITY-1;
-                       if (*cp <= SWITCH)
+#ifndef LOCATE_CHAR30
+                       /* old locate implementations core'd for char 30 */
+                       if (*cp == SWITCH)
                                 *cp = '?';
+                       else
+#endif /* !LOCATE_CHAR30 */
+
+                       /* chop newline */
+                       if (*cp == '\n')
+                               *cp = '\0';
                 }
  
                 /* Skip longest common prefix. */
                 for (cp = path; *cp == *oldpath; cp++, oldpath++)
-                       if (*oldpath == '\0')
+                       if (*cp == '\0')
                                 break;
+
                 count = cp - path;
                 diffcount = count - oldcount + OFFSET;
                 oldcount = count;
@@ -167,22 +204,42 @@ main(argc, argv)
                                 err(1, "stdout");
  
                 while (*cp != '\0') {
-                       if (*(cp + 1) == '\0') {
-                               if (putchar(*cp) == EOF)
-                                       err(1, "stdout");
-                               break;
-                       }
-                       if ((code = bgindex(cp)) < 0) {
-                               if (putchar(*cp++) == EOF ||
-                                   putchar(*cp++) == EOF)
-                                       err(1, "stdout");
-                       } else {
-                               /* Found, so mark byte with parity bit. */
+                       /* print *two* characters */
+
+                       if ((code = BGINDEX(cp)) != (bg_t)-1) {
+                               /*
+                                * print *one* as bigram
+                                * Found, so mark byte with 
+                                *  parity bit. 
+                                */
                                 if (putchar((code / 2) | PARITY) == EOF)
                                         err(1, "stdout");
                                 cp += 2;
                         }
+
+                       else {
+                               for (i = 0; i < 2; i++) {
+                                       if (*cp == '\0')
+                                               break;
+
+                                       /* print umlauts in file names */
+                                       if (*cp < ASCII_MIN || 
+                                           *cp > ASCII_MAX) {
+                                               if (putchar(UMLAUT) == EOF ||
+                                                   putchar(*cp++) == EOF)
+                                                       err(1, "stdout");
+                                       } 
+
+                                       else {
+                                               /* normal character */
+                                               if(putchar(*cp++) == EOF)
+                                                       err(1, "stdout");
+                                       }
+                               }
+
+                       }
                 }
+
                 if (path == buf1) {             /* swap pointers */
                         path = buf2;
                         oldpath = buf1;
@@ -197,19 +254,21 @@ main(argc, argv)
         exit(0);
  }
  
+#ifndef LOOKUP
  int
  bgindex(bg)                    /* Return location of bg in bigrams or -1. */
         char *bg;
  {
-       char bg0, bg1, *p;
+       register char bg0, bg1, *p;
  
         bg0 = bg[0];
         bg1 = bg[1];
-       for (p = bigrams; *p != '\0'; p++)
+       for (p = bigrams; *p != NULL; p++)
                 if (*p++ == bg0 && *p == bg1)
                         break;
-       return (*p == '\0' ? -1 : --p - bigrams);
+       return (*p == NULL ? -1 : (--p - bigrams));
  }
+#endif /* !LOOKUP */
  
  void
  usage()