From owner-freebsd-current  Tue Sep  5 16:49:39 1995
Return-Path: current-owner
Received: (from majordom@localhost)
          by freefall.freebsd.org (8.6.11/8.6.6) id QAA11240
          for current-outgoing; Tue, 5 Sep 1995 16:49:39 -0700
Received: from who.cdrom.com (who.cdrom.com [192.216.222.3])
          by freefall.freebsd.org (8.6.11/8.6.6) with ESMTP id QAA11231
          for <current@freebsd.org>; Tue, 5 Sep 1995 16:49:37 -0700
Received: from mail.cs.tu-berlin.de (mail.cs.tu-berlin.de [130.149.17.13])
          by who.cdrom.com (8.6.11/8.6.11) with ESMTP id QAA22038
          for <current@freebsd.org>; Tue, 5 Sep 1995 16:48:50 -0700
Received: from localhost.cs.tu-berlin.de ([130.149.1.124]) by mail.cs.tu-berlin.de (8.6.12/8.6.12) with ESMTP id BAA10213 for <current@freebsd.org>; Wed, 6 Sep 1995 01:38:02 +0200
Received: (from wosch@localhost) by localhost (8.6.9/8.6.9) id XAA01543; Tue, 5 Sep 1995 23:20:53 +0200
Date: Tue, 5 Sep 1995 23:20:53 +0200
From: Wolfram Schneider <wosch@cs.tu-berlin.de>
Message-Id: <199509052120.XAA01543@localhost>
To: current@freebsd.org
Subject: locate diffs
Reply-to: Wolfram Schneider <wosch@cs.tu-berlin.de>
MIME-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 8bit
Sender: current-owner@freebsd.org
Precedence: bulk


# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	Makefile.diff
#	README
#	locate.1.diff
#	locate.c
#
echo x - Makefile.diff
sed 's/^X//' >Makefile.diff << 'END-of-Makefile.diff'
X--- 1.1	1995/09/05 20:13:49
X+++ Makefile	1995/09/05 20:23:42
X@@ -2,8 +2,12 @@
X 
X PROG=	locate
X 
X+MMAP=   -DMMAP -DHAVE_MMAP=1 -DHAVE_WORKING_MMAP=1
X+CFLAGS+= ${MMAP}
X+CFLAGS+= -DFAST -DDEBUG=1
X+
X beforeinstall:
X-	install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
X+	${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
X 	    ${.CURDIR}/updatedb.csh ${DESTDIR}/usr/libexec/locate.updatedb
X 
X .include "../../Makefile.inc"
END-of-Makefile.diff
echo x - README
sed 's/^X//' >README << 'END-of-README'
X#
X# $Id: README,v 1.2 1995/09/05 20:42:27 wosch Exp $
X#
X
X95/09/05
X
XNew
X	Option
X		-d database
X	Enviroment
X		$LOCATE_PATH
X
XImpromements
X	Option -m for mmap (-DMMAP)
X	optimized fast first char check (-DFAST)
X
XKnown Bugs
X	to many #ifdefs
X	to few comments and manpages, GNU-locate is much better
X
X
XWolfram
X
X--
XWolfram Schneider <wosch@cs.tu-berlin.de>
Xhttp://hyperg.cs.tu-berlin.de/C~wosch
END-of-README
echo x - locate.1.diff
sed 's/^X//' >locate.1.diff << 'END-of-locate.1.diff'
X--- 1.1	1995/09/05 19:57:37
X+++ locate.1	1995/09/05 20:11:09
X@@ -38,13 +38,15 @@
X .Nm locate
X .Nd find files
X .Sh SYNOPSIS
X-.Ar locate
X-pattern
X+.Nm locate 
X+.Op Fl d Ar database
X+pattern ...
X .Sh DESCRIPTION
X .Nm Locate
X searches a database for all pathnames which match the specified
X .Ar pattern  .
X-The database is recomputed periodically, and contains the pathnames
X+The database is recomputed periodically (mostly weekly or daily), 
X+and contains the pathnames
X of all files which are publicly accessible.
X .Pp
X Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
X@@ -60,9 +62,21 @@
X As a special case, a pattern containing no globbing characters (``foo'')
X is matched as though it were ``*foo*''.
X .Sh FILES
X-.Bl -tag -width /var/db/locate.database -compact
X+.Bl -tag -width /usr/libexec/locate.updatedb -compact
X .It Pa /var/db/locate.database
X+locate database
X+.It Pa /usr/libexec/locate.updatedb
X+rebuilding script
X+.It Pa /etc/weekly
X+start rebuilding locate database
X .El
X+
X+.Sh ENVIROMENT
X+.Bl -tag -width /usr/libexec/locate.updatedb -compact
X+.It Pa LOCATE_PATH
X+locate database
X+.El
X+
X .Sh SEE ALSO
X .Xr find 1 ,
X .Xr fnmatch 3
END-of-locate.1.diff
echo x - locate.c
sed 's/^X//' >locate.c << 'END-of-locate.c'
X/*
X * Copyright (c) 1989, 1993
X *	The Regents of the University of California.  All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * James A. Woods.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X *    notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X *    notice, this list of conditions and the following disclaimer in the
X *    documentation and/or other materials provided with the distribution.
X * 3. All advertising materials mentioning features or use of this software
X *    must display the following acknowledgement:
X *	This product includes software developed by the University of
X *	California, Berkeley and its contributors.
X * 4. Neither the name of the University nor the names of its contributors
X *    may be used to endorse or promote products derived from this software
X *    without specific prior written permission.
X *
X * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
X * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
X * SUCH DAMAGE.
X */
X
X/* 
X * $Id: locate.c,v 1.10 1995/09/05 20:34:13 wosch Exp $ 
X */
X
X#ifndef lint
Xstatic char copyright[] =
X"@(#) Copyright (c) 1989, 1993\n\
X	The Regents of the University of California.  All rights reserved.\n";
X#endif /* not lint */
X
X#ifndef lint
Xstatic char sccsid[] = "@(#)locate.c	8.1 (Berkeley) 6/6/93";
X#endif /* not lint */
X
X/*
X * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
X *
X * Locate scans a file list for the full pathname of a file given only part
X * of the name.  The list has been processed with with "front-compression"
X * and bigram coding.  Front compression reduces space by a factor of 4-5,
X * bigram coding by a further 20-25%.
X *
X * The codes are:
X *
X * 	0-28	likeliest differential counts + offset to make nonnegative
X *	30	switch code for out-of-range count to follow in next word
X *	128-255 bigram codes (128 most common, as determined by 'updatedb')
X *	32-127  single character (printable) ascii residue (ie, literal)
X *
X * A novel two-tiered string search technique is employed:
X *
X * First, a metacharacter-free subpattern and partial pathname is matched
X * BACKWARDS to avoid full expansion of the pathname list.  The time savings
X * is 40-50% over forward matching, which cannot efficiently handle
X * overlapped search patterns and compressed path residue.
X *
X * Then, the actual shell glob-style regular expression (if in this form) is
X * matched against the candidate pathnames using the slower routines provided
X * in the standard 'find'.
X */
X
X#include <sys/param.h>
X
X#include <fnmatch.h>
X#include <unistd.h>
X#include <stdio.h>
X#include <string.h>
X#include <stdlib.h>
X
X#ifdef MMAP
X#  include <sys/types.h>
X#  include <sys/stat.h>
X#  include <sys/mman.h>
X#  include <fcntl.h>
X#endif
X
X#ifdef FAST
X#  define OPT_FAST_FIRST_CHAR_CHECK
X#endif
X
X#include "locate.h"
X#include "pathnames.h"
X
XFILE *fp;
Xchar *path_fcodes;
X
X
Xvoid usage ()
X{
X    (void)fprintf(stderr, "usage: locate [-d database] pattern ...\n");
X    exit(1);
X}
X
Xint
Xmain(argc, argv)
X	int argc;
X	char *argv[];
X{
X        register int ch;
X	int f_mmap = 0;
X#ifdef MMAP
X	struct stat sb;
X	int fd, len;
X	caddr_t p;
X#endif
X
X	path_fcodes = getenv("LOCATE_PATH");
X	if (path_fcodes == NULL)
X	    path_fcodes = _PATH_FCODES;
X
X        while ((ch = getopt(argc, argv, "?md:")) != EOF)
X                switch((char)ch) {
X                case 'd':
X                        path_fcodes = optarg;
X                        break;
X        	case 'm':
X			f_mmap = 1;
X			break;
X
X                case '?':
X                default:
X                        usage();
X                }
X        argv += optind;
X        argc -= optind;
X
X	if (argc < 1) 
X	    usage();
X
X
X	if (!f_mmap) {
X
X	    while (*argv) {
X		if (!(fp = fopen(path_fcodes, "r"))) {
X		    (void)fprintf(stderr, "locate: no database file %s.\n",
X				  path_fcodes);
X		    exit(1);
X		}
X		fastfind(*argv++);
X		fclose(fp);
X	    }
X
X	} else {
X#ifndef MMAP
X	    (void)fprintf(stderr, "mmap not implemented\n");
X	    exit(1);
X#else
X	    if ((fd = open(path_fcodes, O_RDONLY)) < 0) {
X		perror("open"); 
X		exit(1);
X	    }
X
X	    if (fstat(fd, &sb) == -1) {
X		perror("fstat"); 
X		exit(1);
X	    }
X	    len = sb.st_size;
X
X	    if ((p = mmap((caddr_t)0, (size_t)len, 
X			  PROT_READ, MAP_SHARED, 
X			  fd, (off_t)0)) < 0) {
X		perror("mmap"); exit(1);
X	    }
X
X	    while (*argv) {
X		fastfind_mmap(*argv++, p, len);
X	    }
X	    close(fd);
X#endif
X	}
X
X
X	exit(0);
X}
X
Xfastfind(pathpart)
X	char *pathpart;
X{
X	register char *p, *s, *patend, *q;
X	register int c, foundchar;
X
X	int count, found, globflag;
X	char *cutoff, *patprep();
X	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
X
X	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
X		p[c] = getc(fp), s[c] = getc(fp);
X
X	p = pathpart;
X	globflag = index(p, '*') || index(p, '?') || index(p, '[');
X	patend = patprep(p);
X
X	found = 0;
X	for (c = getc(fp), count = 0; c != EOF;) {
X#if 0
X		count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
X#else
X		if (c == SWITCH) { 
X		    count += getw(fp) - OFFSET;
X#if (DEBUG > 1)
X		    fprintf(stderr, "%s %d\n", path, count);
X#endif
X		} else {
X		    count += c - OFFSET;
X		}
X#endif
X		/* overlay old path */
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X		foundchar = 0; 
X#endif
X		for (p = path + count; (c = getc(fp)) > SWITCH;)
X			if (c < PARITY) {
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X			        if (c == *patend)
X				    foundchar++;
X#endif
X				*p++ = c;
X			    }
X			else {		/* bigrams are parity-marked */
X				c &= PARITY - 1;
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X				if (bigram1[c] == *patend ||
X				    bigram2[c] == *patend)
X				    foundchar++;
X#endif
X				*p++ = bigram1[c],
X				*p++ = bigram2[c];
X			}
X		*p-- = NULL;
X
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X		/* 
X		 * 1. new chars don't match pattern
X		 * 2. no old match
X		 * --> goto next word
X		 */
X		if (!foundchar && !found)
X		    continue;
X#endif
X
X		cutoff = (found ? path : path + count);
X
X#if (DEBUG > 1)
X		fprintf(stderr, "Path: %s cutoff: %s count: %d\n", 
X			path, cutoff, count);
X#endif /* DEBUG */
X
X		for (found = 0, s = p; s >= cutoff; s--)
X			if (*s == *patend) {	/* fast first char check */
X				for (p = patend - 1, q = s - 1; *p != NULL;
X				    p--, q--)
X					if (*q != *p)
X						break;
X				if (*p == NULL) {   /* fast match success */
X					found = 1;
X					if (!globflag ||
X					    !fnmatch(pathpart, path, 0))
X					(void)puts(path);
X
X					break;
X				}
X			}
X	}
X}
X
X#ifdef MMAP
Xfastfind_mmap(pathpart, paddr, len)
X	char *pathpart;
X        u_char *paddr; /* important! must be an unsigned char */
X        int len;
X{
X	register char *s, *p, *patend, *q;
X	register int c, foundchar;
X
X	int count, found, globflag, i;
X	char *cutoff, *patprep();
X	char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
X
X	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
X		p[c] = *paddr++, s[c] = *paddr++;
X
X	len -= (NBG + NBG);
X
X	p = pathpart;
X	globflag = index(p, '*') || index(p, '?') || index(p, '[');
X	patend = patprep(p);
X
X	found = 0;
X	for (c = *paddr++, count = 0; len > 0; len--) {
X	    if (c == SWITCH) {
X
X#if 1
X		/* read integer direct from mmap pointer */
X		count += *(int *)paddr - OFFSET;
X 		len -= sizeof(int); paddr += sizeof(int); 
X#else
X
X		
X#if 1           /* Hack for reading integer from mmap'ed file  */
X		/* little endian, lsb first: i386, vax */
X		for (c = 0, i = 0; i < sizeof(int); i++, paddr++, len--)
X		    c += (*paddr << (8 * i)); 
X		count += c - OFFSET;
X#else 
X		/* big endian, msb first: 6800, ibm, net, sun4, sol2 */
X		for (c = 0, i = 1 - sizeof(int); i <= 0; i++, paddr++, len--)
X		    c += (*paddr << (8 * -i)); 
X		count += c - OFFSET;
X
X#endif /* big endian */
X#endif /* *(int*)mmap pointer */
X
X
X#if (DEBUG > 1)
X		fprintf(stderr, "%s %d\n", path, count);
X#endif
X
X#if (DEBUG >= 1)
X	        /* 
X		 * something is wrong, we got further segv/sigbus or
X		 * garbage. Should not be reached ... (wrong byteorder?)
X		 */
X
X	        if (count < 0 || count > MAXPATHLEN) {
X		    fprintf(stderr, "count out of range: %d\nPath: %s\n", 
X			    count, path);
X		    exit(1);
X		}
X#endif /* DEBUG */
X
X	    } else {
X		count += c - OFFSET;
X	    }
X		/* overlay old path */
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X		foundchar = 0; 
X#endif
X
X#if (DEBUG > 1)
X		fprintf(stderr, "path: %s cutoff: %s count: %d c: %c%c%c\n", 
X			path, cutoff, count, *(paddr), *(paddr+1), *(paddr+2));
X#endif /* DEBUG */
X
X
X		for (p = path + count; (c = *paddr++) > SWITCH; len--)
X			if (c < PARITY) {
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X			        if (c == *patend)
X				    foundchar++;
X#endif
X				*p++ = c;
X			    }
X			else {		/* bigrams are parity-marked */
X				c &= PARITY - 1;
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X				if (bigram1[c] == *patend ||
X				    bigram2[c] == *patend)
X				    foundchar++;
X#endif
X				*p++ = bigram1[c],
X				*p++ = bigram2[c];
X			}
X		*p-- = NULL;
X
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X		/* 
X		 * 1. new chars don't match pattern
X		 * 2. no old match
X		 * --> goto next word
X		 */
X		if (!foundchar && !found)
X		    continue;
X#endif
X
X		cutoff = (found ? path : path + count);
X
X#if (DEBUG > 1)
X		fprintf(stderr, "Path: %s cutoff: %s count: %d\n", 
X			path, cutoff, count);
X#endif /* DEBUG */
X
X		for (found = 0, s = p; s >= cutoff; s--)
X			if (*s == *patend) {	/* fast first char check */
X				for (p = patend - 1, q = s - 1; *p != NULL;
X				    p--, q--)
X					if (*q != *p)
X						break;
X				if (*p == NULL) {   /* fast match success */
X					found = 1;
X					if (!globflag ||
X					    !fnmatch(pathpart, path, 0))
X					(void)puts(path);
X
X					break;
X				}
X			}
X	}
X}
X#endif
X
X/*
X * extract last glob-free subpattern in name for fast pre-match; prepend
X * '\0' for backwards match; return end of new pattern
X */
Xstatic char globfree[100];
X
Xchar *
Xpatprep(name)
X	char *name;
X{
X	register char *endmark, *p, *subp;
X
X	subp = globfree;
X	*subp++ = '\0';
X	p = name + strlen(name) - 1;
X	/* skip trailing metacharacters (and [] ranges) */
X	for (; p >= name; p--)
X		if (index("*?", *p) == 0)
X			break;
X	if (p < name)
X		p = name;
X	if (*p == ']')
X		for (p--; p >= name; p--)
X			if (*p == '[') {
X				p--;
X				break;
X			}
X	if (p < name)
X		p = name;
X	/*
X	 * if pattern has only metacharacters, check every path (force '/'
X	 * search)
X	 */
X	if ((p == name) && index("?*[]", *p) != 0)
X		*subp++ = '/';
X	else {
X		for (endmark = p; p >= name; p--)
X			if (index("]*?", *p) != 0)
X				break;
X		for (++p;
X		    (p <= endmark) && subp < (globfree + sizeof(globfree));)
X			*subp++ = *p++;
X	}
X	*subp = '\0';
X	return(--subp);
X}
END-of-locate.c
exit