From owner-freebsd-current Tue Sep 5 16:49:39 1995 Return-Path: current-owner Received: (from majordom@localhost) by freefall.freebsd.org (8.6.11/8.6.6) id QAA11240 for current-outgoing; Tue, 5 Sep 1995 16:49:39 -0700 Received: from who.cdrom.com (who.cdrom.com [192.216.222.3]) by freefall.freebsd.org (8.6.11/8.6.6) with ESMTP id QAA11231 for ; Tue, 5 Sep 1995 16:49:37 -0700 Received: from mail.cs.tu-berlin.de (mail.cs.tu-berlin.de [130.149.17.13]) by who.cdrom.com (8.6.11/8.6.11) with ESMTP id QAA22038 for ; Tue, 5 Sep 1995 16:48:50 -0700 Received: from localhost.cs.tu-berlin.de ([130.149.1.124]) by mail.cs.tu-berlin.de (8.6.12/8.6.12) with ESMTP id BAA10213 for ; Wed, 6 Sep 1995 01:38:02 +0200 Received: (from wosch@localhost) by localhost (8.6.9/8.6.9) id XAA01543; Tue, 5 Sep 1995 23:20:53 +0200 Date: Tue, 5 Sep 1995 23:20:53 +0200 From: Wolfram Schneider Message-Id: <199509052120.XAA01543@localhost> To: current@freebsd.org Subject: locate diffs Reply-to: Wolfram Schneider MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 8bit Sender: current-owner@freebsd.org Precedence: bulk # This is a shell archive. Save it in a file, remove anything before # this line, and then unpack it by entering "sh file". Note, it may # create directories; files and directories will be owned by you and # have default permissions. # # This archive contains: # # Makefile.diff # README # locate.1.diff # locate.c # echo x - Makefile.diff sed 's/^X//' >Makefile.diff << 'END-of-Makefile.diff' X--- 1.1 1995/09/05 20:13:49 X+++ Makefile 1995/09/05 20:23:42 X@@ -2,8 +2,12 @@ X X PROG= locate X X+MMAP= -DMMAP -DHAVE_MMAP=1 -DHAVE_WORKING_MMAP=1 X+CFLAGS+= ${MMAP} X+CFLAGS+= -DFAST -DDEBUG=1 X+ X beforeinstall: X- install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ X+ ${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \ X ${.CURDIR}/updatedb.csh ${DESTDIR}/usr/libexec/locate.updatedb X X .include "../../Makefile.inc" END-of-Makefile.diff echo x - README sed 's/^X//' >README << 'END-of-README' X# X# $Id: README,v 1.2 1995/09/05 20:42:27 wosch Exp $ X# X X95/09/05 X XNew X Option X -d database X Enviroment X $LOCATE_PATH X XImpromements X Option -m for mmap (-DMMAP) X optimized fast first char check (-DFAST) X XKnown Bugs X to many #ifdefs X to few comments and manpages, GNU-locate is much better X X XWolfram X X-- XWolfram Schneider Xhttp://hyperg.cs.tu-berlin.de/C~wosch END-of-README echo x - locate.1.diff sed 's/^X//' >locate.1.diff << 'END-of-locate.1.diff' X--- 1.1 1995/09/05 19:57:37 X+++ locate.1 1995/09/05 20:11:09 X@@ -38,13 +38,15 @@ X .Nm locate X .Nd find files X .Sh SYNOPSIS X-.Ar locate X-pattern X+.Nm locate X+.Op Fl d Ar database X+pattern ... X .Sh DESCRIPTION X .Nm Locate X searches a database for all pathnames which match the specified X .Ar pattern . X-The database is recomputed periodically, and contains the pathnames X+The database is recomputed periodically (mostly weekly or daily), X+and contains the pathnames X of all files which are publicly accessible. X .Pp X Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``['' X@@ -60,9 +62,21 @@ X As a special case, a pattern containing no globbing characters (``foo'') X is matched as though it were ``*foo*''. X .Sh FILES X-.Bl -tag -width /var/db/locate.database -compact X+.Bl -tag -width /usr/libexec/locate.updatedb -compact X .It Pa /var/db/locate.database X+locate database X+.It Pa /usr/libexec/locate.updatedb X+rebuilding script X+.It Pa /etc/weekly X+start rebuilding locate database X .El X+ X+.Sh ENVIROMENT X+.Bl -tag -width /usr/libexec/locate.updatedb -compact X+.It Pa LOCATE_PATH X+locate database X+.El X+ X .Sh SEE ALSO X .Xr find 1 , X .Xr fnmatch 3 END-of-locate.1.diff echo x - locate.c sed 's/^X//' >locate.c << 'END-of-locate.c' X/* X * Copyright (c) 1989, 1993 X * The Regents of the University of California. All rights reserved. X * X * This code is derived from software contributed to Berkeley by X * James A. Woods. X * X * Redistribution and use in source and binary forms, with or without X * modification, are permitted provided that the following conditions X * are met: X * 1. Redistributions of source code must retain the above copyright X * notice, this list of conditions and the following disclaimer. X * 2. Redistributions in binary form must reproduce the above copyright X * notice, this list of conditions and the following disclaimer in the X * documentation and/or other materials provided with the distribution. X * 3. All advertising materials mentioning features or use of this software X * must display the following acknowledgement: X * This product includes software developed by the University of X * California, Berkeley and its contributors. X * 4. Neither the name of the University nor the names of its contributors X * may be used to endorse or promote products derived from this software X * without specific prior written permission. X * X * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE X * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF X * SUCH DAMAGE. X */ X X/* X * $Id: locate.c,v 1.10 1995/09/05 20:34:13 wosch Exp $ X */ X X#ifndef lint Xstatic char copyright[] = X"@(#) Copyright (c) 1989, 1993\n\ X The Regents of the University of California. All rights reserved.\n"; X#endif /* not lint */ X X#ifndef lint Xstatic char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93"; X#endif /* not lint */ X X/* X * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8. X * X * Locate scans a file list for the full pathname of a file given only part X * of the name. The list has been processed with with "front-compression" X * and bigram coding. Front compression reduces space by a factor of 4-5, X * bigram coding by a further 20-25%. X * X * The codes are: X * X * 0-28 likeliest differential counts + offset to make nonnegative X * 30 switch code for out-of-range count to follow in next word X * 128-255 bigram codes (128 most common, as determined by 'updatedb') X * 32-127 single character (printable) ascii residue (ie, literal) X * X * A novel two-tiered string search technique is employed: X * X * First, a metacharacter-free subpattern and partial pathname is matched X * BACKWARDS to avoid full expansion of the pathname list. The time savings X * is 40-50% over forward matching, which cannot efficiently handle X * overlapped search patterns and compressed path residue. X * X * Then, the actual shell glob-style regular expression (if in this form) is X * matched against the candidate pathnames using the slower routines provided X * in the standard 'find'. X */ X X#include X X#include X#include X#include X#include X#include X X#ifdef MMAP X# include X# include X# include X# include X#endif X X#ifdef FAST X# define OPT_FAST_FIRST_CHAR_CHECK X#endif X X#include "locate.h" X#include "pathnames.h" X XFILE *fp; Xchar *path_fcodes; X X Xvoid usage () X{ X (void)fprintf(stderr, "usage: locate [-d database] pattern ...\n"); X exit(1); X} X Xint Xmain(argc, argv) X int argc; X char *argv[]; X{ X register int ch; X int f_mmap = 0; X#ifdef MMAP X struct stat sb; X int fd, len; X caddr_t p; X#endif X X path_fcodes = getenv("LOCATE_PATH"); X if (path_fcodes == NULL) X path_fcodes = _PATH_FCODES; X X while ((ch = getopt(argc, argv, "?md:")) != EOF) X switch((char)ch) { X case 'd': X path_fcodes = optarg; X break; X case 'm': X f_mmap = 1; X break; X X case '?': X default: X usage(); X } X argv += optind; X argc -= optind; X X if (argc < 1) X usage(); X X X if (!f_mmap) { X X while (*argv) { X if (!(fp = fopen(path_fcodes, "r"))) { X (void)fprintf(stderr, "locate: no database file %s.\n", X path_fcodes); X exit(1); X } X fastfind(*argv++); X fclose(fp); X } X X } else { X#ifndef MMAP X (void)fprintf(stderr, "mmap not implemented\n"); X exit(1); X#else X if ((fd = open(path_fcodes, O_RDONLY)) < 0) { X perror("open"); X exit(1); X } X X if (fstat(fd, &sb) == -1) { X perror("fstat"); X exit(1); X } X len = sb.st_size; X X if ((p = mmap((caddr_t)0, (size_t)len, X PROT_READ, MAP_SHARED, X fd, (off_t)0)) < 0) { X perror("mmap"); exit(1); X } X X while (*argv) { X fastfind_mmap(*argv++, p, len); X } X close(fd); X#endif X } X X X exit(0); X} X Xfastfind(pathpart) X char *pathpart; X{ X register char *p, *s, *patend, *q; X register int c, foundchar; X X int count, found, globflag; X char *cutoff, *patprep(); X char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; X X for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) X p[c] = getc(fp), s[c] = getc(fp); X X p = pathpart; X globflag = index(p, '*') || index(p, '?') || index(p, '['); X patend = patprep(p); X X found = 0; X for (c = getc(fp), count = 0; c != EOF;) { X#if 0 X count += ((c == SWITCH) ? getw(fp) : c) - OFFSET; X#else X if (c == SWITCH) { X count += getw(fp) - OFFSET; X#if (DEBUG > 1) X fprintf(stderr, "%s %d\n", path, count); X#endif X } else { X count += c - OFFSET; X } X#endif X /* overlay old path */ X#ifdef OPT_FAST_FIRST_CHAR_CHECK X foundchar = 0; X#endif X for (p = path + count; (c = getc(fp)) > SWITCH;) X if (c < PARITY) { X#ifdef OPT_FAST_FIRST_CHAR_CHECK X if (c == *patend) X foundchar++; X#endif X *p++ = c; X } X else { /* bigrams are parity-marked */ X c &= PARITY - 1; X#ifdef OPT_FAST_FIRST_CHAR_CHECK X if (bigram1[c] == *patend || X bigram2[c] == *patend) X foundchar++; X#endif X *p++ = bigram1[c], X *p++ = bigram2[c]; X } X *p-- = NULL; X X#ifdef OPT_FAST_FIRST_CHAR_CHECK X /* X * 1. new chars don't match pattern X * 2. no old match X * --> goto next word X */ X if (!foundchar && !found) X continue; X#endif X X cutoff = (found ? path : path + count); X X#if (DEBUG > 1) X fprintf(stderr, "Path: %s cutoff: %s count: %d\n", X path, cutoff, count); X#endif /* DEBUG */ X X for (found = 0, s = p; s >= cutoff; s--) X if (*s == *patend) { /* fast first char check */ X for (p = patend - 1, q = s - 1; *p != NULL; X p--, q--) X if (*q != *p) X break; X if (*p == NULL) { /* fast match success */ X found = 1; X if (!globflag || X !fnmatch(pathpart, path, 0)) X (void)puts(path); X X break; X } X } X } X} X X#ifdef MMAP Xfastfind_mmap(pathpart, paddr, len) X char *pathpart; X u_char *paddr; /* important! must be an unsigned char */ X int len; X{ X register char *s, *p, *patend, *q; X register int c, foundchar; X X int count, found, globflag, i; X char *cutoff, *patprep(); X char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN]; X X for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) X p[c] = *paddr++, s[c] = *paddr++; X X len -= (NBG + NBG); X X p = pathpart; X globflag = index(p, '*') || index(p, '?') || index(p, '['); X patend = patprep(p); X X found = 0; X for (c = *paddr++, count = 0; len > 0; len--) { X if (c == SWITCH) { X X#if 1 X /* read integer direct from mmap pointer */ X count += *(int *)paddr - OFFSET; X len -= sizeof(int); paddr += sizeof(int); X#else X X X#if 1 /* Hack for reading integer from mmap'ed file */ X /* little endian, lsb first: i386, vax */ X for (c = 0, i = 0; i < sizeof(int); i++, paddr++, len--) X c += (*paddr << (8 * i)); X count += c - OFFSET; X#else X /* big endian, msb first: 6800, ibm, net, sun4, sol2 */ X for (c = 0, i = 1 - sizeof(int); i <= 0; i++, paddr++, len--) X c += (*paddr << (8 * -i)); X count += c - OFFSET; X X#endif /* big endian */ X#endif /* *(int*)mmap pointer */ X X X#if (DEBUG > 1) X fprintf(stderr, "%s %d\n", path, count); X#endif X X#if (DEBUG >= 1) X /* X * something is wrong, we got further segv/sigbus or X * garbage. Should not be reached ... (wrong byteorder?) X */ X X if (count < 0 || count > MAXPATHLEN) { X fprintf(stderr, "count out of range: %d\nPath: %s\n", X count, path); X exit(1); X } X#endif /* DEBUG */ X X } else { X count += c - OFFSET; X } X /* overlay old path */ X#ifdef OPT_FAST_FIRST_CHAR_CHECK X foundchar = 0; X#endif X X#if (DEBUG > 1) X fprintf(stderr, "path: %s cutoff: %s count: %d c: %c%c%c\n", X path, cutoff, count, *(paddr), *(paddr+1), *(paddr+2)); X#endif /* DEBUG */ X X X for (p = path + count; (c = *paddr++) > SWITCH; len--) X if (c < PARITY) { X#ifdef OPT_FAST_FIRST_CHAR_CHECK X if (c == *patend) X foundchar++; X#endif X *p++ = c; X } X else { /* bigrams are parity-marked */ X c &= PARITY - 1; X#ifdef OPT_FAST_FIRST_CHAR_CHECK X if (bigram1[c] == *patend || X bigram2[c] == *patend) X foundchar++; X#endif X *p++ = bigram1[c], X *p++ = bigram2[c]; X } X *p-- = NULL; X X#ifdef OPT_FAST_FIRST_CHAR_CHECK X /* X * 1. new chars don't match pattern X * 2. no old match X * --> goto next word X */ X if (!foundchar && !found) X continue; X#endif X X cutoff = (found ? path : path + count); X X#if (DEBUG > 1) X fprintf(stderr, "Path: %s cutoff: %s count: %d\n", X path, cutoff, count); X#endif /* DEBUG */ X X for (found = 0, s = p; s >= cutoff; s--) X if (*s == *patend) { /* fast first char check */ X for (p = patend - 1, q = s - 1; *p != NULL; X p--, q--) X if (*q != *p) X break; X if (*p == NULL) { /* fast match success */ X found = 1; X if (!globflag || X !fnmatch(pathpart, path, 0)) X (void)puts(path); X X break; X } X } X } X} X#endif X X/* X * extract last glob-free subpattern in name for fast pre-match; prepend X * '\0' for backwards match; return end of new pattern X */ Xstatic char globfree[100]; X Xchar * Xpatprep(name) X char *name; X{ X register char *endmark, *p, *subp; X X subp = globfree; X *subp++ = '\0'; X p = name + strlen(name) - 1; X /* skip trailing metacharacters (and [] ranges) */ X for (; p >= name; p--) X if (index("*?", *p) == 0) X break; X if (p < name) X p = name; X if (*p == ']') X for (p--; p >= name; p--) X if (*p == '[') { X p--; X break; X } X if (p < name) X p = name; X /* X * if pattern has only metacharacters, check every path (force '/' X * search) X */ X if ((p == name) && index("?*[]", *p) != 0) X *subp++ = '/'; X else { X for (endmark = p; p >= name; p--) X if (index("]*?", *p) != 0) X break; X for (++p; X (p <= endmark) && subp < (globfree + sizeof(globfree));) X *subp++ = *p++; X } X *subp = '\0'; X return(--subp); X} END-of-locate.c exit