Date: Tue, 5 Sep 1995 23:20:53 +0200 From: Wolfram Schneider <wosch@cs.tu-berlin.de> To: current@freebsd.org Subject: locate diffs Message-ID: <199509052120.XAA01543@localhost>
next in thread | raw e-mail | index | archive | help
# This is a shell archive. Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file". Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
# Makefile.diff
# README
# locate.1.diff
# locate.c
#
echo x - Makefile.diff
sed 's/^X//' >Makefile.diff << 'END-of-Makefile.diff'
X--- 1.1 1995/09/05 20:13:49
X+++ Makefile 1995/09/05 20:23:42
X@@ -2,8 +2,12 @@
X
X PROG= locate
X
X+MMAP= -DMMAP -DHAVE_MMAP=1 -DHAVE_WORKING_MMAP=1
X+CFLAGS+= ${MMAP}
X+CFLAGS+= -DFAST -DDEBUG=1
X+
X beforeinstall:
X- install -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
X+ ${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
X ${.CURDIR}/updatedb.csh ${DESTDIR}/usr/libexec/locate.updatedb
X
X .include "../../Makefile.inc"
END-of-Makefile.diff
echo x - README
sed 's/^X//' >README << 'END-of-README'
X#
X# $Id: README,v 1.2 1995/09/05 20:42:27 wosch Exp $
X#
X
X95/09/05
X
XNew
X Option
X -d database
X Enviroment
X $LOCATE_PATH
X
XImpromements
X Option -m for mmap (-DMMAP)
X optimized fast first char check (-DFAST)
X
XKnown Bugs
X to many #ifdefs
X to few comments and manpages, GNU-locate is much better
X
X
XWolfram
X
X--
XWolfram Schneider <wosch@cs.tu-berlin.de>
Xhttp://hyperg.cs.tu-berlin.de/C~wosch
END-of-README
echo x - locate.1.diff
sed 's/^X//' >locate.1.diff << 'END-of-locate.1.diff'
X--- 1.1 1995/09/05 19:57:37
X+++ locate.1 1995/09/05 20:11:09
X@@ -38,13 +38,15 @@
X .Nm locate
X .Nd find files
X .Sh SYNOPSIS
X-.Ar locate
X-pattern
X+.Nm locate
X+.Op Fl d Ar database
X+pattern ...
X .Sh DESCRIPTION
X .Nm Locate
X searches a database for all pathnames which match the specified
X .Ar pattern .
X-The database is recomputed periodically, and contains the pathnames
X+The database is recomputed periodically (mostly weekly or daily),
X+and contains the pathnames
X of all files which are publicly accessible.
X .Pp
X Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
X@@ -60,9 +62,21 @@
X As a special case, a pattern containing no globbing characters (``foo'')
X is matched as though it were ``*foo*''.
X .Sh FILES
X-.Bl -tag -width /var/db/locate.database -compact
X+.Bl -tag -width /usr/libexec/locate.updatedb -compact
X .It Pa /var/db/locate.database
X+locate database
X+.It Pa /usr/libexec/locate.updatedb
X+rebuilding script
X+.It Pa /etc/weekly
X+start rebuilding locate database
X .El
X+
X+.Sh ENVIROMENT
X+.Bl -tag -width /usr/libexec/locate.updatedb -compact
X+.It Pa LOCATE_PATH
X+locate database
X+.El
X+
X .Sh SEE ALSO
X .Xr find 1 ,
X .Xr fnmatch 3
END-of-locate.1.diff
echo x - locate.c
sed 's/^X//' >locate.c << 'END-of-locate.c'
X/*
X * Copyright (c) 1989, 1993
X * The Regents of the University of California. All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * James A. Woods.
X *
X * Redistribution and use in source and binary forms, with or without
X * modification, are permitted provided that the following conditions
X * are met:
X * 1. Redistributions of source code must retain the above copyright
X * notice, this list of conditions and the following disclaimer.
X * 2. Redistributions in binary form must reproduce the above copyright
X * notice, this list of conditions and the following disclaimer in the
X * documentation and/or other materials provided with the distribution.
X * 3. All advertising materials mentioning features or use of this software
X * must display the following acknowledgement:
X * This product includes software developed by the University of
X * California, Berkeley and its contributors.
X * 4. Neither the name of the University nor the names of its contributors
X * may be used to endorse or promote products derived from this software
X * without specific prior written permission.
X *
X * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
X * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
X * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
X * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
X * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
X * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
X * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
X * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
X * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
X * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
X * SUCH DAMAGE.
X */
X
X/*
X * $Id: locate.c,v 1.10 1995/09/05 20:34:13 wosch Exp $
X */
X
X#ifndef lint
Xstatic char copyright[] =
X"@(#) Copyright (c) 1989, 1993\n\
X The Regents of the University of California. All rights reserved.\n";
X#endif /* not lint */
X
X#ifndef lint
Xstatic char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
X#endif /* not lint */
X
X/*
X * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
X *
X * Locate scans a file list for the full pathname of a file given only part
X * of the name. The list has been processed with with "front-compression"
X * and bigram coding. Front compression reduces space by a factor of 4-5,
X * bigram coding by a further 20-25%.
X *
X * The codes are:
X *
X * 0-28 likeliest differential counts + offset to make nonnegative
X * 30 switch code for out-of-range count to follow in next word
X * 128-255 bigram codes (128 most common, as determined by 'updatedb')
X * 32-127 single character (printable) ascii residue (ie, literal)
X *
X * A novel two-tiered string search technique is employed:
X *
X * First, a metacharacter-free subpattern and partial pathname is matched
X * BACKWARDS to avoid full expansion of the pathname list. The time savings
X * is 40-50% over forward matching, which cannot efficiently handle
X * overlapped search patterns and compressed path residue.
X *
X * Then, the actual shell glob-style regular expression (if in this form) is
X * matched against the candidate pathnames using the slower routines provided
X * in the standard 'find'.
X */
X
X#include <sys/param.h>
X
X#include <fnmatch.h>
X#include <unistd.h>
X#include <stdio.h>
X#include <string.h>
X#include <stdlib.h>
X
X#ifdef MMAP
X# include <sys/types.h>
X# include <sys/stat.h>
X# include <sys/mman.h>
X# include <fcntl.h>
X#endif
X
X#ifdef FAST
X# define OPT_FAST_FIRST_CHAR_CHECK
X#endif
X
X#include "locate.h"
X#include "pathnames.h"
X
XFILE *fp;
Xchar *path_fcodes;
X
X
Xvoid usage ()
X{
X (void)fprintf(stderr, "usage: locate [-d database] pattern ...\n");
X exit(1);
X}
X
Xint
Xmain(argc, argv)
X int argc;
X char *argv[];
X{
X register int ch;
X int f_mmap = 0;
X#ifdef MMAP
X struct stat sb;
X int fd, len;
X caddr_t p;
X#endif
X
X path_fcodes = getenv("LOCATE_PATH");
X if (path_fcodes == NULL)
X path_fcodes = _PATH_FCODES;
X
X while ((ch = getopt(argc, argv, "?md:")) != EOF)
X switch((char)ch) {
X case 'd':
X path_fcodes = optarg;
X break;
X case 'm':
X f_mmap = 1;
X break;
X
X case '?':
X default:
X usage();
X }
X argv += optind;
X argc -= optind;
X
X if (argc < 1)
X usage();
X
X
X if (!f_mmap) {
X
X while (*argv) {
X if (!(fp = fopen(path_fcodes, "r"))) {
X (void)fprintf(stderr, "locate: no database file %s.\n",
X path_fcodes);
X exit(1);
X }
X fastfind(*argv++);
X fclose(fp);
X }
X
X } else {
X#ifndef MMAP
X (void)fprintf(stderr, "mmap not implemented\n");
X exit(1);
X#else
X if ((fd = open(path_fcodes, O_RDONLY)) < 0) {
X perror("open");
X exit(1);
X }
X
X if (fstat(fd, &sb) == -1) {
X perror("fstat");
X exit(1);
X }
X len = sb.st_size;
X
X if ((p = mmap((caddr_t)0, (size_t)len,
X PROT_READ, MAP_SHARED,
X fd, (off_t)0)) < 0) {
X perror("mmap"); exit(1);
X }
X
X while (*argv) {
X fastfind_mmap(*argv++, p, len);
X }
X close(fd);
X#endif
X }
X
X
X exit(0);
X}
X
Xfastfind(pathpart)
X char *pathpart;
X{
X register char *p, *s, *patend, *q;
X register int c, foundchar;
X
X int count, found, globflag;
X char *cutoff, *patprep();
X char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
X
X for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
X p[c] = getc(fp), s[c] = getc(fp);
X
X p = pathpart;
X globflag = index(p, '*') || index(p, '?') || index(p, '[');
X patend = patprep(p);
X
X found = 0;
X for (c = getc(fp), count = 0; c != EOF;) {
X#if 0
X count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
X#else
X if (c == SWITCH) {
X count += getw(fp) - OFFSET;
X#if (DEBUG > 1)
X fprintf(stderr, "%s %d\n", path, count);
X#endif
X } else {
X count += c - OFFSET;
X }
X#endif
X /* overlay old path */
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X foundchar = 0;
X#endif
X for (p = path + count; (c = getc(fp)) > SWITCH;)
X if (c < PARITY) {
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X if (c == *patend)
X foundchar++;
X#endif
X *p++ = c;
X }
X else { /* bigrams are parity-marked */
X c &= PARITY - 1;
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X if (bigram1[c] == *patend ||
X bigram2[c] == *patend)
X foundchar++;
X#endif
X *p++ = bigram1[c],
X *p++ = bigram2[c];
X }
X *p-- = NULL;
X
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X /*
X * 1. new chars don't match pattern
X * 2. no old match
X * --> goto next word
X */
X if (!foundchar && !found)
X continue;
X#endif
X
X cutoff = (found ? path : path + count);
X
X#if (DEBUG > 1)
X fprintf(stderr, "Path: %s cutoff: %s count: %d\n",
X path, cutoff, count);
X#endif /* DEBUG */
X
X for (found = 0, s = p; s >= cutoff; s--)
X if (*s == *patend) { /* fast first char check */
X for (p = patend - 1, q = s - 1; *p != NULL;
X p--, q--)
X if (*q != *p)
X break;
X if (*p == NULL) { /* fast match success */
X found = 1;
X if (!globflag ||
X !fnmatch(pathpart, path, 0))
X (void)puts(path);
X
X break;
X }
X }
X }
X}
X
X#ifdef MMAP
Xfastfind_mmap(pathpart, paddr, len)
X char *pathpart;
X u_char *paddr; /* important! must be an unsigned char */
X int len;
X{
X register char *s, *p, *patend, *q;
X register int c, foundchar;
X
X int count, found, globflag, i;
X char *cutoff, *patprep();
X char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
X
X for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
X p[c] = *paddr++, s[c] = *paddr++;
X
X len -= (NBG + NBG);
X
X p = pathpart;
X globflag = index(p, '*') || index(p, '?') || index(p, '[');
X patend = patprep(p);
X
X found = 0;
X for (c = *paddr++, count = 0; len > 0; len--) {
X if (c == SWITCH) {
X
X#if 1
X /* read integer direct from mmap pointer */
X count += *(int *)paddr - OFFSET;
X len -= sizeof(int); paddr += sizeof(int);
X#else
X
X
X#if 1 /* Hack for reading integer from mmap'ed file */
X /* little endian, lsb first: i386, vax */
X for (c = 0, i = 0; i < sizeof(int); i++, paddr++, len--)
X c += (*paddr << (8 * i));
X count += c - OFFSET;
X#else
X /* big endian, msb first: 6800, ibm, net, sun4, sol2 */
X for (c = 0, i = 1 - sizeof(int); i <= 0; i++, paddr++, len--)
X c += (*paddr << (8 * -i));
X count += c - OFFSET;
X
X#endif /* big endian */
X#endif /* *(int*)mmap pointer */
X
X
X#if (DEBUG > 1)
X fprintf(stderr, "%s %d\n", path, count);
X#endif
X
X#if (DEBUG >= 1)
X /*
X * something is wrong, we got further segv/sigbus or
X * garbage. Should not be reached ... (wrong byteorder?)
X */
X
X if (count < 0 || count > MAXPATHLEN) {
X fprintf(stderr, "count out of range: %d\nPath: %s\n",
X count, path);
X exit(1);
X }
X#endif /* DEBUG */
X
X } else {
X count += c - OFFSET;
X }
X /* overlay old path */
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X foundchar = 0;
X#endif
X
X#if (DEBUG > 1)
X fprintf(stderr, "path: %s cutoff: %s count: %d c: %c%c%c\n",
X path, cutoff, count, *(paddr), *(paddr+1), *(paddr+2));
X#endif /* DEBUG */
X
X
X for (p = path + count; (c = *paddr++) > SWITCH; len--)
X if (c < PARITY) {
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X if (c == *patend)
X foundchar++;
X#endif
X *p++ = c;
X }
X else { /* bigrams are parity-marked */
X c &= PARITY - 1;
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X if (bigram1[c] == *patend ||
X bigram2[c] == *patend)
X foundchar++;
X#endif
X *p++ = bigram1[c],
X *p++ = bigram2[c];
X }
X *p-- = NULL;
X
X#ifdef OPT_FAST_FIRST_CHAR_CHECK
X /*
X * 1. new chars don't match pattern
X * 2. no old match
X * --> goto next word
X */
X if (!foundchar && !found)
X continue;
X#endif
X
X cutoff = (found ? path : path + count);
X
X#if (DEBUG > 1)
X fprintf(stderr, "Path: %s cutoff: %s count: %d\n",
X path, cutoff, count);
X#endif /* DEBUG */
X
X for (found = 0, s = p; s >= cutoff; s--)
X if (*s == *patend) { /* fast first char check */
X for (p = patend - 1, q = s - 1; *p != NULL;
X p--, q--)
X if (*q != *p)
X break;
X if (*p == NULL) { /* fast match success */
X found = 1;
X if (!globflag ||
X !fnmatch(pathpart, path, 0))
X (void)puts(path);
X
X break;
X }
X }
X }
X}
X#endif
X
X/*
X * extract last glob-free subpattern in name for fast pre-match; prepend
X * '\0' for backwards match; return end of new pattern
X */
Xstatic char globfree[100];
X
Xchar *
Xpatprep(name)
X char *name;
X{
X register char *endmark, *p, *subp;
X
X subp = globfree;
X *subp++ = '\0';
X p = name + strlen(name) - 1;
X /* skip trailing metacharacters (and [] ranges) */
X for (; p >= name; p--)
X if (index("*?", *p) == 0)
X break;
X if (p < name)
X p = name;
X if (*p == ']')
X for (p--; p >= name; p--)
X if (*p == '[') {
X p--;
X break;
X }
X if (p < name)
X p = name;
X /*
X * if pattern has only metacharacters, check every path (force '/'
X * search)
X */
X if ((p == name) && index("?*[]", *p) != 0)
X *subp++ = '/';
X else {
X for (endmark = p; p >= name; p--)
X if (index("]*?", *p) != 0)
X break;
X for (++p;
X (p <= endmark) && subp < (globfree + sizeof(globfree));)
X *subp++ = *p++;
X }
X *subp = '\0';
X return(--subp);
X}
END-of-locate.c
exit
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199509052120.XAA01543>
