Date: Fri, 11 May 2001 14:39:05 +0300 From: Ruslan Ermilov <ru@FreeBSD.org> To: audit@FreeBSD.org, freebsd-standards@bostonradio.org Subject: Please review: new implementation of hsearch(3) Message-ID: <20010511143904.A58159@sunbay.com>
next in thread | raw e-mail | index | archive | help
--h31gzZEtNLTqOjlF Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hi! Our current hsearch(3) implementation is buggy in many ways. The most limiting factor is that it can only handle char * item.data despite the type was recently changed to void *. (See example in the manpage.) The attached patch merely merges NetBSD re-implementation of the hsearch(3), which is POSIX.1-200x conformant. Cheers, -- Ruslan Ermilov Oracle Developer/DBA, ru@sunbay.com Sunbay Software AG, ru@FreeBSD.org FreeBSD committer, +380.652.512.251 Simferopol, Ukraine http://www.FreeBSD.org The Power To Serve http://www.oracle.com Enabling The Information Age --h31gzZEtNLTqOjlF Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="libc_hsearch.patch" Index: db/hash/Makefile.inc =================================================================== RCS file: /home/ncvs/src/lib/libc/db/hash/Makefile.inc,v retrieving revision 1.3 diff -u -p -r1.3 Makefile.inc --- db/hash/Makefile.inc 1999/08/27 23:58:18 1.3 +++ db/hash/Makefile.inc 2001/05/11 10:59:18 @@ -4,4 +4,4 @@ .PATH: ${.CURDIR}/../libc/db/hash SRCS+= hash.c hash_bigkey.c hash_buf.c hash_func.c hash_log2.c \ - hash_page.c hsearch.c ndbm.c + hash_page.c ndbm.c Index: db/hash/hsearch.c =================================================================== RCS file: hsearch.c diff -N hsearch.c --- /home/ru/tmp/cvskuzjM35216 Fri May 11 13:59:18 2001 +++ /dev/null Fri May 11 10:43:20 2001 @@ -1,109 +0,0 @@ -/*- - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/lib/libc/db/hash/hsearch.c,v 1.3 2000/07/06 20:04:34 alfred Exp $ - */ - -#if defined(LIBC_SCCS) && !defined(lint) -static char sccsid[] = "@(#)hsearch.c 8.4 (Berkeley) 7/21/94"; -#endif /* LIBC_SCCS and not lint */ - -#include <sys/types.h> - -#include <fcntl.h> -#include <string.h> - -#include <db.h> -#include <search.h> - -static DB *dbp = NULL; -static ENTRY retval; - -extern int -hcreate(nel) - size_t nel; -{ - HASHINFO info; - - info.nelem = nel; - info.bsize = 256; - info.ffactor = 8; - info.cachesize = 0; - info.hash = NULL; - info.lorder = 0; - dbp = (DB *)__hash_open(NULL, O_CREAT | O_RDWR, 0600, &info, 0); - return (dbp != NULL); -} - -extern ENTRY * -hsearch(item, action) - ENTRY item; - ACTION action; -{ - DBT key, val; - int status; - - if (!dbp) - return (NULL); - key.data = (u_char *)item.key; - key.size = strlen(item.key) + 1; - - if (action == ENTER) { - val.data = (u_char *)item.data; - val.size = strlen(item.data) + 1; - status = (dbp->put)(dbp, &key, &val, R_NOOVERWRITE); - if (status) - return (NULL); - } else { - /* FIND */ - status = (dbp->get)(dbp, &key, &val, 0); - if (status) - return (NULL); - else - item.data = (char *)val.data; - } - retval.key = item.key; - retval.data = item.data; - return (&retval); -} - -extern void -hdestroy() -{ - if (dbp) { - (void)(dbp->close)(dbp); - dbp = NULL; - } -} Index: stdlib/Makefile.inc =================================================================== RCS file: /home/ncvs/src/lib/libc/stdlib/Makefile.inc,v retrieving revision 1.26 diff -u -p -r1.26 Makefile.inc --- stdlib/Makefile.inc 2001/04/23 11:11:00 1.26 +++ stdlib/Makefile.inc 2001/05/11 10:59:18 @@ -5,8 +5,8 @@ .PATH: ${.CURDIR}/../libc/${MACHINE_ARCH}/stdlib ${.CURDIR}/../libc/stdlib MISRCS+=abort.c abs.c atexit.c atof.c atoi.c atol.c bsearch.c calloc.c div.c \ - exit.c getenv.c getopt.c getsubopt.c heapsort.c labs.c ldiv.c \ - malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \ + exit.c getenv.c getopt.c getsubopt.c hcreate.c heapsort.c labs.c \ + ldiv.c malloc.c merge.c putenv.c qsort.c radixsort.c rand.c random.c \ reallocf.c realpath.c setenv.c strhash.c strtol.c strtoll.c strtoq.c \ strtoul.c strtoull.c strtouq.c system.c tdelete.c tfind.c tsearch.c \ twalk.c @@ -25,11 +25,12 @@ SRCS+= strtod.c .if ${LIB} == "c" MAN+= abort.3 abs.3 alloca.3 atexit.3 atof.3 atoi.3 atol.3 bsearch.3 \ - div.3 exit.3 getenv.3 getopt.3 getsubopt.3 labs.3 \ + div.3 exit.3 getenv.3 getopt.3 getsubopt.3 hcreate.3 labs.3 \ ldiv.3 malloc.3 memory.3 qsort.3 radixsort.3 rand.3 random.3 \ realpath.3 strtod.3 strtol.3 strtoul.3 system.3 tsearch.3 MLINKS+=getenv.3 putenv.3 getenv.3 setenv.3 getenv.3 unsetenv.3 +MLINKS+=hcreate.3 hdestroy.3 hcreate.3 hsearch.3 MLINKS+=qsort.3 heapsort.3 qsort.3 mergesort.3 MLINKS+=rand.3 rand_r.3 rand.3 srand.3 rand.3 sranddev.3 MLINKS+=random.3 initstate.3 random.3 setstate.3 random.3 srandom.3 \ Index: stdlib/hcreate.3 =================================================================== RCS file: hcreate.3 diff -N hcreate.3 --- /dev/null Fri May 11 10:43:20 2001 +++ hcreate.3 Fri May 11 13:59:18 2001 @@ -0,0 +1,206 @@ +.\" $FreeBSD$ +.\" +.Dd May 8, 2001 +.Os +.Dt HCREATE 3 +.Sh NAME +.Nm hcreate , hdestroy , hsearch +.Nd manage hash search table +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In search.h +.Ft int +.Fn hcreate "size_t nel" +.Ft void +.Fn hdestroy void +.Ft ENTRY * +.Fn hsearch "ENTRY item" "ACTION action" +.Sh DESCRIPTION +The +.Fn hcreate , +.Fn hdestroy , +and +.Fn hsearch +functions manage hash search tables. +.Pp +The +.Fn hcreate +function allocates sufficient space for the table, and the application should +ensure it is called before +.Fn hsearch +is used. +The +.Fa nel +argument is an estimate of the maximum +number of entries that the table should contain. +This number may be adjusted upward by the +algorithm in order to obtain certain mathematically favorable circumstances. +.Pp +The +.Fn hdestroy +function disposes of the search table, and may be followed by another call to +.Fn hcreate . +After the call to +.Fn hdestroy , +the data can no longer be considered accessible. +.Pp +The +.Fn hsearch +function is a hash-table search routine. +It returns a pointer into a hash table +indicating the location at which an entry can be found. +The +.Fa item +argument is a structure of type +.Vt ENTRY +(defined in the +.Aq Pa search.h +header) containing two pointers: +.Fa item.key +points to the comparison key (a +.Vt "char *" ) , +and +.Fa item.data +(a +.Vt "void *" ) +points to any other data to be associated with +that key. +The comparison function used by +.Fn hsearch +is +.Xr strcmp 3 . +The +.Fa action +argument is a +member of an enumeration type +.Vt ACTION +indicating the disposition of the entry if it cannot be +found in the table. +.Dv ENTER +indicates that the +.Fa item +should be inserted in the table at an +appropriate point. +.Dv FIND +indicates that no entry should be made. +Unsuccessful resolution is +indicated by the return of a +.Dv NULL +pointer. +.Sh RETURN VALUES +The +.Fn hcreate +function returns 0 if it cannot allocate sufficient space for the table; +otherwise, it returns non-zero. +.Pp +The +.Fn hdestroy +function does not return a value. +.Pp +The +.Fn hsearch +function returns a +.Dv NULL +pointer if either the +.Fa action +is +.Dv FIND +and the +.Fa item +could not be found or the +.Fa action +is +.Dv ENTER +and the table is full. +.Sh ERRORS +The +.Fn hcreate +and +.Fn hsearch +functions may fail if: +.Bl -tag -width Er +.It Bq Er ENOMEM +Insufficient storage space is available. +.El +.Sh EXAMPLES +The following example reads in strings followed by two numbers +and stores them in a hash table, discarding duplicates. +It then reads in strings and finds the matching entry in the hash +table and prints it out. +.Bd -literal +#include <stdio.h> +#include <search.h> +#include <string.h> + +struct info { /* This is the info stored in the table */ + int age, room; /* other than the key. */ +}; + +#define NUM_EMPL 5000 /* # of elements in search table. */ + +int +main(void) +{ + char string_space[NUM_EMPL*20]; /* Space to store strings. */ + struct info info_space[NUM_EMPL]; /* Space to store employee info. */ + char *str_ptr = string_space; /* Next space in string_space. */ + struct info *info_ptr = info_space; /* Next space in info_space. */ + ENTRY item; + ENTRY *found_item; /* Name to look for in table. */ + char name_to_find[30]; + int i = 0; + + /* Create table; no error checking is performed. */ + (void) hcreate(NUM_EMPL); + + while (scanf("%s%d%d", str_ptr, &info_ptr->age, + &info_ptr->room) != EOF && i++ < NUM_EMPL) { + /* Put information in structure, and structure in item. */ + item.key = str_ptr; + item.data = info_ptr; + str_ptr += strlen(str_ptr) + 1; + info_ptr++; + /* Put item into table. */ + (void) hsearch(item, ENTER); + } + + /* Access table. */ + item.key = name_to_find; + while (scanf("%s", item.key) != EOF) { + if ((found_item = hsearch(item, FIND)) != NULL) { + /* If item is in the table. */ + (void)printf("found %s, age = %d, room = %d\n", + found_item->key, + ((struct info *)found_item->data)->age, + ((struct info *)found_item->data)->room); + } else + (void)printf("no such employee %s\n", name_to_find); + } + return 0; +} +.Ed +.Sh SEE ALSO +.Xr bsearch 3 , +.Xr lsearch 3 , +.Xr malloc 3 , +.Xr strcmp 3 , +.Xr tsearch 3 +.Sh STANDARDS +The +.Fn hcreate , +.Fn hdestroy , +and +.Fn hsearch +functions conform to +.St -xpg4.2 . +.Sh HISTORY +The +.Fn hcreate , +.Fn hdestroy , +and +.Fn hsearch +functions first appeared in +.At V . +.Sh BUGS +The interface permits the use of only one hash table at a time. Index: stdlib/hcreate.c =================================================================== RCS file: hcreate.c diff -N hcreate.c --- /dev/null Fri May 11 10:43:20 2001 +++ hcreate.c Fri May 11 13:59:18 2001 @@ -0,0 +1,184 @@ +/* $NetBSD: hcreate.c,v 1.2 2001/02/19 21:26:04 ross Exp $ */ +/* $FreeBSD$ */ + +/* + * Copyright (c) 2001 Christopher G. Demetriou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed for the + * NetBSD Project. See http://www.netbsd.org/ for + * information about NetBSD. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * <<Id: LICENSE,v 1.2 2000/06/14 15:57:33 cgd Exp>> + */ + +/* + * hcreate() / hsearch() / hdestroy() + * + * SysV/XPG4 hash table functions. + * + * Implementation done based on NetBSD manual page and Solaris manual page, + * plus my own personal experience about how they're supposed to work. + * + * I tried to look at Knuth (as cited by the Solaris manual page), but + * nobody had a copy in the office, so... + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +__RCSID("$NetBSD: hcreate.c,v 1.2 2001/02/19 21:26:04 ross Exp $"); +#endif /* LIBC_SCCS and not lint */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <errno.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include "namespace.h" + +/* + * DO NOT MAKE THIS STRUCTURE LARGER THAN 32 BYTES (4 ptrs on 64-bit + * ptr machine) without adjusting MAX_BUCKETS_LG2 below. + */ +struct internal_entry { + SLIST_ENTRY(internal_entry) link; + ENTRY ent; +}; +SLIST_HEAD(internal_head, internal_entry); + +#define MIN_BUCKETS_LG2 4 +#define MIN_BUCKETS (1 << MIN_BUCKETS_LG2) + +/* + * max * sizeof internal_entry must fit into size_t. + * assumes internal_entry is <= 32 (2^5) bytes. + */ +#define MAX_BUCKETS_LG2 (sizeof (size_t) * 8 - 1 - 5) +#define MAX_BUCKETS ((size_t)1 << MAX_BUCKETS_LG2) + +/* Default hash function, from db/hash/hash_func.c */ +extern u_int32_t (*__default_hash)(const void *, size_t); + +static struct internal_head *htable; +static size_t htablesize; + +int +hcreate(size_t nel) +{ + size_t idx; + unsigned int p2; + + /* Make sure this this isn't called when a table already exists. */ + if (htable != NULL) { + errno = EINVAL; + return 0; + } + + /* If nel is too small, make it min sized. */ + if (nel < MIN_BUCKETS) + nel = MIN_BUCKETS; + + /* If it's too large, cap it. */ + if (nel > MAX_BUCKETS) + nel = MAX_BUCKETS; + + /* If it's is not a power of two in size, round up. */ + if ((nel & (nel - 1)) != 0) { + for (p2 = 0; nel != 0; p2++) + nel >>= 1; + nel = 1 << p2; + } + + /* Allocate the table. */ + htablesize = nel; + htable = malloc(htablesize * sizeof htable[0]); + if (htable == NULL) { + errno = ENOMEM; + return 0; + } + + /* Initialize it. */ + for (idx = 0; idx < htablesize; idx++) + SLIST_INIT(&htable[idx]); + + return 1; +} + +void +hdestroy(void) +{ + struct internal_entry *ie; + size_t idx; + + if (htable == NULL) + return; + + for (idx = 0; idx < htablesize; idx++) { + while (!SLIST_EMPTY(&htable[idx])) { + ie = SLIST_FIRST(&htable[idx]); + SLIST_REMOVE_HEAD(&htable[idx], link); + free(ie->ent.key); + free(ie); + } + } + free(htable); + htable = NULL; +} + +ENTRY * +hsearch(ENTRY item, ACTION action) +{ + struct internal_head *head; + struct internal_entry *ie; + uint32_t hashval; + size_t len; + + len = strlen(item.key); + hashval = (*__default_hash)(item.key, len); + + head = &htable[hashval & (htablesize - 1)]; + ie = SLIST_FIRST(head); + while (ie != NULL) { + if (strcmp(ie->ent.key, item.key) == 0) + break; + ie = SLIST_NEXT(ie, link); + } + + if (ie != NULL) + return &ie->ent; + else if (action == FIND) + return NULL; + + ie = malloc(sizeof *ie); + if (ie == NULL) + return NULL; + ie->ent.key = item.key; + ie->ent.data = item.data; + + SLIST_INSERT_HEAD(head, ie, link); + return &ie->ent; +} --h31gzZEtNLTqOjlF-- To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-audit" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010511143904.A58159>