Date: Wed, 17 Aug 2011 14:08:02 +0000 (UTC) From: Gabor Kovesdan <gabor@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r224939 - in user/gabor/tre-integration: contrib/tre/lib include lib/libc/regex Message-ID: <201108171408.p7HE82gh008332@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Wed Aug 17 14:08:02 2011 New Revision: 224939 URL: http://svn.freebsd.org/changeset/base/224939 Log: - Introduce a public interface for fast matching Added: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c (contents, props changed) user/gabor/tre-integration/include/fastmatch.h (contents, props changed) user/gabor/tre-integration/include/hashtable.h - copied unchanged from r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h Deleted: user/gabor/tre-integration/contrib/tre/lib/hashtable.h Modified: user/gabor/tre-integration/contrib/tre/lib/hashtable.c user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h user/gabor/tre-integration/include/Makefile user/gabor/tre-integration/lib/libc/regex/Makefile.inc Added: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Aug 17 14:08:02 2011 (r224939) @@ -0,0 +1,185 @@ +/*- + * Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <fastmatch.h> +#include <string.h> + +#include "tre-fastmatch.h" +#include "tre-internal.h" +#include "xmalloc.h" + +/* XXX: clean up */ +#define CONV_PAT \ + int ret; \ + tre_char_t *wregex; \ + size_t wlen; \ + \ + wregex = xmalloc(sizeof(tre_char_t) * (n + 1)); \ + if (wregex == NULL) \ + return REG_ESPACE; \ + else \ + { \ + int consumed; \ + tre_char_t *wcptr = wregex; \ + mbstate_t state; \ + memset(&state, '\0', sizeof(state)); \ + while (n > 0) \ + { \ + consumed = tre_mbrtowc(wcptr, regex, n, &state); \ + \ + switch (consumed) \ + { \ + case 0: \ + if (*regex == '\0') \ + consumed = 1; \ + else \ + { \ + xfree(wregex); \ + return REG_BADPAT; \ + } \ + break; \ + case -1: \ + DPRINT(("mbrtowc: error %d: %s.\n", errno, \ + strerror(errno))); \ + xfree(wregex); \ + return REG_BADPAT; \ + case -2: \ + consumed = n; \ + break; \ + } \ + regex += consumed; \ + n -= consumed; \ + wcptr++; \ + } \ + wlen = wcptr - wregex; \ + } \ + \ + wregex[wlen] = L'\0'; + +int +tre_fixncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags) +{ + CONV_PAT; + + ret = tre_compile_literal(preg, wregex, n, cflags); + xfree(wregex); + + return ret; +} + +int +tre_fastncomp(fastmatch_t *preg, const char *regex, size_t n, int cflags) +{ + CONV_PAT; + + ret = (cflags & REG_LITERAL) ? + tre_compile_literal(preg, wregex, n, cflags) : + tre_compile_fast(preg, wregex, n, cflags); + xfree(wregex); + + return ret; +} + + +int +tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags) +{ + return tre_fixncomp(preg, regex, 0, cflags); +} + +int +tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags) +{ + return tre_fastncomp(preg, regex, 0, cflags); +} + +int +tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags) +{ + return tre_compile_literal(preg, regex, n, cflags); +} + +int +tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t n, int cflags) +{ + return (cflags & REG_LITERAL) ? + tre_compile_literal(preg, regex, n, cflags) : + tre_compile_fast(preg, regex, n, cflags); +} + +int +tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags) +{ + return tre_fixwncomp(preg, regex, 0, cflags); +} + +int +tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags) +{ + return tre_fastwncomp(preg, regex, 0, cflags); +} + +void +tre_fastfree(fastmatch_t *preg) +{ + tre_free_fast(preg); +} + +int +tre_fastnexec(const fastmatch_t *preg, const char *string, size_t n, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; + + return tre_match_fast(preg, string, n, type, nmatch, + pmatch, eflags); +} + +int +tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags) +{ + return tre_fastnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags); +} + +int +tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t n, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_match_fast(preg, string, n, STR_WIDE, nmatch, + pmatch, eflags); +} + +int +tre_fastwexec(const fastmatch_t *preg, const wchar_t *string, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_fastwnexec(preg, string, (size_t)-1, nmatch, pmatch, eflags); +} + Modified: user/gabor/tre-integration/contrib/tre/lib/hashtable.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/hashtable.c Wed Aug 17 13:58:39 2011 (r224938) +++ user/gabor/tre-integration/contrib/tre/lib/hashtable.c Wed Aug 17 14:08:02 2011 (r224939) @@ -25,11 +25,10 @@ */ #include <sys/hash.h> +#include <hashtable.h> #include <stdlib.h> #include <string.h> -#include "hashtable.h" - hashtable *hashtable_init(size_t table_size, size_t key_size, size_t value_size) { Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Aug 17 13:58:39 2011 (r224938) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Aug 17 14:08:02 2011 (r224939) @@ -28,6 +28,7 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif /* HAVE_CONFIG_H */ +#include <hashtable.h> #include <limits.h> #include <regex.h> #include <stdbool.h> Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Aug 17 13:58:39 2011 (r224938) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Aug 17 14:08:02 2011 (r224939) @@ -28,6 +28,8 @@ #ifndef TRE_FASTMATCH_H #define TRE_FASTMATCH_H 1 +#include <fastmatch.h> +#include <hashtable.h> #include <limits.h> #include <regex.h> #include <stdbool.h> @@ -35,25 +37,6 @@ #include "hashtable.h" #include "tre-internal.h" -typedef struct { - size_t wlen; - size_t len; - tre_char_t *wpattern; - int hasdot; - int qsBc[UCHAR_MAX + 1]; - int *bmGs; - char *pattern; - int defBc; - hashtable *qsBc_table; - int *sbmGs; - /* flags */ - bool bol; - bool eol; - bool word; - bool icase; - bool newline; -} fastmatch_t; - int tre_compile_literal(fastmatch_t *preg, const tre_char_t *regex, size_t, int); int tre_compile_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int); Modified: user/gabor/tre-integration/include/Makefile ============================================================================== --- user/gabor/tre-integration/include/Makefile Wed Aug 17 13:58:39 2011 (r224938) +++ user/gabor/tre-integration/include/Makefile Wed Aug 17 14:08:02 2011 (r224939) @@ -9,9 +9,9 @@ CLEANFILES= osreldate.h version vers.c SUBDIR= arpa gssapi protocols rpcsvc rpc INCS= a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \ db.h \ - dirent.h dlfcn.h elf.h elf-hints.h err.h fmtmsg.h fnmatch.h fstab.h \ - fts.h ftw.h getopt.h glob.h grp.h gssapi.h \ - ieeefp.h ifaddrs.h \ + dirent.h dlfcn.h elf.h elf-hints.h err.h fastmatch.h fmtmsg.h fnmatch.h \ + fstab.h fts.h ftw.h getopt.h glob.h grp.h gssapi.h \ + hashtable.h ieeefp.h ifaddrs.h \ inttypes.h iso646.h kenv.h langinfo.h libgen.h limits.h link.h \ locale.h malloc.h malloc_np.h memory.h monetary.h mpool.h mqueue.h \ ndbm.h netconfig.h \ Added: user/gabor/tre-integration/include/fastmatch.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/include/fastmatch.h Wed Aug 17 14:08:02 2011 (r224939) @@ -0,0 +1,101 @@ +/*- + * Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef FASTMATCH_H +#define FASTMATCH_H 1 + +#include <hashtable.h> +#include <limits.h> +#include <regex.h> +#include <stdbool.h> +#include <wchar.h> + +typedef struct { + size_t wlen; + size_t len; + wchar_t *wpattern; + int hasdot; + int qsBc[UCHAR_MAX + 1]; + int *bmGs; + char *pattern; + int defBc; + hashtable *qsBc_table; + int *sbmGs; + + /* flags */ + bool bol; + bool eol; + bool word; + bool icase; + bool newline; +} fastmatch_t; + +extern int +tre_fixcomp(fastmatch_t *preg, const char *regex, int cflags); + +extern int +tre_fastcomp(fastmatch_t *preg, const char *regex, int cflags); + +extern int +tre_fastexec(const fastmatch_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags); + +extern void +tre_fastfree(fastmatch_t *preg); + +extern int +tre_fixwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags); + +extern int +tre_fastwcomp(fastmatch_t *preg, const wchar_t *regex, int cflags); + +extern int +tre_fastwexec(const fastmatch_t *preg, const wchar_t *string, + size_t nmatch, regmatch_t pmatch[], int eflags); + +/* Versions with a maximum length argument and therefore the capability to + handle null characters in the middle of the strings. */ +extern int +tre_fixncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags); + +extern int +tre_fastncomp(fastmatch_t *preg, const char *regex, size_t len, int cflags); + +extern int +tre_fastnexec(const fastmatch_t *preg, const char *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +extern int +tre_fixwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags); + +extern int +tre_fastwncomp(fastmatch_t *preg, const wchar_t *regex, size_t len, int cflags); + +extern int +tre_fastwnexec(const fastmatch_t *preg, const wchar_t *string, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags); + +#endif /* FASTMATCH_H */ Copied: user/gabor/tre-integration/include/hashtable.h (from r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/include/hashtable.h Wed Aug 17 14:08:02 2011 (r224939, copy of r223646, user/gabor/tre-integration/contrib/tre/lib/hashtable.h) @@ -0,0 +1,51 @@ +/*- + * Copyright (C) 2011 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef HASHTABLE_H +#define HASHTABLE_H 1 + +#include <sys/types.h> + +typedef struct { + void *key; + void *value; +} hashtable_entry; + +typedef struct { + size_t key_size; + size_t table_size; + size_t usage; + size_t value_size; + hashtable_entry **entries; +} hashtable; + +void hashtable_free(hashtable *); +int hashtable_get(hashtable *, const void *, void *); +hashtable *hashtable_init(size_t, size_t, size_t); +int hashtable_put(hashtable *, const void *, const void *); +int hashtable_remove(hashtable *, const void *); + +#endif /* HASHTABLE.H */ Modified: user/gabor/tre-integration/lib/libc/regex/Makefile.inc ============================================================================== --- user/gabor/tre-integration/lib/libc/regex/Makefile.inc Wed Aug 17 13:58:39 2011 (r224938) +++ user/gabor/tre-integration/lib/libc/regex/Makefile.inc Wed Aug 17 14:08:02 2011 (r224939) @@ -5,9 +5,10 @@ CFLAGS+=-DHAVE_CONFIG_H -DTRE_LIBC_BUILD -I${.CURDIR}/../../contrib/tre -SRCS+= hashtable.c regcomp.c regerror.c regexec.c tre-ast.c tre-compile.c \ - tre-fastmatch.c tre-match-approx.c tre-match-backtrack.c \ - tre-match-parallel.c tre-mem.c tre-parse.c tre-stack.c xmalloc.c +SRCS+= fastmatch.c hashtable.c regcomp.c regerror.c regexec.c tre-ast.c \ + tre-compile.c tre-fastmatch.c tre-match-approx.c \ + tre-match-backtrack.c tre-match-parallel.c tre-mem.c tre-parse.c \ + tre-stack.c xmalloc.c MAN+= regex.3 re_format.7
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201108171408.p7HE82gh008332>