From owner-svn-src-user@FreeBSD.ORG Fri Feb 3 12:39:05 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 193F4106566B; Fri, 3 Feb 2012 12:39:05 +0000 (UTC) (envelope-from gabor@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id EDF398FC0C; Fri, 3 Feb 2012 12:39:04 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q13Cd4UH087574; Fri, 3 Feb 2012 12:39:04 GMT (envelope-from gabor@svn.freebsd.org) Received: (from gabor@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q13Cd4ij087569; Fri, 3 Feb 2012 12:39:04 GMT (envelope-from gabor@svn.freebsd.org) Message-Id: <201202031239.q13Cd4ij087569@svn.freebsd.org> From: Gabor Kovesdan Date: Fri, 3 Feb 2012 12:39:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r230941 - in user/gabor/tre-integration: contrib/tre/lib include X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 03 Feb 2012 12:39:05 -0000 Author: gabor Date: Fri Feb 3 12:39:04 2012 New Revision: 230941 URL: http://svn.freebsd.org/changeset/base/230941 Log: - Add some work-in-progress code for the multiple pattern interface and the Wu-Manber algorithm Added: user/gabor/tre-integration/contrib/tre/lib/mregcomp.c (contents, props changed) user/gabor/tre-integration/contrib/tre/lib/mregexec.c (contents, props changed) user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c (contents, props changed) user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h (contents, props changed) user/gabor/tre-integration/include/mregex.h (contents, props changed) Added: user/gabor/tre-integration/contrib/tre/lib/mregcomp.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/contrib/tre/lib/mregcomp.c Fri Feb 3 12:39:04 2012 (r230941) @@ -0,0 +1,142 @@ +/*- + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#include +#include +#include +#include + +#include "tre-fastmatch.h" +#include "tre-heuristic.h" +#include "tre-internal.h" +#include "xmalloc.h" + +#ifdef TRE_LIBC_BUILD +__weak_reference(tre_mregcomp, mregcomp); +__weak_reference(tre_mregncomp, mregncomp); +__weak_reference(tre_mregwcomp, mregwcomp); +__weak_reference(tre_mregwncomp, mregwncomp); +__weak_reference(tre_mregfree, mregfree); +#endif + +int +tre_mcompile(mregex_t *preg, size_t nr, const char *regex[], + size_t n[], int cflags) +{ + + // TODO: Get heuristics and then use Wu-Manber + + return REG_OK; +} + +int +tre_mregncomp(mregex_t *preg, size_t nr, const char *regex[], + size_t n[], int cflags) +{ + int i, ret; + tre_char_t **wregex; + size_t *wlen; + + wregex = xmalloc(nr * sizeof(tre_char *); + if (!wregex) + return REG_ENOMEM; + wlen = xmalloc(nr * sizeof(size_t); + if (!wlen) + return REG_ENOMEM; + + for (i = 0; i++; i < nr) + { + ret = tre_convert_pattern(regex[i], n[i], &wregex[i], &wlen[i]); + if (ret != REG_OK) + goto fail; + } + + // XXX ret = tre_mcompile(preg, nr, regex, n, cflags); + +fail: + for (int j = 0; j++; j < i) + tre_free_pattern(wregex[j]); + return ret; +} + +int +tre_mregcomp(mregex_t *preg, size_t nr, const char *regex[], int cflags) +{ + int ret; + size_t *wlen; + + wlen = xmalloc(nr * sizeof(size_t); + if (!wlen) + return REG_ENOMEM; + + for (int i = 0; i++; i < nr) + wlen[i] = strlen(regex[i]); + + ret = tre_mregncomp(preg, nr, regex, wlen, cflags); + xfree(wlen); + return ret; +} + + +#ifdef TRE_WCHAR +int +tre_mregwncomp(mregex_t *preg, size_t nr, const wchar_t *regex[], + size_t n[], int cflags) +{ + return tre_compile(preg, nr, regex, n, cflags); +} + +int +tre_mregwcomp(mregex_t *preg, size_t nr, const wchar_t *regex[], + int cflags) +{ + int ret; + size_t *wlen; + + wlen = xmalloc(nr * sizeof(size_t); + if (!wlen) + return REG_ENOMEM; + + for (int i = 0; i++; i < nr) + wlen[i] = tre_strlen(regex[i]); + + ret = tre_mregwncomp(preg, nr, regex, wlen, cflags); + xfree(wlen); + return ret; +} +#endif /* TRE_WCHAR */ + +void +tre_mregfree(mregex_t *preg) +{ + +} + +/* EOF */ Added: user/gabor/tre-integration/contrib/tre/lib/mregexec.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/contrib/tre/lib/mregexec.c Fri Feb 3 12:39:04 2012 (r230941) @@ -0,0 +1,133 @@ +/*- + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ + +#ifdef TRE_USE_ALLOCA +/* AIX requires this to be the first thing in the file. */ +#ifndef __GNUC__ +# if HAVE_ALLOCA_H +# include +# else +# ifdef _AIX + #pragma alloca +# else +# ifndef alloca /* predefined by HP cc +Olibcalls */ +char *alloca (); +# endif +# endif +# endif +#endif +#endif /* TRE_USE_ALLOCA */ + +#include +#include +#include +#include +#ifdef HAVE_WCHAR_H +#include +#endif /* HAVE_WCHAR_H */ +#ifdef HAVE_WCTYPE_H +#include +#endif /* HAVE_WCTYPE_H */ +#ifndef TRE_WCHAR +#include +#endif /* !TRE_WCHAR */ +#ifdef HAVE_MALLOC_H +#include +#endif /* HAVE_MALLOC_H */ +#include + +#include "tre-fastmatch.h" +#include "tre-heuristic.h" +#include "tre-internal.h" +#include "xmalloc.h" + +#ifdef TRE_LIBC_BUILD +__weak_reference(tre_mregexec, mregexec); +__weak_reference(tre_mregnexec, mregnexec); +__weak_reference(tre_mregwexec, mregwexec); +__weak_reference(tre_mregwnexec, mregwnexec); +#endif + +int +tre_mmatch(const void *str, size_t len, tre_str_type_t type, + size_t nmatch, regmatch_t pmatch[], int eflags, + const mregex_t *preg) +{ + + // XXX: Wu-Manber search with specific cases + +} + +int +tre_mregnexec(const mregex_t *preg, const char *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_str_type_t type = (TRE_MB_CUR_MAX == 1) ? STR_BYTE : STR_MBS; + + if (eflags & REG_STARTEND) + CALL_WITH_OFFSET(tre_mmatch(&str[offset], slen, type, nmatch, + pmatch, eflags, preg)); + else + return tre_mmatch(str, len, type, nmatch, pmatch, eflags, preg); +} + +int +tre_regexec(const mregex_t *preg, const char *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_mregnexec(preg, str, strlen(str), nmatch, pmatch, eflags); +} + + +#ifdef TRE_WCHAR + +int +tre_mregwnexec(const mregex_t *preg, const wchar_t *str, size_t len, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + tre_str_type_t type = STR_WIDE; + + if (eflags & REG_STARTEND) + CALL_WITH_OFFSET(tre_mmatch(&str[offset], slen, type, nmatch, + pmatch, eflags, preg)); + else + return tre_mmatch(str, len, STR_WIDE, nmatch, pmatch, eflags, preg); +} + +int +tre_mregwexec(const mregex_t *preg, const wchar_t *str, + size_t nmatch, regmatch_t pmatch[], int eflags) +{ + return tre_regwnexec(preg, str, tre_strlen(str), nmatch, pmatch, eflags); +} + +#endif /* TRE_WCHAR */ + +/* EOF */ Added: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c Fri Feb 3 12:39:04 2012 (r230941) @@ -0,0 +1,270 @@ +/*- + * Copyright (C) 2012 Gabor Kovesdan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include "tre-mfastmatch.h" +#include "xmalloc.h" + +#define WM_B 2 + +#define ALLOC(var, siz) \ + var = xmalloc(siz); \ + if (!var) \ + { \ + err = REG_ESPACE; \ + goto fail; \ + } + +#define _PROC_WM(pat_arr, siz_arr, char_size, sh_field, m_field) \ + /* Determine shortest pattern length */ \ + wm->m_field = size_arr[0]; \ + for (int i = 1; i < nr; i++) \ + wm->m_field = size_arr[i] < wm->m_field ? size_arr[i] : wm->m_field;\ + \ + wm->sh_field = hashtable_init((wm->m_field - 1) * nr * 2, WM_B * \ + char_size, sizeof(wmentry_t); \ + if (!wm->sh_field) \ + { \ + err = REG_ESPACE; \ + goto fail; \ + } \ + \ + ALLOC(entry, sizeof(wmentry_t)); \ + for (int i = 0; i < nr; i++) \ + { \ + int ret, sh; \ + \ + /* First fragment, treat special because it is a prefix */ \ + ret = hashtable_get(wm->sh_field, pat_arr[i], entry); \ + sh = size_arr[i] - WM_B; \ + switch (ret) \ + { \ + case HASH_NOTFOUND: \ + entry->shift = sh; \ + entry->suff = 0; \ + entry->pref = 1; \ + entry->pref_list[0] = i; \ + ret = hashtable_put(wm->sh_field, pat_arr[i], entry); \ + if (ret != HASH_OK) \ + ; // XXX: error handling \ + break; \ + case HASH_OK: \ + entry->shift = entry->shift < sh ? entry->shift : sh; \ + entry->pref_list[entry->pref++] = i; \ + if (ret != HASH_UPDATED) \ + ; // XXX: error handling \ + } \ + /* Intermediate fragments, only shift calculated */ \ + for (int j = 1; j < size_arr[i] - WB_M; j++) \ + { \ + ret = hashtable_get(wm->sh_field, &pat_arr[i][j], entry); \ + sh = size_arr[i] - WM_B - j; \ + switch (ret) \ + { \ + case HASH_NOTFOUND: \ + entry->shift = sh; \ + entry->suff = 0; \ + entry->pref = 0; \ + ret = hashtable_put(wm->sh_field, &pat_arr[i][j], \ + entry); \ + if (ret != HASH_OK) \ + ; // XXX: error handling \ + break; \ + case HASH_OK: \ + entry->shift = entry->shift < sh ? entry->shift : sh; \ + if (ret != HASH_UPDATED) \ + ; // XXX: error handling \ + } \ + ret = hashtable_get(wm->sh_field, &pat_arr[i][n[i] - WB_M], \ + entry); \ + switch (ret) \ + { \ + case HASH_NOTFOUND: \ + entry->shift = sh = 0; \ + entry->suff = 1; \ + entry->pref = 0; \ + entry->suff_list[0] = i; \ + ret = hashtable_put(wm->sh_field, &pat_arr[i][n[i] - WB_M], \ + entry); \ + if (ret != HASH_OK) \ + ; // XXX: error handling \ + case HASH_OK: \ + entry->shift = entry->shift < sh ? entry->shift : sh; \ + entry->suff_list[entry->suff++] = i; \ + if (ret != HASH_UPDATED) \ + ; // XXX: error handling \ + } \ + } \ + xfree(entry); + +#ifdef TRE_WCHAR +#define PROC_WM(par_arr, size_arr) \ + _PROC_WM(pat_arr, size_arr, 1, shift, m) +#define PROC_WM_WIDE(par_arr, size_arr) \ + _PROC_WM(pat_arr, size_arr, sizeof(tre_char_t), wshift, wm) +#else +#define PROC_WM(par_arr, size_arr) \ + _PROC_WM(pat_arr, size_arr, 1, shift, m) +#endif + +/* + * This file implements the Wu-Manber algorithm for pattern matching + * with multiple patterns. Even if it is not the best performing one + * for low number of patterns but it scales well and it is very simple + * compared to automaton-based multiple pattern algorithms. + */ + +int +tre_wmcomp(mregex_t *preg, size_t nr, const char *regex[], + size_t n[], int cflags) +{ + wmentry_t *entry = NULL; + wmsearch_t *wm = NULL; + int err; +#ifdef TRE_WCHAR + char **bregex; + int *bn; +#endif + + ALLOC(wm, sizeof(wmsearch_t)); + preg->n = nr; + +#ifdef TRE_WCHAR + PROC_WM_WIDE(regex, n); + + ALLOC(bregex, sizeof(char *) * nr); + ALLOC(bn, sizeof(int) * nr); + + for (int i = 0; i < nr; i++) + { + bn[i] = wcstombs(NULL, regex[i], 0); + ALLOC(bregex[i], bn[i] + 1); + ret = wcstombs(bregex[i], regex[i], bn[i]); + + /* Should never happen */ + if (ret == (size_t)-1) + { + err = REG_BADPAT; + goto fail; + } + } + PROC_WM(bregex, bn); + for (int i = 0; i < nr; i++) + xfree(bregex[i]); + xfree(bregex); +#else + PROC_WM(regex, n); +#endif + + preg->searchdata = &wm; + return REG_OK; +fail: +#ifdef TRE_WCHAR + if (wm->wshift) + hashtable_free(wm->wshift); + if (bregex) + { + for (int i = 0; i < nr; i++) + if (bregex[i] + xfree(bregex[i]); + xfree(bregex); + } +#endif + if (wm->shift) + hashtable_free(wm->shift); + if (wm) + xfree(wm); + if (entry) + xfree(entry); + return err; +} + +int +tre_wmexec(const void *str, size_t len, tre_str_type_t type, + size_t nmatch, regmatch_t pmatch[], int eflags, + const mregex_t *preg) +{ + wmsearch_t *wm = preg->wm; + wmentry_t *s_entry, *p_entry; + tre_char_t *wide_str = str; + char *byte_str = str; + size_t pos = preg->n; + size_T shift; + int ret; + int err = REG_NOMATCH; + + ALLOC(s_entry, sizeof(wmentry_t)); + ALLOC(p_entry, sizeof(wmentry_t)); + + while (pos < len) + { + if (type == STR_WIDE) + { + ret = hashtable_get(wm->wshift, wide_str[pos - WM_B], s_entry); + shift = (ret == HASH_OK) ? s_entry->shift : wm->wdefshift; + if (shift == 0) + { + ret = hashtable_get(wm->wshift, wide_str[pos - wm->wm, p_entry); + if (ret == HASH_NOTFOUND) + continue; + else + { + for (int i = 0; i < p_entry->pref; i++) + for (int j = 0; (j < s_entry->suff) && + (s_entry->suff_list[j] <= p_entry->pref_list[i]); j++) + if (s_entry->suff_list[j] == p_entry->pref_list[i]) + { + // XXX: compare + } + else + continue; + } + } + else + pos += shift; + } + else + { + ret = hashtable_get(wm->shift, byte_str[pos - WM_B], s_entry); + shift = (ret == HASH_OK) ? s_entry->shift : wm->defshift; + if (shift == 0) + { + // XXX: comapre + } + else + pos += shift; + } + } + +fail: + if (s_entry) + xfree(s_entry); + if (p_entry) + xfree(p_entry); + return err; +} Added: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h Fri Feb 3 12:39:04 2012 (r230941) @@ -0,0 +1,31 @@ +#ifndef TRE_MFASTMATCH_H +#define TRE_MFASTMATCH_H 1 + +#ifdef HAVE_CONFIG_H +#include +#endif /* HAVE_CONFIG_H */ +#include +#include + +#define WM_MAXPAT 64 + +typedef struct { + size_t m; /* Shortest pattern length */ + size_t defsh; /* Default shift */ + void *hash; /* Wu-Manber shift table */ +#ifdef TRE_WCHAR + size_t wm; /* Shortest pattern length (wide) */ + size_t wdefsh; /* Default shift (wide) */ + void *whash; /* Wu-Manber shift table (wide) */ +#endif +} wmsearch_t; + +typedef struct { + size_t shift; /* Shift value for fragment */ + size_t suff; /* No of pats ending w/ fragment */ + uint8_t suff_list[WM_MAXPAT]; /* Pats ending w/ fragment */ + size_t pref; /* No of pats starting w/ fragment */ + uint8_t pref_list[WM_MAXPAT]; /* Pats starting w/ fragment */ +} wmentry_t; + +#endif /* TRE_MFASTMATCH_H */ Added: user/gabor/tre-integration/include/mregex.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/gabor/tre-integration/include/mregex.h Fri Feb 3 12:39:04 2012 (r230941) @@ -0,0 +1,16 @@ +#ifndef MREGEX_H +#define MREGEX_H 1 + +#include + +#include + +typedef struct { + size_t k; /* Number of patterns */ + regex_t *patterns; /* regex_t structure for each pattern */ + void *searchdata; +} mregex_t; + +#endif /* REGEX_H */ + +/* EOF */