Date: Mon, 6 Feb 2012 18:13:35 +0000 (UTC) From: Gabor Kovesdan <gabor@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r231094 - in user/gabor/tre-integration: contrib/tre/lib include Message-ID: <201202061813.q16IDZ9X047687@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Mon Feb 6 18:13:34 2012 New Revision: 231094 URL: http://svn.freebsd.org/changeset/base/231094 Log: Implement some missing parts of the Wu-Manber algorithm. It is almost complete now. Modified: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h user/gabor/tre-integration/include/regex.h Modified: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c Mon Feb 6 18:11:01 2012 (r231093) +++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.c Mon Feb 6 18:13:34 2012 (r231094) @@ -27,9 +27,18 @@ #ifdef HAVE_CONFIG_H #include <config.h> #endif /* HAVE_CONFIG_H */ +#include <mregex.h> +#include <regex.h> #include "tre-mfastmatch.h" #include "xmalloc.h" +/* TODO: + * + * - REG_ICASE + * - Store pattern and sizes in pat/wpat/siz/wsiz + * - Test + */ + #define WM_B 2 #define ALLOC(var, siz) \ @@ -40,6 +49,13 @@ goto fail; \ } +#define FAIL \ + do \ + { \ + err = REG_BADPAT; \ + goto fail; \ + } while (0) + #define _PROC_WM(pat_arr, siz_arr, char_size, sh_field, m_field) \ /* Determine shortest pattern length */ \ wm->m_field = size_arr[0]; \ @@ -71,13 +87,13 @@ entry->pref_list[0] = i; \ ret = hashtable_put(wm->sh_field, pat_arr[i], entry); \ if (ret != HASH_OK) \ - ; // XXX: error handling \ + FAIL; \ break; \ case HASH_OK: \ entry->shift = entry->shift < sh ? entry->shift : sh; \ entry->pref_list[entry->pref++] = i; \ if (ret != HASH_UPDATED) \ - ; // XXX: error handling \ + FAIL; \ } \ /* Intermediate fragments, only shift calculated */ \ for (int j = 1; j < size_arr[i] - WB_M; j++) \ @@ -93,12 +109,12 @@ ret = hashtable_put(wm->sh_field, &pat_arr[i][j], \ entry); \ if (ret != HASH_OK) \ - ; // XXX: error handling \ + FAIL; \ break; \ case HASH_OK: \ entry->shift = entry->shift < sh ? entry->shift : sh; \ if (ret != HASH_UPDATED) \ - ; // XXX: error handling \ + FAIL; \ } \ ret = hashtable_get(wm->sh_field, &pat_arr[i][n[i] - WB_M], \ entry); \ @@ -112,12 +128,12 @@ ret = hashtable_put(wm->sh_field, &pat_arr[i][n[i] - WB_M], \ entry); \ if (ret != HASH_OK) \ - ; // XXX: error handling \ + FAIL; \ case HASH_OK: \ entry->shift = entry->shift < sh ? entry->shift : sh; \ entry->suff_list[entry->suff++] = i; \ if (ret != HASH_UPDATED) \ - ; // XXX: error handling \ + FAIL; \ } \ } \ xfree(entry); @@ -204,16 +220,75 @@ fail: return err; } +#define MATCH(beg, end, p) \ + do \ + { \ + match->rm_so = beg; \ + match->rm_eo = end; \ + match->p = p; \ + err = REG_OK; \ + goto finish; \ + } while (0); + +#define _WMSEARCH(data, pats, sizes, mlen, tbl, dshift) \ + do \ + { \ + ret = hashtable_get(tbl, data[pos - WM_B], s_entry); \ + shift = (ret == HASH_OK) ? s_entry->shift : dshift; \ + if (shift == 0) \ + { \ + ret = hashtable_get(tbl, data[pos - mlen, p_entry); \ + if (ret == HASH_NOTFOUND) \ + { \ + pos += 1; \ + continue; \ + } \ + else \ + { \ + for (int i = 0; i < p_entry->pref; i++) \ + for (int j = 0; (j < s_entry->suff) && \ + (s_entry->suff_list[j] <= p_entry->pref_list[i]); \ + j++) \ + if (s_entry->suff_list[j] == p_entry->pref_list[i]) \ + { \ + size_t idx = s_entry->suff_list[j]; \ + int k; \ + \ + if (len - pos > sizes[idx] - mlen) \ + { \ + for (k = WM_B; k < sizes[idx]; k++) \ + if (pats[idx][k] != data[pos - mlen + k]) \ + break; \ + if (k == sizes[idx]) \ + // XXX: match \ + } \ + } \ + else \ + continue; \ + pos += 1; \ + } \ + } \ + else \ + pos += shift; \ + } while (0); + +#define WMSEARCH \ + _WMSEARCH(byte_str, wm->pat, wm->siz, wm->m, wm->hash, \ + wm->defsh) +#define WMSEARCH_WIDE \ + _WMSEARCH(wide_str, wm->wpats, wm->wsiz, wm->wm, wm->whash, \ + wm->wdefsh) + int tre_wmexec(const void *str, size_t len, tre_str_type_t type, size_t nmatch, regmatch_t pmatch[], int eflags, - const mregex_t *preg) + const mregex_t *preg, regmatch_t *match) { wmsearch_t *wm = preg->wm; wmentry_t *s_entry, *p_entry; tre_char_t *wide_str = str; char *byte_str = str; - size_t pos = preg->n; + size_t pos = preg->m; size_T shift; int ret; int err = REG_NOMATCH; @@ -224,47 +299,43 @@ tre_wmexec(const void *str, size_t len, while (pos < len) { if (type == STR_WIDE) - { - ret = hashtable_get(wm->wshift, wide_str[pos - WM_B], s_entry); - shift = (ret == HASH_OK) ? s_entry->shift : wm->wdefshift; - if (shift == 0) - { - ret = hashtable_get(wm->wshift, wide_str[pos - wm->wm, p_entry); - if (ret == HASH_NOTFOUND) - continue; - else - { - for (int i = 0; i < p_entry->pref; i++) - for (int j = 0; (j < s_entry->suff) && - (s_entry->suff_list[j] <= p_entry->pref_list[i]); j++) - if (s_entry->suff_list[j] == p_entry->pref_list[i]) - { - // XXX: compare - } - else - continue; - } - } - else - pos += shift; - } + WMSEARCH; else - { - ret = hashtable_get(wm->shift, byte_str[pos - WM_B], s_entry); - shift = (ret == HASH_OK) ? s_entry->shift : wm->defshift; - if (shift == 0) - { - // XXX: comapre - } - else - pos += shift; - } + WMSEARCH_WIDE; } fail: +finish: if (s_entry) xfree(s_entry); if (p_entry) xfree(p_entry); return err; } + +void +wmfree(mregex_t *preg) +{ + wmsearch_t wm = preg->wm; + + if (wm->hash) + hashtable_free(wm->hash); + for (int i = 0; i < wm->n; i++) + if (wm->pat[i]) + xfree(wm->pat[i]); + if (wm->pat) + xfree(wm->pat); + if (wm->siz) + xfree(wm->siz); +#ifdef TRE_WCHAR + if (wm->whash) + hashtable_free(wm->whash); + for (int i = 0; i < wm->wn; i++) + if (wm->wpat[i]) + xfree(wm->wpat[i]); + if (wm->wpat) + xfree(wm->wpat); + if (wm->wsiz) + xfree(wm->wsiz); +#endif +} Modified: user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h Mon Feb 6 18:11:01 2012 (r231093) +++ user/gabor/tre-integration/contrib/tre/lib/tre-mfastmatch.h Mon Feb 6 18:13:34 2012 (r231094) @@ -10,10 +10,16 @@ #define WM_MAXPAT 64 typedef struct { + char **pat; /* Patterns */ + size_t *siz; /* Pattern sizes */ + size_t n; /* No of patterns */ size_t m; /* Shortest pattern length */ size_t defsh; /* Default shift */ void *hash; /* Wu-Manber shift table */ #ifdef TRE_WCHAR + tre_char_t **wpat; /* Patterns (wide) */ + size_t wsiz; /* Pattern sizes (wide) */ + size_t wn; /* No of patterns (wide) */ size_t wm; /* Shortest pattern length (wide) */ size_t wdefsh; /* Default shift (wide) */ void *whash; /* Wu-Manber shift table (wide) */ Modified: user/gabor/tre-integration/include/regex.h ============================================================================== --- user/gabor/tre-integration/include/regex.h Mon Feb 6 18:11:01 2012 (r231093) +++ user/gabor/tre-integration/include/regex.h Mon Feb 6 18:13:34 2012 (r231094) @@ -79,6 +79,7 @@ typedef struct { } regex_t; typedef struct { + size_t p; regoff_t rm_so; regoff_t rm_eo; } regmatch_t;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202061813.q16IDZ9X047687>