Date: Wed, 15 Feb 2012 21:48:29 +0000 (UTC) From: Gabor Kovesdan <gabor@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r231782 - user/gabor/tre-integration/contrib/tre/lib Message-ID: <201202152148.q1FLmTbK083232@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gabor Date: Wed Feb 15 21:48:29 2012 New Revision: 231782 URL: http://svn.freebsd.org/changeset/base/231782 Log: - Provide MBS patterns to lower layers for better flexibility and to avoid converting back and forth, except for calculated results, where it is probably cheaper to convert back than to calculate the same in MBS. - Fix a bug in converting calculated heuristics back to MBS. Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c user/gabor/tre-integration/contrib/tre/lib/mregcomp.c user/gabor/tre-integration/contrib/tre/lib/regcomp.c user/gabor/tre-integration/contrib/tre/lib/tre-compile.c user/gabor/tre-integration/contrib/tre/lib/tre-compile.h user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c user/gabor/tre-integration/contrib/tre/lib/tre-internal.h Modified: user/gabor/tre-integration/contrib/tre/lib/fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/fastmatch.c Wed Feb 15 21:48:29 2012 (r231782) @@ -47,12 +47,12 @@ tre_fixncomp(fastmatch_t *preg, const ch if (n != 0) { - ret = tre_convert_pattern(regex, n, &wregex, &wlen); + ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen); if (ret != REG_OK) return ret; else ret = tre_proc_literal(preg, wregex, wlen, cflags); - tre_free_pattern(wregex); + tre_free_wcs_pattern(wregex); return ret; } else @@ -68,14 +68,14 @@ tre_fastncomp(fastmatch_t *preg, const c if (n != 0) { - ret = tre_convert_pattern(regex, n, &wregex, &wlen); + ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen); if (ret != REG_OK) return ret; else ret = (cflags & REG_LITERAL) ? 
tre_proc_literal(preg, wregex, wlen, cflags) : tre_proc_fast(preg, wregex, wlen, cflags); - tre_free_pattern(wregex); + tre_free_wcs_pattern(wregex); return ret; } else Modified: user/gabor/tre-integration/contrib/tre/lib/mregcomp.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/mregcomp.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/mregcomp.c Wed Feb 15 21:48:29 2012 (r231782) @@ -80,7 +80,7 @@ tre_mregncomp(mregex_t *preg, size_t nr, for (i = 0; i++; i < nr) { - ret = tre_convert_pattern(regex[i], n[i], &wregex[i], &wlen[i]); + ret = tre_convert_pattern_to_wcs(regex[i], n[i], &wregex[i], &wlen[i]); if (ret != REG_OK) goto fail; } @@ -89,7 +89,7 @@ tre_mregncomp(mregex_t *preg, size_t nr, fail: for (int j = 0; j++; j < i) - tre_free_pattern(wregex[j]); + tre_free_wcs_pattern(wregex[j]); return ret; } @@ -117,7 +117,30 @@ int tre_mregwncomp(mregex_t *preg, size_t nr, const wchar_t *regex[], size_t n[], int cflags) { - return tre_compile(preg, nr, regex, n, cflags); + int i, ret; + char **sregex; + size_t *slen; + + sregex = xmalloc(nr * sizeof(char *); + if (!sregex) + return REG_ENOMEM; + slen = xmalloc(nr * sizeof(size_t); + if (!slen) + return REG_ENOMEM; + + for (i = 0; i++; i < nr) + { + ret = tre_convert_pattern_to_mbs(regex[i], n[i], &sregex[i], &slen[i]); + if (ret != REG_OK) + goto fail; + } + + ret = tre_mcompile(preg, nr, regex, n, cflags); + +fail: + for (int j = 0; j++; j < i) + tre_free_mbs_pattern(wregex[j]); + return ret; } int Modified: user/gabor/tre-integration/contrib/tre/lib/regcomp.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/regcomp.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/regcomp.c Wed Feb 15 21:48:29 2012 (r231782) @@ -35,12 +35,12 @@ tre_regncomp(regex_t *preg, const char * tre_char_t *wregex; size_t 
wlen; - ret = tre_convert_pattern(regex, n, &wregex, &wlen); + ret = tre_convert_pattern_to_wcs(regex, n, &wregex, &wlen); if (ret != REG_OK) return ret; else - ret = tre_compile(preg, wregex, wlen, cflags); - tre_free_pattern(wregex); + ret = tre_compile(preg, wregex, wlen, regex, n, cflags); + tre_free_wcs_pattern(wregex); return ret; } @@ -58,16 +58,26 @@ tre_regcomp(regex_t *preg, const char *r int tre_regwncomp(regex_t *preg, const wchar_t *regex, size_t n, int cflags) { - return tre_compile(preg, regex, n, cflags); + int ret; + char *sregex; + size_t slen; + + ret = tre_convert_pattern_to_mbs(regex, n, &sregex, &slen); + if (ret != REG_OK) + return ret; + else + ret = tre_compile(preg, regex, n, sregex, slen, cflags); + tre_free_mbs_pattern(sregex); + return ret; } int tre_regwcomp(regex_t *preg, const wchar_t *regex, int cflags) { if ((cflags & REG_PEND) && (preg->re_wendp >= regex)) - return tre_compile(preg, regex, preg->re_wendp - regex, cflags); + return tre_regwncomp(preg, regex, preg->re_wendp - regex, cflags); else - return tre_compile(preg, regex, regex ? wcslen(regex) : 0, cflags); + return tre_regwncomp(preg, regex, regex ? 
wcslen(regex) : 0, cflags); } #endif /* TRE_WCHAR */ Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.c Wed Feb 15 21:48:29 2012 (r231782) @@ -1844,8 +1844,8 @@ tre_ast_to_tnfa(tre_ast_node_t *node, tr } int -tre_convert_pattern(const char *regex, size_t n, tre_char_t **w, - size_t *wn) +tre_convert_pattern_to_wcs(const char *regex, size_t n, tre_char_t **w, + size_t *wn) { #if TRE_WCHAR tre_char_t *wregex; @@ -1926,14 +1926,50 @@ tre_convert_pattern(const char *regex, s #endif /* !TRE_WCHAR */ } +int +tre_convert_pattern_to_mbs(const tre_char_t *wregex, size_t n, char **s, + size_t *sn) +{ +#ifdef TRE_WCHAR + size_t siz; + char *mbs; + + siz = wcstombs(NULL, wregex, 0); + if (siz == (size_t)-1) + return REG_BADPAT; + + mbs = xmalloc(siz + 1); + if (!mbs) + return REG_ESPACE; + + wcstombs(mbs, wregex, siz); + mbs[siz] = '\0'; + *s = mbs; + *sn = siz; + return REG_OK; +#else /* !TRE_WCHAR */ + *s = (char * const *)wregex; + *sn = n; + return REG_OK; +#endif +} + void -tre_free_pattern(tre_char_t *wregex) +tre_free_wcs_pattern(tre_char_t *wregex) { #if TRE_WCHAR xfree(wregex); #endif } +void +tre_free_mbs_pattern(char *regex) +{ +#if TRE_WCHAR + xfree(regex); +#endif +} + #define ERROR_EXIT(err) \ do \ { \ @@ -1945,7 +1981,8 @@ tre_free_pattern(tre_char_t *wregex) int -tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +tre_compile(regex_t *preg, const tre_char_t *wregex, size_t wn, + const char *regex, size_t n, int cflags) { int ret; @@ -1954,7 +1991,7 @@ tre_compile(regex_t *preg, const tre_cha * pattern validation. In this way, validation is not * scattered through the code. 
*/ - ret = tre_compile_nfa(preg, regex, n, cflags); + ret = tre_compile_nfa(preg, wregex, wn, cflags); if (ret != REG_OK) return ret; @@ -1962,11 +1999,11 @@ tre_compile(regex_t *preg, const tre_cha * Check if we can cheat with a fixed string algorithm * if the pattern is long enough. */ - ret = tre_compile_bm(preg, regex, n, cflags); + ret = tre_compile_bm(preg, wregex, wn, regex, n, cflags); /* Only try to compile heuristic if the fast matcher failed. */ if (ret != REG_OK) - ret = tre_compile_heur(preg, regex, n, cflags); + ret = tre_compile_heur(preg, wregex, wn, cflags); else preg->heur = NULL; @@ -1975,7 +2012,8 @@ tre_compile(regex_t *preg, const tre_cha } int -tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, int cflags) +tre_compile_bm(regex_t *preg, const tre_char_t *wregex, size_t wn, + const char *regex, size_t n, int cflags) { fastmatch_t *shortcut; int ret; @@ -1986,8 +2024,8 @@ tre_compile_bm(regex_t *preg, const tre_ if (!shortcut) return REG_ESPACE; ret = (cflags & REG_LITERAL) - ? tre_proc_literal(shortcut, regex, n, cflags) - : tre_proc_fast(shortcut, regex, n, cflags); + ? 
tre_proc_literal(shortcut, wregex, wn, regex, n, cflags) + : tre_proc_fast(shortcut, wregex, wn, regex, n, cflags); if (ret == REG_OK) { preg->shortcut = shortcut; Modified: user/gabor/tre-integration/contrib/tre/lib/tre-compile.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-compile.h Wed Feb 15 21:48:29 2012 (r231782) @@ -24,8 +24,8 @@ typedef struct { int *params; } tre_pos_and_tags_t; -int tre_compile_bm(regex_t *preg, const tre_char_t *regex, size_t n, - int cflags); +int tre_compile_bm(regex_t *preg, const tre_char_t *wregex, size_t wn, + const char *regex, size_t n, int cflags); int tre_compile_heur(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); int tre_compile_nfa(regex_t *preg, const tre_char_t *regex, size_t n, Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.c Wed Feb 15 21:48:29 2012 (r231782) @@ -344,13 +344,13 @@ static int fastcmp(const fastmatch_t *fg * Copies the pattern pat having lenght n to p and stores * the size in l. 
*/ -#define SAVE_PATTERN(src, srclen, dst, dstlen) \ +#define SAVE_PATTERN(src, srclen, dst, dstlen, l) \ dstlen = srclen; \ - dst = xmalloc((dstlen + 1) * sizeof(tre_char_t)); \ + dst = xmalloc((dstlen + 1) * sizeof(l)); \ if (dst == NULL) \ return REG_ESPACE; \ if (dstlen > 0) \ - memcpy(dst, src, dstlen * sizeof(tre_char_t)); \ + memcpy(dst, src, dstlen * sizeof(l)); \ dst[dstlen] = TRE_CHAR('\0'); /* @@ -402,8 +402,8 @@ static int fastcmp(const fastmatch_t *fg * Returns: REG_OK on success, error code otherwise */ int -tre_proc_literal(fastmatch_t *fg, const tre_char_t *pat, size_t n, - int cflags) +tre_proc_literal(fastmatch_t *fg, const tre_char_t *wpat, size_t wn, + const char *pat, size_t n, int cflags) { INIT_COMP; @@ -415,10 +415,10 @@ tre_proc_literal(fastmatch_t *fg, const return REG_BADPAT; #ifdef TRE_WCHAR - SAVE_PATTERN(pat, n, fg->wpattern, fg->wlen); - STORE_MBS_PAT; + SAVE_PATTERN(wpat, wn, fg->wpattern, fg->wlen, tre_char_t); + SAVE_PATTERN(pat, n, fg->pattern, fg->len, char); #else - SAVE_PATTERN(pat, n, fg->pattern, fg->len); + SAVE_PATTERN(pat, n, fg->pattern, fg->len, char); #endif DPRINT(("tre_proc_literal: pattern: %s, len %zu, icase: %c, word: %c, " @@ -439,8 +439,8 @@ tre_proc_literal(fastmatch_t *fg, const * Returns: REG_OK on success, error code otherwise */ int -tre_proc_fast(fastmatch_t *fg, const tre_char_t *pat, size_t n, - int cflags) +tre_proc_fast(fastmatch_t *fg, const tre_char_t *wpat, size_t wn, + const char *pat, size_t n, int cflags) { tre_char_t *tmp; size_t pos = 0; @@ -449,23 +449,23 @@ tre_proc_fast(fastmatch_t *fg, const tre INIT_COMP; /* Remove beginning-of-line character ('^'). 
*/ - if (pat[0] == TRE_CHAR('^')) + if (wpat[0] == TRE_CHAR('^')) { fg->bol = true; - n--; - pat++; + wn--; + wpat++; } CHECK_MATCHALL(false); /* Handle word-boundary matching when GNU extensions are enabled */ - if ((cflags & REG_GNU) && (n >= 14) && - (memcmp(pat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) && - (memcmp(pat + n - 7, TRE_CHAR("[[:>:]]"), + if ((cflags & REG_GNU) && (wn >= 14) && + (memcmp(wpat, TRE_CHAR("[[:<:]]"), 7 * sizeof(tre_char_t)) == 0) && + (memcmp(wpat + wn - 7, TRE_CHAR("[[:>:]]"), 7 * sizeof(tre_char_t)) == 0)) { - n -= 14; - pat += 7; + wn -= 14; + wpat += 7; fg->word = true; } @@ -473,7 +473,7 @@ tre_proc_fast(fastmatch_t *fg, const tre if (fg->word && (TRE_MB_CUR_MAX > 1)) return REG_BADPAT; - tmp = xmalloc((n + 1) * sizeof(tre_char_t)); + tmp = xmalloc((wn + 1) * sizeof(tre_char_t)); if (tmp == NULL) return REG_ESPACE; @@ -481,15 +481,15 @@ tre_proc_fast(fastmatch_t *fg, const tre #define STORE_CHAR \ do \ { \ - tmp[pos++] = pat[i]; \ + tmp[pos++] = wpat[i]; \ escaped = false; \ continue; \ } while (0) /* Traverse the input pattern for processing */ - for (unsigned int i = 0; i < n; i++) + for (unsigned int i = 0; i < wn; i++) { - switch (pat[i]) + switch (wpat[i]) { case TRE_CHAR('\\'): if (escaped) @@ -574,10 +574,12 @@ badpat: * classes stripped out. 
*/ #ifdef TRE_WCHAR - SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen); + SAVE_PATTERN(tmp, pos, fg->wpattern, fg->wlen, tre_char_t); + + /* Convert back to MBS instead of processing again */ STORE_MBS_PAT; #else - SAVE_PATTERN(tmp, pos, fg->pattern, fg->len); + SAVE_PATTERN(tmp, pos, fg->pattern, fg->len, char); #endif xfree(tmp); Modified: user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-fastmatch.h Wed Feb 15 21:48:29 2012 (r231782) @@ -9,9 +9,10 @@ #include "hashtable.h" #include "tre-internal.h" -int tre_proc_literal(fastmatch_t *preg, const tre_char_t *regex, - size_t, int); -int tre_proc_fast(fastmatch_t *preg, const tre_char_t *regex, size_t, int); +int tre_proc_literal(fastmatch_t *, const tre_char_t *, size_t, + const char *, size_t, int); +int tre_proc_fast(fastmatch_t *, const tre_char_t *, size_t, + const char *, size_t, int); int tre_match_fast(const fastmatch_t *fg, const void *data, size_t len, tre_str_type_t type, int nmatch, regmatch_t pmatch[], int eflags); void tre_free_fast(fastmatch_t *preg); Modified: user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-heuristic.c Wed Feb 15 21:48:29 2012 (r231782) @@ -475,14 +475,14 @@ ok: for (i = 0; farr[i] != NULL; i++) { - bsiz[i] = mbstowcs(farr[i], NULL, 0); + bsiz[i] = wcstombs(NULL, farr[i], 0); barr[i] = xmalloc(bsiz[i] + 1); if (!barr[i]) { errcode = REG_ESPACE; goto err; } - mbstowcs(farr[i], barr[i], bsiz[i]); + wcstombs(barr[i], farr[i], bsiz[i]); barr[i][bsiz[i]] = '\0'; } barr[i] = NULL; @@ -513,7 +513,13 @@ ok: errcode = 
REG_ESPACE; goto err; } - ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], 0); +#ifdef TRE_WCHAR + ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], + barr[i], bsiz[i], 0); +#else + ret = tre_proc_literal(h->heurs[i], farr[i], fsiz[i], + farr[i], fsiz[i], 0); +#endif if (ret != REG_OK) { errcode = REG_BADPAT; Modified: user/gabor/tre-integration/contrib/tre/lib/tre-internal.h ============================================================================== --- user/gabor/tre-integration/contrib/tre/lib/tre-internal.h Wed Feb 15 21:32:05 2012 (r231781) +++ user/gabor/tre-integration/contrib/tre/lib/tre-internal.h Wed Feb 15 21:48:29 2012 (r231782) @@ -277,14 +277,22 @@ struct tnfa { } while (0 /*CONSTCOND*/) int -tre_convert_pattern(const char *regex, size_t n, tre_char_t **w, - size_t *wn); +tre_convert_pattern_to_wcs(const char *regex, size_t n, tre_char_t **w, + size_t *wn); void -tre_free_pattern(tre_char_t *wregex); +tre_free_wcs_pattern(tre_char_t *wregex); int -tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags); +tre_convert_pattern_to_mbs(const tre_char_t *wregex, size_t n, char **s, + size_t *sn); + +void +tre_free_mbs_pattern(char *wregex); + +int +tre_compile(regex_t *preg, const tre_char_t *wregex, size_t wn, + const char *regex, size_t n, int cflags); void tre_free(regex_t *preg);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201202152148.q1FLmTbK083232>