Date: Sun, 4 May 2014 16:50:01 GMT From: Pedro Giffuni <pfg@freebsd.org> To: freebsd-bugs@FreeBSD.org Subject: Re: kern/169302: [libc] [patch] Applied MidnightBSD regex memory consumption limits Message-ID: <201405041650.s44Go1M3064711@freefall.freebsd.org>
next in thread | raw e-mail | index | archive | help
The following reply was made to PR kern/169302; it has been noted by GNATS. From: Pedro Giffuni <pfg@freebsd.org> To: "bug-followup@FreeBSD.org" <bug-followup@FreeBSD.org>, "zblacher@sandvine.com" <zblacher@sandvine.com> Cc: Subject: Re: kern/169302: [libc] [patch] Applied MidnightBSD regex memory consumption limits Date: Sun, 04 May 2014 11:41:45 -0500 This is a multi-part message in MIME format. --------------020708080805040202010709 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Updated patch, based on the NetBSD changes. Also use calloc(1) as in OpenBSD. --------------020708080805040202010709 Content-Type: text/plain; charset=us-ascii; name="patch-regex-pr169302.txt" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="patch-regex-pr169302.txt" Index: lib/libc/regex/engine.c =================================================================== --- lib/libc/regex/engine.c (revision 265307) +++ lib/libc/regex/engine.c (working copy) @@ -219,7 +219,7 @@ } else { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && - stop - dp >= g->mlen && + (size_t)(stop - dp) >= g->mlen && memcmp(dp, g->must, (size_t)g->mlen) == 0) break; if (dp == stop) /* we didn't find g->must */ Index: lib/libc/regex/regcomp.c =================================================================== --- lib/libc/regex/regcomp.c (revision 265307) +++ lib/libc/regex/regcomp.c (working copy) @@ -86,11 +86,11 @@ #endif /* === regcomp.c === */ -static void p_ere(struct parse *p, int stop); -static void p_ere_exp(struct parse *p); +static void p_ere(struct parse *p, int stop, size_t reclimit); +static void p_ere_exp(struct parse *p, size_t reclimit); static void p_str(struct parse *p); -static void p_bre(struct parse *p, int end1, int end2); -static int p_simp_re(struct parse *p, int starordinary); +static void p_bre(struct parse *p, int end1, int end2, size_t reclimit); +static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); static int p_count(struct parse *p); static void p_bracket(struct parse *p); static void p_b_term(struct parse *p, cset *cs); @@ -102,7 +102,7 @@ static void bothcases(struct parse *p, wint_t ch); static void ordinary(struct parse *p, wint_t ch); static void nonnewline(struct parse *p); -static void repeat(struct parse *p, sopno start, int from, int to); +static void repeat(struct parse *p, sopno start, int from, int to, size_t reclimit); static int seterr(struct parse *p, int e); static cset *allocset(struct parse *p); static void freeset(struct parse *p, cset *cs); @@ -167,6 +167,13 @@ #define never 0 /* some <assert.h>s have bugs too */ #endif +#define MEMLIMIT 0x8000000 +#define MEMSIZE(p) \ + ((p)->ncsalloc / CHAR_BIT * NC + \ + (p)->ncsalloc * sizeof(cset) + \ + (p)->ssize * sizeof(sop)) +#define RECLIMIT 256 + /* Macro used by computejump()/computematchjump() */ #define MIN(a,b) ((a)<(b)?(a):(b)) @@ -214,7 +221,7 @@ if (g == NULL) return(REG_ESPACE); p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ - p->strip = (sop *)malloc(p->ssize * sizeof(sop)); + p->strip = calloc(p->ssize, sizeof(sop)); p->slen = 0; if (p->strip == NULL) { free((char *)g); @@ -249,11 +256,11 @@ EMIT(OEND, 0); g->firststate = THERE(); if (cflags®_EXTENDED) - p_ere(p, OUT); + p_ere(p, OUT, 0); else if (cflags®_NOSPEC) p_str(p); else - p_bre(p, OUT, OUT); + p_bre(p, OUT, OUT, 0); EMIT(OEND, 0); g->laststate = THERE(); @@ -294,7 +301,8 @@ */ static void p_ere(struct parse *p, - int stop) /* character this ERE should end at */ + int stop, /* character this ERE should end at */ + size_t reclimit) { char c; sopno prevback; @@ -302,11 +310,16 @@ sopno conc; int first = 1; /* is this the first alternative? */ + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + for (;;) { /* do a bunch of concatenated expressions */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) - p_ere_exp(p); + p_ere_exp(p, reclimit); (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) @@ -335,10 +348,10 @@ /* - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op - == static void p_ere_exp(struct parse *p); + == static void p_ere_exp(struct parse *p, size_t reclimit); */ static void -p_ere_exp(struct parse *p) +p_ere_exp(struct parse *p, size_t reclimit) { char c; wint_t wc; @@ -361,7 +374,7 @@ p->pbegin[subno] = HERE(); EMIT(OLPAREN, subno); if (!SEE(')')) - p_ere(p, ')'); + p_ere(p, ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -465,7 +478,7 @@ count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); @@ -499,7 +512,7 @@ /* - p_bre - BRE parser top level, anchoring and concatenation == static void p_bre(struct parse *p, int end1, \ - == int end2); + == int end2, size_t reclimit); * Giving end1 as OUT essentially eliminates the end1/end2 check. * * This implementation is a bit of a kludge, in that a trailing $ is first @@ -509,8 +522,14 @@ static void p_bre(struct parse *p, int end1, /* first terminating character */ - int end2) /* second terminating character */ + int end2, /* second terminating character */ + size_t reclimit) { + if (reclimit++ > RECLIMIT || p->error == REG_ESPACE) { + p->error = REG_ESPACE; + return; + } + sopno start = HERE(); int first = 1; /* first subexpression? */ int wasdollar = 0; @@ -521,7 +540,7 @@ p->g->nbol++; } while (MORE() && !SEETWO(end1, end2)) { - wasdollar = p_simp_re(p, first); + wasdollar = p_simp_re(p, first, reclimit); first = 0; } if (wasdollar) { /* oops, that was a trailing anchor */ @@ -536,11 +555,12 @@ /* - p_simp_re - parse a simple RE, an atom possibly followed by a repetition - == static int p_simp_re(struct parse *p, int starordinary); + == static int p_simp_re(struct parse *p, int starordinary, size_t reclimit); */ static int /* was the simple RE an unbackslashed $? */ p_simp_re(struct parse *p, - int starordinary) /* is a leading * an ordinary character? */ + int starordinary, /* is a leading * an ordinary character? */ + size_t reclimit) { int c; int count; @@ -580,7 +600,7 @@ EMIT(OLPAREN, subno); /* the MORE here is an error heuristic */ if (MORE() && !SEETWO('\\', ')')) - p_bre(p, '\\', ')'); + p_bre(p, '\\', ')', reclimit); if (subno < NPAREN) { p->pend[subno] = HERE(); assert(p->pend[subno] != 0); @@ -641,7 +661,7 @@ count2 = INFINITY; } else /* just a single number */ count2 = count; - repeat(p, pos, count, count2); + repeat(p, pos, count, count2, 0); if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); @@ -996,13 +1016,15 @@ /* - repeat - generate code for a bounded repetition, recursively if needed - == static void repeat(struct parse *p, sopno start, int from, int to); + == static void repeat(struct parse *p, sopno start, int from, int to, + == size_t reclimit ); */ static void repeat(struct parse *p, sopno start, /* operand from here to end of strip */ int from, /* repeated from this number */ - int to) /* to this number of times (maybe INFINITY) */ + int to, /* to this number of times (maybe INFINITY) */ + size_t reclimit) { sopno finish = HERE(); # define N 2 @@ -1011,7 +1033,9 @@ # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) sopno copy; - if (p->error != 0) /* head off possible runaway recursion */ + if (reclimit++ > RECLIMIT) + p->error = REG_ESPACE; + if (p->error) return; assert(from <= to); @@ -1025,7 +1049,7 @@ case REP(0, INF): /* as x{1,}? */ /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ INSERT(OCH_, start); /* offset is wrong... */ - repeat(p, start+1, 1, to); + repeat(p, start+1, 1, to, reclimit); ASTERN(OOR1, start); AHEAD(start); /* ... fix it */ EMIT(OOR2, 0); @@ -1045,7 +1069,7 @@ ASTERN(O_CH, THERETHERE()); copy = dupl(p, start+1, finish+1); assert(copy == finish+4); - repeat(p, copy, 1, to-1); + repeat(p, copy, 1, to-1, reclimit); break; case REP(1, INF): /* as x+ */ INSERT(OPLUS_, start); @@ -1053,11 +1077,11 @@ break; case REP(N, N): /* as xx{m-1,n-1} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to-1); + repeat(p, copy, from-1, to-1, reclimit); break; case REP(N, INF): /* as xx{n-1,INF} */ copy = dupl(p, start, finish); - repeat(p, copy, from-1, to); + repeat(p, copy, from-1, to, reclimit); break; default: /* "can't happen" */ SETERROR(REG_ASSERT); /* just in case */ @@ -1112,8 +1136,13 @@ { cset *cs, *ncs; + + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; + ncs = realloc(p->g->sets, (p->g->ncsets + 1) * sizeof(*ncs)); if (ncs == NULL) { +oomem: SETERROR(REG_ESPACE); return (NULL); } @@ -1347,17 +1376,22 @@ enlarge(struct parse *p, sopno size) { sop *sp; + sopno osize; if (p->ssize >= size) return 1; - + osize = p->ssize; + p->ssize = size; + if (MEMSIZE(p) > MEMLIMIT) + goto oomem; sp = (sop *)realloc(p->strip, size*sizeof(sop)); if (sp == NULL) { +oomem: + p->ssize = osize; SETERROR(REG_ESPACE); return 0; } p->strip = sp; - p->ssize = size; return 1; } Index: lib/libc/regex/regex2.h =================================================================== --- lib/libc/regex/regex2.h (revision 265307) +++ lib/libc/regex/regex2.h (working copy) @@ -73,7 +73,7 @@ * immediately *preceding* "execution" of that operator. */ typedef unsigned long sop; /* strip operator */ -typedef long sopno; +typedef size_t sopno; #define OPRMASK 0xf8000000L #define OPDMASK 0x07ffffffL #define OPSHIFT ((unsigned)27) @@ -165,7 +165,7 @@ int magic; # define MAGIC2 ((('R'^0200)<<8)|'E') sop *strip; /* malloced area for strip */ - int ncsets; /* number of csets in use */ + size_t ncsets; /* number of csets in use */ cset *sets; /* -> cset [ncsets] */ int cflags; /* copy of regcomp() cflags argument */ sopno nstates; /* = number of sops */ @@ -175,13 +175,13 @@ # define USEBOL 01 /* used ^ */ # define USEEOL 02 /* used $ */ # define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ + size_t nbol; /* number of ^ used */ + size_t neol; /* number of $ used */ char *must; /* match must contain this string */ int moffset; /* latest point at which must may be located */ int *charjump; /* Boyer-Moore char jump table */ int *matchjump; /* Boyer-Moore match jump table */ - int mlen; /* length of must */ + size_t mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ sopno nplus; /* how deep does it nest +s? */ --------------020708080805040202010709--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201405041650.s44Go1M3064711>