FreeBSD Mail Archives

Date:      Sat, 2 Jan 2021 06:01:03 GMT
From:      Kyle Evans <kevans@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 8929690a6359 - main - contrib: remove libgnuregex
Message-ID:  <202101020601.1026131n081839@gitrepo.freebsd.org>

index | next in thread | raw e-mail


The branch main has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=8929690a6359dfba4ed398ef66a4c13e071f10bf

commit 8929690a6359dfba4ed398ef66a4c13e071f10bf
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2021-01-02 05:59:21 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2021-01-02 06:00:58 +0000

    contrib: remove libgnuregex
    
    This should have been a part of 47d1ad2413da, but it was overlooked. All of
    the build bits have been previously removed, and nothing references this
    anymore.
---
 contrib/libgnuregex/regcomp.c        | 3856 ------------------------------
 contrib/libgnuregex/regex.c          |   76 -
 contrib/libgnuregex/regex.h          |  582 -----
 contrib/libgnuregex/regex_internal.c | 1732 --------------
 contrib/libgnuregex/regex_internal.h |  769 ------
 contrib/libgnuregex/regexec.c        | 4380 ----------------------------------
 6 files changed, 11395 deletions(-)

diff --git a/contrib/libgnuregex/regcomp.c b/contrib/libgnuregex/regcomp.c
deleted file mode 100644
index 82b50ad1dd3b..000000000000
--- a/contrib/libgnuregex/regcomp.c
+++ /dev/null
@@ -1,3856 +0,0 @@
-/* Extended regular expression matching and search library.
-   Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
-					  size_t length, reg_syntax_t syntax);
-static void re_compile_fastmap_iter (regex_t *bufp,
-				     const re_dfastate_t *init_state,
-				     char *fastmap);
-static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
-#ifdef RE_ENABLE_I18N
-static void free_charset (re_charset_t *cset);
-#endif /* RE_ENABLE_I18N */
-static void free_workarea_compile (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-#ifdef RE_ENABLE_I18N
-static void optimize_utf8 (re_dfa_t *dfa);
-#endif
-static reg_errcode_t analyze (regex_t *preg);
-static reg_errcode_t preorder (bin_tree_t *root,
-			       reg_errcode_t (fn (void *, bin_tree_t *)),
-			       void *extra);
-static reg_errcode_t postorder (bin_tree_t *root,
-				reg_errcode_t (fn (void *, bin_tree_t *)),
-				void *extra);
-static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
-static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
-static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
-				 bin_tree_t *node);
-static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
-static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
-static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
-static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
-static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
-				   unsigned int constraint);
-static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
-static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
-					 int node, int root);
-static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
-static int fetch_number (re_string_t *input, re_token_t *token,
-			 reg_syntax_t syntax);
-static int peek_token (re_token_t *token, re_string_t *input,
-			reg_syntax_t syntax) internal_function;
-static bin_tree_t *parse (re_string_t *regexp, regex_t *preg,
-			  reg_syntax_t syntax, reg_errcode_t *err);
-static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg,
-				  re_token_t *token, reg_syntax_t syntax,
-				  int nest, reg_errcode_t *err);
-static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg,
-				 re_token_t *token, reg_syntax_t syntax,
-				 int nest, reg_errcode_t *err);
-static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg,
-				     re_token_t *token, reg_syntax_t syntax,
-				     int nest, reg_errcode_t *err);
-static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg,
-				  re_token_t *token, reg_syntax_t syntax,
-				  int nest, reg_errcode_t *err);
-static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp,
-				 re_dfa_t *dfa, re_token_t *token,
-				 reg_syntax_t syntax, reg_errcode_t *err);
-static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
-				      re_token_t *token, reg_syntax_t syntax,
-				      reg_errcode_t *err);
-static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
-					    re_string_t *regexp,
-					    re_token_t *token, int token_len,
-					    re_dfa_t *dfa,
-					    reg_syntax_t syntax,
-					    int accept_hyphen);
-static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
-					  re_string_t *regexp,
-					  re_token_t *token);
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (bitset_t sbcset,
-					re_charset_t *mbcset,
-					int *equiv_class_alloc,
-					const unsigned char *name);
-static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
-				      bitset_t sbcset,
-				      re_charset_t *mbcset,
-				      int *char_class_alloc,
-				      const unsigned char *class_name,
-				      reg_syntax_t syntax);
-#else  /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (bitset_t sbcset,
-					const unsigned char *name);
-static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
-				      bitset_t sbcset,
-				      const unsigned char *class_name,
-				      reg_syntax_t syntax);
-#endif /* not RE_ENABLE_I18N */
-static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
-				       RE_TRANSLATE_TYPE trans,
-				       const unsigned char *class_name,
-				       const unsigned char *extra,
-				       int non_match, reg_errcode_t *err);
-static bin_tree_t *create_tree (re_dfa_t *dfa,
-				bin_tree_t *left, bin_tree_t *right,
-				re_token_type_t type);
-static bin_tree_t *create_token_tree (re_dfa_t *dfa,
-				      bin_tree_t *left, bin_tree_t *right,
-				      const re_token_t *token);
-static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
-static void free_token (re_token_t *node);
-static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
-static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
-
-/* This table gives an error message for each of the error codes listed
-   in regex.h.  Obviously the order here has to be same as there.
-   POSIX doesn't require that we do anything for REG_NOERROR,
-   but why not be nice?  */
-
-const char __re_error_msgid[] attribute_hidden =
-  {
-#define REG_NOERROR_IDX	0
-    gettext_noop ("Success")	/* REG_NOERROR */
-    "\0"
-#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
-    gettext_noop ("No match")	/* REG_NOMATCH */
-    "\0"
-#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")
-    gettext_noop ("Invalid regular expression") /* REG_BADPAT */
-    "\0"
-#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
-    gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
-    "\0"
-#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")
-    gettext_noop ("Invalid character class name") /* REG_ECTYPE */
-    "\0"
-#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")
-    gettext_noop ("Trailing backslash") /* REG_EESCAPE */
-    "\0"
-#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")
-    gettext_noop ("Invalid back reference") /* REG_ESUBREG */
-    "\0"
-#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")
-    gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */
-    "\0"
-#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
-    gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
-    "\0"
-#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
-    gettext_noop ("Unmatched \\{") /* REG_EBRACE */
-    "\0"
-#define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")
-    gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
-    "\0"
-#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
-    gettext_noop ("Invalid range end")	/* REG_ERANGE */
-    "\0"
-#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")
-    gettext_noop ("Memory exhausted") /* REG_ESPACE */
-    "\0"
-#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")
-    gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
-    "\0"
-#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
-    gettext_noop ("Premature end of regular expression") /* REG_EEND */
-    "\0"
-#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")
-    gettext_noop ("Regular expression too big") /* REG_ESIZE */
-    "\0"
-#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")
-    gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
-  };
-
-const size_t __re_error_msgid_idx[] attribute_hidden =
-  {
-    REG_NOERROR_IDX,
-    REG_NOMATCH_IDX,
-    REG_BADPAT_IDX,
-    REG_ECOLLATE_IDX,
-    REG_ECTYPE_IDX,
-    REG_EESCAPE_IDX,
-    REG_ESUBREG_IDX,
-    REG_EBRACK_IDX,
-    REG_EPAREN_IDX,
-    REG_EBRACE_IDX,
-    REG_BADBR_IDX,
-    REG_ERANGE_IDX,
-    REG_ESPACE_IDX,
-    REG_BADRPT_IDX,
-    REG_EEND_IDX,
-    REG_ESIZE_IDX,
-    REG_ERPAREN_IDX
-  };
-
-/* Entry points for GNU code.  */
-
-/* re_compile_pattern is the GNU regular expression compiler: it
-   compiles PATTERN (of length LENGTH) and puts the result in BUFP.
-   Returns 0 if the pattern was valid, otherwise an error string.
-
-   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
-   are set in BUFP on entry.  */
-
-const char *
-re_compile_pattern (pattern, length, bufp)
-    const char *pattern;
-    size_t length;
-    struct re_pattern_buffer *bufp;
-{
-  reg_errcode_t ret;
-
-  /* And GNU code determines whether or not to get register information
-     by passing null for the REGS argument to re_match, etc., not by
-     setting no_sub, unless RE_NO_SUB is set.  */
-  bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
-
-  /* Match anchors at newline.  */
-  bufp->newline_anchor = 1;
-
-  ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
-
-  if (!ret)
-    return NULL;
-  return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
-}
-#ifdef _LIBC
-weak_alias (__re_compile_pattern, re_compile_pattern)
-#endif
-
-/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
-   also be assigned to arbitrarily: each pattern buffer stores its own
-   syntax, so it can be changed between regex compilations.  */
-/* This has no initializer because initialized variables in Emacs
-   become read-only after dumping.  */
-reg_syntax_t re_syntax_options;
-
-
-/* Specify the precise syntax of regexps for compilation.  This provides
-   for compatibility for various utilities which historically have
-   different, incompatible syntaxes.
-
-   The argument SYNTAX is a bit mask comprised of the various bits
-   defined in regex.h.  We return the old syntax.  */
-
-reg_syntax_t
-re_set_syntax (syntax)
-    reg_syntax_t syntax;
-{
-  reg_syntax_t ret = re_syntax_options;
-
-  re_syntax_options = syntax;
-  return ret;
-}
-#ifdef _LIBC
-weak_alias (__re_set_syntax, re_set_syntax)
-#endif
-
-int
-re_compile_fastmap (bufp)
-    struct re_pattern_buffer *bufp;
-{
-  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
-  char *fastmap = bufp->fastmap;
-
-  memset (fastmap, '\0', sizeof (char) * SBC_MAX);
-  re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
-  if (dfa->init_state != dfa->init_state_word)
-    re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
-  if (dfa->init_state != dfa->init_state_nl)
-    re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
-  if (dfa->init_state != dfa->init_state_begbuf)
-    re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
-  bufp->fastmap_accurate = 1;
-  return 0;
-}
-#ifdef _LIBC
-weak_alias (__re_compile_fastmap, re_compile_fastmap)
-#endif
-
-static inline void
-__attribute ((always_inline))
-re_set_fastmap (char *fastmap, int icase, int ch)
-{
-  fastmap[ch] = 1;
-  if (icase)
-    fastmap[tolower (ch)] = 1;
-}
-
-/* Helper function for re_compile_fastmap.
-   Compile fastmap for the initial_state INIT_STATE.  */
-
-static void
-re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
-			 char *fastmap)
-{
-  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
-  int node_cnt;
-  int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
-  for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
-    {
-      int node = init_state->nodes.elems[node_cnt];
-      re_token_type_t type = dfa->nodes[node].type;
-
-      if (type == CHARACTER)
-	{
-	  re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
-#ifdef RE_ENABLE_I18N
-	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
-	    {
-	      unsigned char *buf = alloca (dfa->mb_cur_max), *p;
-	      wchar_t wc;
-	      mbstate_t state;
-
-	      p = buf;
-	      *p++ = dfa->nodes[node].opr.c;
-	      while (++node < dfa->nodes_len
-		     &&	dfa->nodes[node].type == CHARACTER
-		     && dfa->nodes[node].mb_partial)
-		*p++ = dfa->nodes[node].opr.c;
-	      memset (&state, '\0', sizeof (state));
-	      if (__mbrtowc (&wc, (const char *) buf, p - buf,
-			     &state) == p - buf
-		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
-		      != (size_t) -1))
-		re_set_fastmap (fastmap, 0, buf[0]);
-	    }
-#endif
-	}
-      else if (type == SIMPLE_BRACKET)
-	{
-	  int i, ch;
-	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
-	    {
-	      int j;
-	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
-	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
-		if (w & ((bitset_word_t) 1 << j))
-		  re_set_fastmap (fastmap, icase, ch);
-	    }
-	}
-#ifdef RE_ENABLE_I18N
-      else if (type == COMPLEX_BRACKET)
-	{
-	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
-	  int i;
-
-# ifdef _LIBC
-	  /* See if we have to try all bytes which start multiple collation
-	     elements.
-	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
-		  collation element, and don't catch 'b' since 'b' is
-		  the only collation element which starts from 'b' (and
-		  it is caught by SIMPLE_BRACKET).  */
-	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
-		  && (cset->ncoll_syms || cset->nranges))
-		{
-		  const int32_t *table = (const int32_t *)
-		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-		  for (i = 0; i < SBC_MAX; ++i)
-		    if (table[i] < 0)
-		      re_set_fastmap (fastmap, icase, i);
-		}
-# endif /* _LIBC */
-
-	  /* See if we have to start the match at all multibyte characters,
-	     i.e. where we would not find an invalid sequence.  This only
-	     applies to multibyte character sets; for single byte character
-	     sets, the SIMPLE_BRACKET again suffices.  */
-	  if (dfa->mb_cur_max > 1
-	      && (cset->nchar_classes || cset->non_match || cset->nranges
-# ifdef _LIBC
-		  || cset->nequiv_classes
-# endif /* _LIBC */
-		 ))
-	    {
-	      unsigned char c = 0;
-	      do
-		{
-		  mbstate_t mbs;
-		  memset (&mbs, 0, sizeof (mbs));
-		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
-		    re_set_fastmap (fastmap, false, (int) c);
-		}
-	      while (++c != 0);
-	    }
-
-	  else
-	    {
-	      /* ... Else catch all bytes which can start the mbchars.  */
-	      for (i = 0; i < cset->nmbchars; ++i)
-		{
-		  char buf[256];
-		  mbstate_t state;
-		  memset (&state, '\0', sizeof (state));
-		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
-		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
-		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
-		    {
-		      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
-			  != (size_t) -1)
-			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
-		    }
-		}
-	    }
-	}
-#endif /* RE_ENABLE_I18N */
-      else if (type == OP_PERIOD
-#ifdef RE_ENABLE_I18N
-	       || type == OP_UTF8_PERIOD
-#endif /* RE_ENABLE_I18N */
-	       || type == END_OF_RE)
-	{
-	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);
-	  if (type == END_OF_RE)
-	    bufp->can_be_null = 1;
-	  return;
-	}
-    }
-}
-
-/* Entry point for POSIX code.  */
-/* regcomp takes a regular expression as a string and compiles it.
-
-   PREG is a regex_t *.  We do not expect any fields to be initialized,
-   since POSIX says we shouldn't.  Thus, we set
-
-     `buffer' to the compiled pattern;
-     `used' to the length of the compiled pattern;
-     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
-       REG_EXTENDED bit in CFLAGS is set; otherwise, to
-       RE_SYNTAX_POSIX_BASIC;
-     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
-     `fastmap' to an allocated space for the fastmap;
-     `fastmap_accurate' to zero;
-     `re_nsub' to the number of subexpressions in PATTERN.
-
-   PATTERN is the address of the pattern string.
-
-   CFLAGS is a series of bits which affect compilation.
-
-     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
-     use POSIX basic syntax.
-
-     If REG_NEWLINE is set, then . and [^...] don't match newline.
-     Also, regexec will try a match beginning after every newline.
-
-     If REG_ICASE is set, then we considers upper- and lowercase
-     versions of letters to be equivalent when matching.
-
-     If REG_NOSUB is set, then when PREG is passed to regexec, that
-     routine will report only success or failure, and nothing about the
-     registers.
-
-   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
-   the return codes and their meanings.)  */
-
-int
-regcomp (preg, pattern, cflags)
-    regex_t *__restrict preg;
-    const char *__restrict pattern;
-    int cflags;
-{
-  reg_errcode_t ret;
-  reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
-			 : RE_SYNTAX_POSIX_BASIC);
-
-  preg->buffer = NULL;
-  preg->allocated = 0;
-  preg->used = 0;
-
-  /* Try to allocate space for the fastmap.  */
-  preg->fastmap = re_malloc (char, SBC_MAX);
-  if (BE (preg->fastmap == NULL, 0))
-    return REG_ESPACE;
-
-  syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;
-
-  /* If REG_NEWLINE is set, newlines are treated differently.  */
-  if (cflags & REG_NEWLINE)
-    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
-      syntax &= ~RE_DOT_NEWLINE;
-      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
-      /* It also changes the matching behavior.  */
-      preg->newline_anchor = 1;
-    }
-  else
-    preg->newline_anchor = 0;
-  preg->no_sub = !!(cflags & REG_NOSUB);
-  preg->translate = NULL;
-
-  ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
-
-  /* POSIX doesn't distinguish between an unmatched open-group and an
-     unmatched close-group: both are REG_EPAREN.  */
-  if (ret == REG_ERPAREN)
-    ret = REG_EPAREN;
-
-  /* We have already checked preg->fastmap != NULL.  */
-  if (BE (ret == REG_NOERROR, 1))
-    /* Compute the fastmap now, since regexec cannot modify the pattern
-       buffer.  This function never fails in this implementation.  */
-    (void) re_compile_fastmap (preg);
-  else
-    {
-      /* Some error occurred while compiling the expression.  */
-      re_free (preg->fastmap);
-      preg->fastmap = NULL;
-    }
-
-  return (int) ret;
-}
-#ifdef _LIBC
-weak_alias (__regcomp, regcomp)
-#endif
-
-/* Returns a message corresponding to an error code, ERRCODE, returned
-   from either regcomp or regexec.   We don't use PREG here.  */
-
-size_t
-regerror (errcode, preg, errbuf, errbuf_size)
-    int errcode;
-    const regex_t *__restrict preg;
-    char *__restrict errbuf;
-    size_t errbuf_size;
-{
-  const char *msg;
-  size_t msg_size;
-
-  if (BE (errcode < 0
-	  || errcode >= (int) (sizeof (__re_error_msgid_idx)
-			       / sizeof (__re_error_msgid_idx[0])), 0))
-    /* Only error codes returned by the rest of the code should be passed
-       to this routine.  If we are given anything else, or if other regex
-       code generates an invalid error code, then the program has a bug.
-       Dump core so we can fix it.  */
-    abort ();
-
-  msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
-
-  msg_size = strlen (msg) + 1; /* Includes the null.  */
-
-  if (BE (errbuf_size != 0, 1))
-    {
-      if (BE (msg_size > errbuf_size, 0))
-	{
-#if defined HAVE_MEMPCPY || defined _LIBC
-	  *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
-#else
-	  memcpy (errbuf, msg, errbuf_size - 1);
-	  errbuf[errbuf_size - 1] = 0;
-#endif
-	}
-      else
-	memcpy (errbuf, msg, msg_size);
-    }
-
-  return msg_size;
-}
-#ifdef _LIBC
-weak_alias (__regerror, regerror)
-#endif
-
-
-#ifdef RE_ENABLE_I18N
-/* This static array is used for the map to single-byte characters when
-   UTF-8 is used.  Otherwise we would allocate memory just to initialize
-   it the same all the time.  UTF-8 is the preferred encoding so this is
-   a worthwhile optimization.  */
-static const bitset_t utf8_sb_map =
-{
-  /* Set the first 128 bits.  */
-  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
-};
-#endif
-
-
-static void
-free_dfa_content (re_dfa_t *dfa)
-{
-  int i, j;
-
-  if (dfa->nodes)
-    for (i = 0; i < dfa->nodes_len; ++i)
-      free_token (dfa->nodes + i);
-  re_free (dfa->nexts);
-  for (i = 0; i < dfa->nodes_len; ++i)
-    {
-      if (dfa->eclosures != NULL)
-	re_node_set_free (dfa->eclosures + i);
-      if (dfa->inveclosures != NULL)
-	re_node_set_free (dfa->inveclosures + i);
-      if (dfa->edests != NULL)
-	re_node_set_free (dfa->edests + i);
-    }
-  re_free (dfa->edests);
-  re_free (dfa->eclosures);
-  re_free (dfa->inveclosures);
-  re_free (dfa->nodes);
-
-  if (dfa->state_table)
-    for (i = 0; i <= dfa->state_hash_mask; ++i)
-      {
-	struct re_state_table_entry *entry = dfa->state_table + i;
-	for (j = 0; j < entry->num; ++j)
-	  {
-	    re_dfastate_t *state = entry->array[j];
-	    free_state (state);
-	  }
-	re_free (entry->array);
-      }
-  re_free (dfa->state_table);
-#ifdef RE_ENABLE_I18N
-  if (dfa->sb_char != utf8_sb_map)
-    re_free (dfa->sb_char);
-#endif
-  re_free (dfa->subexp_map);
-#ifdef DEBUG
-  re_free (dfa->re_str);
-#endif
-
-  re_free (dfa);
-}
-
-
-/* Free dynamically allocated space used by PREG.  */
-
-void
-regfree (preg)
-    regex_t *preg;
-{
-  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  if (BE (dfa != NULL, 1))
-    free_dfa_content (dfa);
-  preg->buffer = NULL;
-  preg->allocated = 0;
-
-  re_free (preg->fastmap);
-  preg->fastmap = NULL;
-
-  re_free (preg->translate);
-  preg->translate = NULL;
-}
-#ifdef _LIBC
-weak_alias (__regfree, regfree)
-#endif
-
-/* Entry points compatible with 4.2 BSD regex library.  We don't define
-   them unless specifically requested.  */
-
-#if defined _REGEX_RE_COMP || defined _LIBC
-
-/* BSD has one and only one pattern buffer.  */
-static struct re_pattern_buffer re_comp_buf;
-
-char *
-# ifdef _LIBC
-/* Make these definitions weak in libc, so POSIX programs can redefine
-   these names if they don't use our functions, and still use
-   regcomp/regexec above without link errors.  */
-weak_function
-# endif
-re_comp (s)
-     const char *s;
-{
-  reg_errcode_t ret;
-  char *fastmap;
-
-  if (!s)
-    {
-      if (!re_comp_buf.buffer)
-	return gettext ("No previous regular expression");
-      return 0;
-    }
-
-  if (re_comp_buf.buffer)
-    {
-      fastmap = re_comp_buf.fastmap;
-      re_comp_buf.fastmap = NULL;
-      __regfree (&re_comp_buf);
-      memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
-      re_comp_buf.fastmap = fastmap;
-    }
-
-  if (re_comp_buf.fastmap == NULL)
-    {
-      re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
-      if (re_comp_buf.fastmap == NULL)
-	return (char *) gettext (__re_error_msgid
-				 + __re_error_msgid_idx[(int) REG_ESPACE]);
-    }
-
-  /* Since `re_exec' always passes NULL for the `regs' argument, we
-     don't need to initialize the pattern buffer fields which affect it.  */
-
-  /* Match anchors at newlines.  */
-  re_comp_buf.newline_anchor = 1;
-
-  ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
-
-  if (!ret)
-    return NULL;
-
-  /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
-  return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
-}
-
-#ifdef _LIBC
-libc_freeres_fn (free_mem)
-{
-  __regfree (&re_comp_buf);
-}
-#endif
-
-#endif /* _REGEX_RE_COMP */
-
-/* Internal entry point.
-   Compile the regular expression PATTERN, whose length is LENGTH.
-   SYNTAX indicate regular expression's syntax.  */
-
-static reg_errcode_t
-re_compile_internal (regex_t *preg, const char * pattern, size_t length,
-		     reg_syntax_t syntax)
-{
-  reg_errcode_t err = REG_NOERROR;
-  re_dfa_t *dfa;
-  re_string_t regexp;
-
-  /* Initialize the pattern buffer.  */
-  preg->fastmap_accurate = 0;
-  preg->syntax = syntax;
-  preg->not_bol = preg->not_eol = 0;
-  preg->used = 0;
-  preg->re_nsub = 0;
-  preg->can_be_null = 0;
-  preg->regs_allocated = REGS_UNALLOCATED;
-
-  /* Initialize the dfa.  */
-  dfa = (re_dfa_t *) preg->buffer;
-  if (BE (preg->allocated < sizeof (re_dfa_t), 0))
-    {
-      /* If zero allocated, but buffer is non-null, try to realloc
-	 enough space.  This loses if buffer's address is bogus, but
-	 that is the user's responsibility.  If ->buffer is NULL this
-	 is a simple allocation.  */
-      dfa = re_realloc (preg->buffer, re_dfa_t, 1);
-      if (dfa == NULL)
-	return REG_ESPACE;
-      preg->allocated = sizeof (re_dfa_t);
-      preg->buffer = (unsigned char *) dfa;
-    }
-  preg->used = sizeof (re_dfa_t);
-
-  err = init_dfa (dfa, length);
-  if (BE (err != REG_NOERROR, 0))
-    {
-      free_dfa_content (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-      return err;
-    }
-#ifdef DEBUG
-  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
-  dfa->re_str = re_malloc (char, length + 1);
-  strncpy (dfa->re_str, pattern, length + 1);
-#endif
-
-  __libc_lock_init (dfa->lock);
-
-  err = re_string_construct (&regexp, pattern, length, preg->translate,
-			     syntax & RE_ICASE, dfa);
-  if (BE (err != REG_NOERROR, 0))
-    {
-    re_compile_internal_free_return:
-      free_workarea_compile (preg);
-      re_string_destruct (&regexp);
-      free_dfa_content (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-      return err;
-    }
-
-  /* Parse the regular expression, and build a structure tree.  */
-  preg->re_nsub = 0;
-  dfa->str_tree = parse (&regexp, preg, syntax, &err);
-  if (BE (dfa->str_tree == NULL, 0))
-    goto re_compile_internal_free_return;
-
-  /* Analyze the tree and create the nfa.  */
-  err = analyze (preg);
-  if (BE (err != REG_NOERROR, 0))
-    goto re_compile_internal_free_return;
-
-#ifdef RE_ENABLE_I18N
-  /* If possible, do searching in single byte encoding to speed things up.  */
-  if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
-    optimize_utf8 (dfa);
-#endif
-
-  /* Then create the initial state of the dfa.  */
-  err = create_initial_state (dfa);
-
-  /* Release work areas.  */
-  free_workarea_compile (preg);
-  re_string_destruct (&regexp);
-
-  if (BE (err != REG_NOERROR, 0))
-    {
-      free_dfa_content (dfa);
-      preg->buffer = NULL;
-      preg->allocated = 0;
-    }
-
-  return err;
-}
-
-/* Initialize DFA.  We use the length of the regular expression PAT_LEN
-   as the initial length of some arrays.  */
-
-static reg_errcode_t
-init_dfa (re_dfa_t *dfa, size_t pat_len)
-{
-  unsigned int table_size;
-#ifndef _LIBC
-  char *codeset_name;
-#endif
-
-  memset (dfa, '\0', sizeof (re_dfa_t));
-
-  /* Force allocation of str_tree_storage the first time.  */
-  dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
-
-  /* Avoid overflows.  */
-  if (pat_len == SIZE_MAX)
-    return REG_ESPACE;
-
-  dfa->nodes_alloc = pat_len + 1;
-  dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
-
-  /*  table_size = 2 ^ ceil(log pat_len) */
-  for (table_size = 1; ; table_size <<= 1)
-    if (table_size > pat_len)
-      break;
-
-  dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
-  dfa->state_hash_mask = table_size - 1;
-
-  dfa->mb_cur_max = MB_CUR_MAX;
-#ifdef _LIBC
-  if (dfa->mb_cur_max == 6
-      && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
-    dfa->is_utf8 = 1;
-  dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
-		       != 0);
-#else
-# ifdef HAVE_LANGINFO_CODESET
-  codeset_name = nl_langinfo (CODESET);
-# else
-  codeset_name = getenv ("LC_ALL");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LC_CTYPE");
-  if (codeset_name == NULL || codeset_name[0] == '\0')
-    codeset_name = getenv ("LANG");
-  if (codeset_name == NULL)
-    codeset_name = "";
-  else if (strchr (codeset_name, '.') !=  NULL)
-    codeset_name = strchr (codeset_name, '.') + 1;
-# endif
-
-  if (strcasecmp (codeset_name, "UTF-8") == 0
-      || strcasecmp (codeset_name, "UTF8") == 0)
-    dfa->is_utf8 = 1;
-
-  /* We check exhaustively in the loop below if this charset is a
-     superset of ASCII.  */
-  dfa->map_notascii = 0;
-#endif
-
-#ifdef RE_ENABLE_I18N
-  if (dfa->mb_cur_max > 1)
-    {
-      if (dfa->is_utf8)
-	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
-      else
-	{
-	  int i, j, ch;
-
-	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
-	  if (BE (dfa->sb_char == NULL, 0))
-	    return REG_ESPACE;
-
-	  /* Set the bits corresponding to single byte chars.  */
-	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
-	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
-	      {
-		wint_t wch = __btowc (ch);
-		if (wch != WEOF)
-		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
-# ifndef _LIBC
-		if (isascii (ch) && wch != ch)
-		  dfa->map_notascii = 1;
-# endif
-	      }
-	}
-    }
-#endif
-
-  if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
-    return REG_ESPACE;
-  return REG_NOERROR;
-}
-
-/* Initialize WORD_CHAR table, which indicate which character is
-   "word".  In this case "word" means that it is the word construction
-   character used by some operators like "\<", "\>", etc.  */
-
-static void
-internal_function
-init_word_char (re_dfa_t *dfa)
-{
-  dfa->word_ops_used = 1;
-  int i = 0;
-  int ch = 0;
-  if (BE (dfa->map_notascii == 0, 1))
-    {
-      /* Avoid uint32_t and uint64_t as some non-GCC platforms lack
-	 them, an issue when this code is used in Gnulib.  */
-      bitset_word_t bits0 = 0x00000000;
-      bitset_word_t bits1 = 0x03ff0000;
-      bitset_word_t bits2 = 0x87fffffe;
-      bitset_word_t bits3 = 0x07fffffe;
-
-      if (BITSET_WORD_BITS == 64)
-	{
-          /* Pacify gcc -Woverflow on 32-bit platformns.  */
-          dfa->word_char[0] = bits1 << 31 << 1 | bits0;
-          dfa->word_char[1] = bits3 << 31 << 1 | bits2;
-	  i = 2;
-	}
-      else if (BITSET_WORD_BITS == 32)
-	{
-          dfa->word_char[0] = bits0;
-          dfa->word_char[1] = bits1;
-          dfa->word_char[2] = bits2;
-          dfa->word_char[3] = bits3;
-	  i = 4;
-	}
-      else
-	goto general_case;
-      ch = 128;
*** 10468 LINES SKIPPED ***

home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101020601.1026131n081839>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation