Date: Mon, 7 Feb 2022 04:59:10 GMT From: Cy Schubert <cy@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: ae7ca647e4d5 - stable/13 - sqlite3: Vendor import of sqlite3 3.37.1 Message-ID: <202202070459.2174xAJ4062797@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by cy: URL: https://cgit.FreeBSD.org/src/commit/?id=ae7ca647e4d583d573a4ab64151df6259fbaca7d commit ae7ca647e4d583d573a4ab64151df6259fbaca7d Author: Cy Schubert <cy@FreeBSD.org> AuthorDate: 2022-01-06 00:03:14 +0000 Commit: Cy Schubert <cy@FreeBSD.org> CommitDate: 2022-02-07 04:58:39 +0000 sqlite3: Vendor import of sqlite3 3.37.1 Changes at https://www.sqlite.org/releaselog/3_37_1.html. (cherry picked from commit 0197ba465f275a32975d5c93b811e7caaec5b438) --- contrib/sqlite3/Makefile.msc | 2 - contrib/sqlite3/configure | 20 +- contrib/sqlite3/configure.ac | 2 +- contrib/sqlite3/shell.c | 1922 ++- contrib/sqlite3/sqlite3.c | 18589 +++++++++++++++++------------ contrib/sqlite3/sqlite3.h | 294 +- contrib/sqlite3/sqlite3ext.h | 12 + contrib/sqlite3/sqlite3rc.h | 2 +- contrib/sqlite3/tea/configure | 18 +- contrib/sqlite3/tea/configure.ac | 2 +- contrib/sqlite3/tea/generic/tclsqlite3.c | 146 +- contrib/sqlite3/tea/win/nmakehlp.c | 215 +- 12 files changed, 13070 insertions(+), 8154 deletions(-) diff --git a/contrib/sqlite3/Makefile.msc b/contrib/sqlite3/Makefile.msc index 1f177557a25a..40d0e8113da0 100644 --- a/contrib/sqlite3/Makefile.msc +++ b/contrib/sqlite3/Makefile.msc @@ -290,7 +290,6 @@ OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_STMTVTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBPAGE_VTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DBSTAT_VTAB=1 OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_BYTECODE_VTAB=1 -OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_DESERIALIZE=1 !ENDIF OPT_FEATURE_FLAGS = $(OPT_FEATURE_FLAGS) -DSQLITE_ENABLE_COLUMN_METADATA=1 !ENDIF @@ -960,7 +959,6 @@ LIBRESOBJS = SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_FTS4=1 SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_EXPLAIN_COMMENTS=1 SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_OFFSET_SQL_FUNC=1 -SHELL_COMPILE_OPTS = $(SHELL_COMPILE_OPTS) -DSQLITE_ENABLE_DESERIALIZE=1 !ENDIF diff --git a/contrib/sqlite3/configure b/contrib/sqlite3/configure index 4a16edbb6e02..c742c255b440 100755 --- a/contrib/sqlite3/configure +++ b/contrib/sqlite3/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for sqlite 3.35.5. +# Generated by GNU Autoconf 2.69 for sqlite 3.37.1. # # Report bugs to <http://www.sqlite.org>. # @@ -590,8 +590,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='sqlite' PACKAGE_TARNAME='sqlite' -PACKAGE_VERSION='3.35.5' -PACKAGE_STRING='sqlite 3.35.5' +PACKAGE_VERSION='3.37.1' +PACKAGE_STRING='sqlite 3.37.1' PACKAGE_BUGREPORT='http://www.sqlite.org' PACKAGE_URL='' @@ -1342,7 +1342,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures sqlite 3.35.5 to adapt to many kinds of systems. +\`configure' configures sqlite 3.37.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1413,7 +1413,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of sqlite 3.35.5:";; + short | recursive ) echo "Configuration of sqlite 3.37.1:";; esac cat <<\_ACEOF @@ -1539,7 +1539,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -sqlite configure 3.35.5 +sqlite configure 3.37.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1954,7 +1954,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by sqlite $as_me 3.35.5, which was +It was created by sqlite $as_me 3.37.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2820,7 +2820,7 @@ fi # Define the identity of the package. PACKAGE='sqlite' - VERSION='3.35.5' + VERSION='3.37.1' cat >>confdefs.h <<_ACEOF @@ -14569,7 +14569,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by sqlite $as_me 3.35.5, which was +This file was extended by sqlite $as_me 3.37.1, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -14626,7 +14626,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -sqlite config.status 3.35.5 +sqlite config.status 3.37.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/contrib/sqlite3/configure.ac b/contrib/sqlite3/configure.ac index 166f7019cd5c..a1057765d675 100644 --- a/contrib/sqlite3/configure.ac +++ b/contrib/sqlite3/configure.ac @@ -10,7 +10,7 @@ # AC_PREREQ(2.61) -AC_INIT(sqlite, 3.35.5, http://www.sqlite.org) +AC_INIT(sqlite, 3.37.1, http://www.sqlite.org) AC_CONFIG_SRCDIR([sqlite3.c]) AC_CONFIG_AUX_DIR([.]) diff --git a/contrib/sqlite3/shell.c b/contrib/sqlite3/shell.c index 27de22a3819b..26c0ae57174f 100644 --- a/contrib/sqlite3/shell.c +++ b/contrib/sqlite3/shell.c @@ -35,6 +35,18 @@ #define _CRT_SECURE_NO_WARNINGS #endif +/* +** Optionally #include a user-defined header, whereby compilation options +** may be set prior to where they take effect, but after platform setup. +** If SQLITE_CUSTOM_INCLUDE=? is defined, its value names the #include +** file. Note that this macro has a like effect on sqlite3.c compilation. +*/ +#ifdef SQLITE_CUSTOM_INCLUDE +# define INC_STRINGIFY_(f) #f +# define INC_STRINGIFY(f) INC_STRINGIFY_(f) +# include INC_STRINGIFY(SQLITE_CUSTOM_INCLUDE) +#endif + /* ** Determine if we are dealing with WinRT, which provides only a subset of ** the full Win32 API. @@ -244,6 +256,7 @@ static sqlite3_int64 timeOfDay(void){ static sqlite3_vfs *clockVfs = 0; sqlite3_int64 t; if( clockVfs==0 ) clockVfs = sqlite3_vfs_find(0); + if( clockVfs==0 ) return 0; /* Never actually happens */ if( clockVfs->iVersion>=2 && clockVfs->xCurrentTimeInt64!=0 ){ clockVfs->xCurrentTimeInt64(clockVfs, &t); }else{ @@ -640,19 +653,38 @@ static int strlenChar(const char *z){ } /* -** Return true if zFile does not exist or if it is not an ordinary file. +** Return open FILE * if zFile exists, can be opened for read +** and is an ordinary file or a character stream source. +** Otherwise return 0. */ +static FILE * openChrSource(const char *zFile){ #ifdef _WIN32 -# define notNormalFile(X) 0 + struct _stat x = {0}; +# define STAT_CHR_SRC(mode) ((mode & (_S_IFCHR|_S_IFIFO|_S_IFREG))!=0) + /* On Windows, open first, then check the stream nature. This order + ** is necessary because _stat() and sibs, when checking a named pipe, + ** effectively break the pipe as its supplier sees it. */ + FILE *rv = fopen(zFile, "rb"); + if( rv==0 ) return 0; + if( _fstat(_fileno(rv), &x) != 0 + || !STAT_CHR_SRC(x.st_mode)){ + fclose(rv); + rv = 0; + } + return rv; #else -static int notNormalFile(const char *zFile){ - struct stat x; - int rc; - memset(&x, 0, sizeof(x)); - rc = stat(zFile, &x); - return rc || !S_ISREG(x.st_mode); -} + struct stat x = {0}; + int rc = stat(zFile, &x); +# define STAT_CHR_SRC(mode) (S_ISREG(mode)||S_ISFIFO(mode)||S_ISCHR(mode)) + if( rc!=0 ) return 0; + if( STAT_CHR_SRC(x.st_mode) ){ + return fopen(zFile, "rb"); + }else{ + return 0; + } #endif +#undef STAT_CHR_SRC +} /* ** This routine reads a line of text from FILE in, stores @@ -842,7 +874,7 @@ static void appendText(ShellText *p, char const *zAppend, char quote){ } } - if( p->n+len>=p->nAlloc ){ + if( p->z==0 || p->n+len>=p->nAlloc ){ p->nAlloc = p->nAlloc*2 + len + 20; p->z = realloc(p->z, p->nAlloc); if( p->z==0 ) shell_out_of_memory(); @@ -988,7 +1020,7 @@ static void shellAddSchemaName( sqlite3 *db = sqlite3_context_db_handle(pCtx); UNUSED_PARAMETER(nVal); if( zIn!=0 && strncmp(zIn, "CREATE ", 7)==0 ){ - for(i=0; i<(int)(sizeof(aPrefix)/sizeof(aPrefix[0])); i++){ + for(i=0; i<ArraySize(aPrefix); i++){ int n = strlen30(aPrefix[i]); if( strncmp(zIn+7, aPrefix[i], n)==0 && zIn[n+7]==' ' ){ char *z = 0; @@ -2191,6 +2223,11 @@ int sqlite3_shathree_init( ** $path is a relative path, then $path is interpreted relative to $dir. ** And the paths returned in the "name" column of the table are also ** relative to directory $dir. +** +** Notes on building this extension for Windows: +** Unless linked statically with the SQLite library, a preprocessor +** symbol, FILEIO_WIN32_DLL, must be #define'd to create a stand-alone +** DLL form of this extension for WIN32. See its use below for details. */ /* #include "sqlite3ext.h" */ SQLITE_EXTENSION_INIT1 @@ -2344,6 +2381,22 @@ static sqlite3_uint64 fileTimeToUnixTime( return (fileIntervals.QuadPart - epochIntervals.QuadPart) / 10000000; } + +#if defined(FILEIO_WIN32_DLL) && (defined(_WIN32) || defined(WIN32)) +# /* To allow a standalone DLL, use this next replacement function: */ +# undef sqlite3_win32_utf8_to_unicode +# define sqlite3_win32_utf8_to_unicode utf8_to_utf16 +# +LPWSTR utf8_to_utf16(const char *z){ + int nAllot = MultiByteToWideChar(CP_UTF8, 0, z, -1, NULL, 0); + LPWSTR rv = sqlite3_malloc(nAllot * sizeof(WCHAR)); + if( rv!=0 && 0 < MultiByteToWideChar(CP_UTF8, 0, z, -1, rv, nAllot) ) + return rv; + sqlite3_free(rv); + return 0; +} +#endif + /* ** This function attempts to normalize the time values found in the stat() ** buffer to UTC. This is necessary on Win32, where the runtime library @@ -3118,6 +3171,14 @@ int sqlite3_fileio_init( return rc; } +#if defined(FILEIO_WIN32_DLL) && (defined(_WIN32) || defined(WIN32)) +/* To allow a standalone DLL, make test_windirent.c use the same + * redefined SQLite API calls as the above extension code does. + * Just pull in this .c to accomplish this. As a beneficial side + * effect, this extension becomes a single translation unit. */ +# include "test_windirent.c" +#endif + /************************* End ../ext/misc/fileio.c ********************/ /************************* Begin ../ext/misc/completion.c ******************/ /* @@ -4156,9 +4217,11 @@ static int apndOpen( rc = pBaseVfs->xOpen(pBaseVfs, zName, pBaseFile, flags, pOutFlags); if( rc==SQLITE_OK ){ rc = pBaseFile->pMethods->xFileSize(pBaseFile, &sz); + if( rc ){ + pBaseFile->pMethods->xClose(pBaseFile); + } } if( rc ){ - pBaseFile->pMethods->xClose(pBaseFile); pFile->pMethods = 0; return rc; } @@ -4281,6 +4344,7 @@ int sqlite3_appendvfs_init( (void)pzErrMsg; (void)db; pOrig = sqlite3_vfs_find(0); + if( pOrig==0 ) return SQLITE_ERROR; apnd_vfs.iVersion = pOrig->iVersion; apnd_vfs.pAppData = pOrig; apnd_vfs.szOsFile = pOrig->szOsFile + sizeof(ApndFile); @@ -4963,10 +5027,11 @@ static void decimalSubFunc( Decimal *pA = decimal_new(context, argv[0], 0, 0); Decimal *pB = decimal_new(context, argv[1], 0, 0); UNUSED_PARAMETER(argc); - if( pB==0 ) return; - pB->sign = !pB->sign; - decimal_add(pA, pB); - decimal_result(context, pA); + if( pB ){ + pB->sign = !pB->sign; + decimal_add(pA, pB); + decimal_result(context, pA); + } decimal_free(pA); decimal_free(pB); } @@ -5335,7 +5400,11 @@ static void ieee754func( e += 1075; if( e<=0 ){ /* Subnormal */ - m >>= 1-e; + if( 1-e >= 64 ){ + m = 0; + }else{ + m >>= 1-e; + } e = 0; }else if( e>0x7ff ){ e = 0x7ff; @@ -5756,11 +5825,12 @@ static int seriesFilter( ** (8) output in descending order */ static int seriesBestIndex( - sqlite3_vtab *tabUnused, + sqlite3_vtab *pVTab, sqlite3_index_info *pIdxInfo ){ int i, j; /* Loop over constraints */ int idxNum = 0; /* The query plan bitmask */ + int bStartSeen = 0; /* EQ constraint seen on the START column */ int unusableMask = 0; /* Mask of unusable constraints */ int nArg = 0; /* Number of arguments that seriesFilter() expects */ int aIdx[3]; /* Constraints on start, stop, and step */ @@ -5770,7 +5840,7 @@ static int seriesBestIndex( ** are the last three columns in the virtual table. */ assert( SERIES_COLUMN_STOP == SERIES_COLUMN_START+1 ); assert( SERIES_COLUMN_STEP == SERIES_COLUMN_START+2 ); - (void)tabUnused; + aIdx[0] = aIdx[1] = aIdx[2] = -1; pConstraint = pIdxInfo->aConstraint; for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ @@ -5780,6 +5850,7 @@ static int seriesBestIndex( iCol = pConstraint->iColumn - SERIES_COLUMN_START; assert( iCol>=0 && iCol<=2 ); iMask = 1 << iCol; + if( iCol==0 ) bStartSeen = 1; if( pConstraint->usable==0 ){ unusableMask |= iMask; continue; @@ -5794,6 +5865,18 @@ static int seriesBestIndex( pIdxInfo->aConstraintUsage[j].omit = !SQLITE_SERIES_CONSTRAINT_VERIFY; } } + /* The current generate_column() implementation requires at least one + ** argument (the START value). Legacy versions assumed START=0 if the + ** first argument was omitted. Compile with -DZERO_ARGUMENT_GENERATE_SERIES + ** to obtain the legacy behavior */ +#ifndef ZERO_ARGUMENT_GENERATE_SERIES + if( !bStartSeen ){ + sqlite3_free(pVTab->zErrMsg); + pVTab->zErrMsg = sqlite3_mprintf( + "first argument to \"generate_series()\" missing or unusable"); + return SQLITE_ERROR; + } +#endif if( (unusableMask & ~idxNum)!=0 ){ /* The start, stop, and step columns are inputs. Therefore if there ** are unusable constraints on any of start, stop, or step then @@ -5867,7 +5950,7 @@ int sqlite3_series_init( int rc = SQLITE_OK; SQLITE_EXTENSION_INIT2(pApi); #ifndef SQLITE_OMIT_VIRTUALTABLE - if( sqlite3_libversion_number()<3008012 ){ + if( sqlite3_libversion_number()<3008012 && pzErrMsg!=0 ){ *pzErrMsg = sqlite3_mprintf( "generate_series() requires SQLite 3.8.12 or later"); return SQLITE_ERROR; @@ -5878,6 +5961,780 @@ int sqlite3_series_init( } /************************* End ../ext/misc/series.c ********************/ +/************************* Begin ../ext/misc/regexp.c ******************/ +/* +** 2012-11-13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** The code in this file implements a compact but reasonably +** efficient regular-expression matcher for posix extended regular +** expressions against UTF8 text. +** +** This file is an SQLite extension. It registers a single function +** named "regexp(A,B)" where A is the regular expression and B is the +** string to be matched. By registering this function, SQLite will also +** then implement the "B regexp A" operator. Note that with the function +** the regular expression comes first, but with the operator it comes +** second. +** +** The following regular expression syntax is supported: +** +** X* zero or more occurrences of X +** X+ one or more occurrences of X +** X? zero or one occurrences of X +** X{p,q} between p and q occurrences of X +** (X) match X +** X|Y X or Y +** ^X X occurring at the beginning of the string +** X$ X occurring at the end of the string +** . Match any single character +** \c Character c where c is one of \{}()[]|*+?. +** \c C-language escapes for c in afnrtv. ex: \t or \n +** \uXXXX Where XXXX is exactly 4 hex digits, unicode value XXXX +** \xXX Where XX is exactly 2 hex digits, unicode value XX +** [abc] Any single character from the set abc +** [^abc] Any single character not in the set abc +** [a-z] Any single character in the range a-z +** [^a-z] Any single character not in the range a-z +** \b Word boundary +** \w Word character. [A-Za-z0-9_] +** \W Non-word character +** \d Digit +** \D Non-digit +** \s Whitespace character +** \S Non-whitespace character +** +** A nondeterministic finite automaton (NFA) is used for matching, so the +** performance is bounded by O(N*M) where N is the size of the regular +** expression and M is the size of the input string. The matcher never +** exhibits exponential behavior. Note that the X{p,q} operator expands +** to p copies of X following by q-p copies of X? and that the size of the +** regular expression in the O(N*M) performance bound is computed after +** this expansion. +*/ +#include <string.h> +#include <stdlib.h> +/* #include "sqlite3ext.h" */ +SQLITE_EXTENSION_INIT1 + +/* +** The following #defines change the names of some functions implemented in +** this file to prevent name collisions with C-library functions of the +** same name. +*/ +#define re_match sqlite3re_match +#define re_compile sqlite3re_compile +#define re_free sqlite3re_free + +/* The end-of-input character */ +#define RE_EOF 0 /* End of input */ + +/* The NFA is implemented as sequence of opcodes taken from the following +** set. Each opcode has a single integer argument. +*/ +#define RE_OP_MATCH 1 /* Match the one character in the argument */ +#define RE_OP_ANY 2 /* Match any one character. (Implements ".") */ +#define RE_OP_ANYSTAR 3 /* Special optimized version of .* */ +#define RE_OP_FORK 4 /* Continue to both next and opcode at iArg */ +#define RE_OP_GOTO 5 /* Jump to opcode at iArg */ +#define RE_OP_ACCEPT 6 /* Halt and indicate a successful match */ +#define RE_OP_CC_INC 7 /* Beginning of a [...] character class */ +#define RE_OP_CC_EXC 8 /* Beginning of a [^...] character class */ +#define RE_OP_CC_VALUE 9 /* Single value in a character class */ +#define RE_OP_CC_RANGE 10 /* Range of values in a character class */ +#define RE_OP_WORD 11 /* Perl word character [A-Za-z0-9_] */ +#define RE_OP_NOTWORD 12 /* Not a perl word character */ +#define RE_OP_DIGIT 13 /* digit: [0-9] */ +#define RE_OP_NOTDIGIT 14 /* Not a digit */ +#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */ +#define RE_OP_NOTSPACE 16 /* Not a digit */ +#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */ + +/* Each opcode is a "state" in the NFA */ +typedef unsigned short ReStateNumber; + +/* Because this is an NFA and not a DFA, multiple states can be active at +** once. An instance of the following object records all active states in +** the NFA. The implementation is optimized for the common case where the +** number of actives states is small. +*/ +typedef struct ReStateSet { + unsigned nState; /* Number of current states */ + ReStateNumber *aState; /* Current states */ +} ReStateSet; + +/* An input string read one character at a time. +*/ +typedef struct ReInput ReInput; +struct ReInput { + const unsigned char *z; /* All text */ + int i; /* Next byte to read */ + int mx; /* EOF when i>=mx */ +}; + +/* A compiled NFA (or an NFA that is in the process of being compiled) is +** an instance of the following object. +*/ +typedef struct ReCompiled ReCompiled; +struct ReCompiled { + ReInput sIn; /* Regular expression text */ + const char *zErr; /* Error message to return */ + char *aOp; /* Operators for the virtual machine */ + int *aArg; /* Arguments to each operator */ + unsigned (*xNextChar)(ReInput*); /* Next character function */ + unsigned char zInit[12]; /* Initial text to match */ + int nInit; /* Number of characters in zInit */ + unsigned nState; /* Number of entries in aOp[] and aArg[] */ + unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */ +}; + +/* Add a state to the given state set if it is not already there */ +static void re_add_state(ReStateSet *pSet, int newState){ + unsigned i; + for(i=0; i<pSet->nState; i++) if( pSet->aState[i]==newState ) return; + pSet->aState[pSet->nState++] = (ReStateNumber)newState; +} + +/* Extract the next unicode character from *pzIn and return it. Advance +** *pzIn to the first byte past the end of the character returned. To +** be clear: this routine converts utf8 to unicode. This routine is +** optimized for the common case where the next character is a single byte. +*/ +static unsigned re_next_char(ReInput *p){ + unsigned c; + if( p->i>=p->mx ) return 0; + c = p->z[p->i++]; + if( c>=0x80 ){ + if( (c&0xe0)==0xc0 && p->i<p->mx && (p->z[p->i]&0xc0)==0x80 ){ + c = (c&0x1f)<<6 | (p->z[p->i++]&0x3f); + if( c<0x80 ) c = 0xfffd; + }else if( (c&0xf0)==0xe0 && p->i+1<p->mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 ){ + c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); + p->i += 2; + if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; + }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 + && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ + c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) + | (p->z[p->i+2]&0x3f); + p->i += 3; + if( c<=0xffff || c>0x10ffff ) c = 0xfffd; + }else{ + c = 0xfffd; + } + } + return c; +} +static unsigned re_next_char_nocase(ReInput *p){ + unsigned c = re_next_char(p); + if( c>='A' && c<='Z' ) c += 'a' - 'A'; + return c; +} + +/* Return true if c is a perl "word" character: [A-Za-z0-9_] */ +static int re_word_char(int c){ + return (c>='0' && c<='9') || (c>='a' && c<='z') + || (c>='A' && c<='Z') || c=='_'; +} + +/* Return true if c is a "digit" character: [0-9] */ +static int re_digit_char(int c){ + return (c>='0' && c<='9'); +} + +/* Return true if c is a perl "space" character: [ \t\r\n\v\f] */ +static int re_space_char(int c){ + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; +} + +/* Run a compiled regular expression on the zero-terminated input +** string zIn[]. Return true on a match and false if there is no match. +*/ +static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){ + ReStateSet aStateSet[2], *pThis, *pNext; + ReStateNumber aSpace[100]; + ReStateNumber *pToFree; + unsigned int i = 0; + unsigned int iSwap = 0; + int c = RE_EOF+1; + int cPrev = 0; + int rc = 0; + ReInput in; + + in.z = zIn; + in.i = 0; + in.mx = nIn>=0 ? nIn : (int)strlen((char const*)zIn); + + /* Look for the initial prefix match, if there is one. */ + if( pRe->nInit ){ + unsigned char x = pRe->zInit[0]; + while( in.i+pRe->nInit<=in.mx + && (zIn[in.i]!=x || + strncmp((const char*)zIn+in.i, (const char*)pRe->zInit, pRe->nInit)!=0) + ){ + in.i++; + } + if( in.i+pRe->nInit>in.mx ) return 0; + } + + if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){ + pToFree = 0; + aStateSet[0].aState = aSpace; + }else{ + pToFree = sqlite3_malloc64( sizeof(ReStateNumber)*2*pRe->nState ); + if( pToFree==0 ) return -1; + aStateSet[0].aState = pToFree; + } + aStateSet[1].aState = &aStateSet[0].aState[pRe->nState]; + pNext = &aStateSet[1]; + pNext->nState = 0; + re_add_state(pNext, 0); + while( c!=RE_EOF && pNext->nState>0 ){ + cPrev = c; + c = pRe->xNextChar(&in); + pThis = pNext; + pNext = &aStateSet[iSwap]; + iSwap = 1 - iSwap; + pNext->nState = 0; + for(i=0; i<pThis->nState; i++){ + int x = pThis->aState[i]; + switch( pRe->aOp[x] ){ + case RE_OP_MATCH: { + if( pRe->aArg[x]==c ) re_add_state(pNext, x+1); + break; + } + case RE_OP_ANY: { + if( c!=0 ) re_add_state(pNext, x+1); + break; + } + case RE_OP_WORD: { + if( re_word_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTWORD: { + if( !re_word_char(c) && c!=0 ) re_add_state(pNext, x+1); + break; + } + case RE_OP_DIGIT: { + if( re_digit_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTDIGIT: { + if( !re_digit_char(c) && c!=0 ) re_add_state(pNext, x+1); + break; + } + case RE_OP_SPACE: { + if( re_space_char(c) ) re_add_state(pNext, x+1); + break; + } + case RE_OP_NOTSPACE: { + if( !re_space_char(c) && c!=0 ) re_add_state(pNext, x+1); + break; + } + case RE_OP_BOUNDARY: { + if( re_word_char(c)!=re_word_char(cPrev) ) re_add_state(pThis, x+1); + break; + } + case RE_OP_ANYSTAR: { + re_add_state(pNext, x); + re_add_state(pThis, x+1); + break; + } + case RE_OP_FORK: { + re_add_state(pThis, x+pRe->aArg[x]); + re_add_state(pThis, x+1); + break; + } + case RE_OP_GOTO: { + re_add_state(pThis, x+pRe->aArg[x]); + break; + } + case RE_OP_ACCEPT: { + rc = 1; + goto re_match_end; + } + case RE_OP_CC_EXC: { + if( c==0 ) break; + /* fall-through */ goto re_op_cc_inc; + } + case RE_OP_CC_INC: re_op_cc_inc: { + int j = 1; + int n = pRe->aArg[x]; + int hit = 0; + for(j=1; j>0 && j<n; j++){ + if( pRe->aOp[x+j]==RE_OP_CC_VALUE ){ + if( pRe->aArg[x+j]==c ){ + hit = 1; + j = -1; + } + }else{ + if( pRe->aArg[x+j]<=c && pRe->aArg[x+j+1]>=c ){ + hit = 1; + j = -1; + }else{ + j++; + } + } + } + if( pRe->aOp[x]==RE_OP_CC_EXC ) hit = !hit; + if( hit ) re_add_state(pNext, x+n); + break; + } + } + } + } + for(i=0; i<pNext->nState; i++){ + if( pRe->aOp[pNext->aState[i]]==RE_OP_ACCEPT ){ rc = 1; break; } + } +re_match_end: + sqlite3_free(pToFree); + return rc; +} + +/* Resize the opcode and argument arrays for an RE under construction. +*/ +static int re_resize(ReCompiled *p, int N){ + char *aOp; + int *aArg; + aOp = sqlite3_realloc64(p->aOp, N*sizeof(p->aOp[0])); + if( aOp==0 ) return 1; + p->aOp = aOp; + aArg = sqlite3_realloc64(p->aArg, N*sizeof(p->aArg[0])); + if( aArg==0 ) return 1; + p->aArg = aArg; + p->nAlloc = N; + return 0; +} + +/* Insert a new opcode and argument into an RE under construction. The +** insertion point is just prior to existing opcode iBefore. +*/ +static int re_insert(ReCompiled *p, int iBefore, int op, int arg){ + int i; + if( p->nAlloc<=p->nState && re_resize(p, p->nAlloc*2) ) return 0; + for(i=p->nState; i>iBefore; i--){ + p->aOp[i] = p->aOp[i-1]; + p->aArg[i] = p->aArg[i-1]; + } + p->nState++; + p->aOp[iBefore] = (char)op; + p->aArg[iBefore] = arg; + return iBefore; +} + +/* Append a new opcode and argument to the end of the RE under construction. +*/ +static int re_append(ReCompiled *p, int op, int arg){ + return re_insert(p, p->nState, op, arg); +} + +/* Make a copy of N opcodes starting at iStart onto the end of the RE +** under construction. +*/ +static void re_copy(ReCompiled *p, int iStart, int N){ + if( p->nState+N>=p->nAlloc && re_resize(p, p->nAlloc*2+N) ) return; + memcpy(&p->aOp[p->nState], &p->aOp[iStart], N*sizeof(p->aOp[0])); + memcpy(&p->aArg[p->nState], &p->aArg[iStart], N*sizeof(p->aArg[0])); + p->nState += N; +} + +/* Return true if c is a hexadecimal digit character: [0-9a-fA-F] +** If c is a hex digit, also set *pV = (*pV)*16 + valueof(c). If +** c is not a hex digit *pV is unchanged. +*/ +static int re_hex(int c, int *pV){ + if( c>='0' && c<='9' ){ + c -= '0'; + }else if( c>='a' && c<='f' ){ + c -= 'a' - 10; + }else if( c>='A' && c<='F' ){ + c -= 'A' - 10; + }else{ + return 0; + } + *pV = (*pV)*16 + (c & 0xff); + return 1; +} + +/* A backslash character has been seen, read the next character and +** return its interpretation. +*/ +static unsigned re_esc_char(ReCompiled *p){ + static const char zEsc[] = "afnrtv\\()*.+?[$^{|}]"; + static const char zTrans[] = "\a\f\n\r\t\v"; + int i, v = 0; + char c; + if( p->sIn.i>=p->sIn.mx ) return 0; + c = p->sIn.z[p->sIn.i]; + if( c=='u' && p->sIn.i+4<p->sIn.mx ){ + const unsigned char *zIn = p->sIn.z + p->sIn.i; + if( re_hex(zIn[1],&v) + && re_hex(zIn[2],&v) + && re_hex(zIn[3],&v) + && re_hex(zIn[4],&v) + ){ + p->sIn.i += 5; + return v; + } + } + if( c=='x' && p->sIn.i+2<p->sIn.mx ){ + const unsigned char *zIn = p->sIn.z + p->sIn.i; + if( re_hex(zIn[1],&v) + && re_hex(zIn[2],&v) + ){ + p->sIn.i += 3; + return v; + } + } + for(i=0; zEsc[i] && zEsc[i]!=c; i++){} + if( zEsc[i] ){ + if( i<6 ) c = zTrans[i]; + p->sIn.i++; + }else{ + p->zErr = "unknown \\ escape"; + } + return c; +} + +/* Forward declaration */ +static const char *re_subcompile_string(ReCompiled*); + +/* Peek at the next byte of input */ +static unsigned char rePeek(ReCompiled *p){ + return p->sIn.i<p->sIn.mx ? p->sIn.z[p->sIn.i] : 0; +} + +/* Compile RE text into a sequence of opcodes. Continue up to the +** first unmatched ")" character, then return. If an error is found, +** return a pointer to the error message string. +*/ +static const char *re_subcompile_re(ReCompiled *p){ + const char *zErr; + int iStart, iEnd, iGoto; + iStart = p->nState; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + while( rePeek(p)=='|' ){ + iEnd = p->nState; + re_insert(p, iStart, RE_OP_FORK, iEnd + 2 - iStart); + iGoto = re_append(p, RE_OP_GOTO, 0); + p->sIn.i++; + zErr = re_subcompile_string(p); + if( zErr ) return zErr; + p->aArg[iGoto] = p->nState - iGoto; + } + return 0; +} + +/* Compile an element of regular expression text (anything that can be +** an operand to the "|" operator). Return NULL on success or a pointer +** to the error message if there is a problem. +*/ +static const char *re_subcompile_string(ReCompiled *p){ + int iPrev = -1; + int iStart; + unsigned c; + const char *zErr; + while( (c = p->xNextChar(&p->sIn))!=0 ){ + iStart = p->nState; + switch( c ){ + case '|': + case '$': + case ')': { + p->sIn.i--; + return 0; + } + case '(': { + zErr = re_subcompile_re(p); + if( zErr ) return zErr; + if( rePeek(p)!=')' ) return "unmatched '('"; + p->sIn.i++; + break; + } + case '.': { + if( rePeek(p)=='*' ){ + re_append(p, RE_OP_ANYSTAR, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_ANY, 0); + } + break; + } + case '*': { + if( iPrev<0 ) return "'*' without operand"; + re_insert(p, iPrev, RE_OP_GOTO, p->nState - iPrev + 1); + re_append(p, RE_OP_FORK, iPrev - p->nState + 1); + break; + } + case '+': { + if( iPrev<0 ) return "'+' without operand"; + re_append(p, RE_OP_FORK, iPrev - p->nState); + break; + } + case '?': { + if( iPrev<0 ) return "'?' without operand"; + re_insert(p, iPrev, RE_OP_FORK, p->nState - iPrev+1); + break; + } + case '{': { + int m = 0, n = 0; + int sz, j; + if( iPrev<0 ) return "'{m,n}' without operand"; + while( (c=rePeek(p))>='0' && c<='9' ){ m = m*10 + c - '0'; p->sIn.i++; } + n = m; + if( c==',' ){ + p->sIn.i++; + n = 0; + while( (c=rePeek(p))>='0' && c<='9' ){ n = n*10 + c-'0'; p->sIn.i++; } + } + if( c!='}' ) return "unmatched '{'"; + if( n>0 && n<m ) return "n less than m in '{m,n}'"; + p->sIn.i++; + sz = p->nState - iPrev; + if( m==0 ){ + if( n==0 ) return "both m and n are zero in '{m,n}'"; + re_insert(p, iPrev, RE_OP_FORK, sz+1); + n--; + }else{ + for(j=1; j<m; j++) re_copy(p, iPrev, sz); + } + for(j=m; j<n; j++){ + re_append(p, RE_OP_FORK, sz+1); + re_copy(p, iPrev, sz); + } + if( n==0 && m>0 ){ + re_append(p, RE_OP_FORK, -sz); + } + break; + } + case '[': { + int iFirst = p->nState; + if( rePeek(p)=='^' ){ + re_append(p, RE_OP_CC_EXC, 0); + p->sIn.i++; + }else{ + re_append(p, RE_OP_CC_INC, 0); + } + while( (c = p->xNextChar(&p->sIn))!=0 ){ + if( c=='[' && rePeek(p)==':' ){ + return "POSIX character classes not supported"; + } + if( c=='\\' ) c = re_esc_char(p); + if( rePeek(p)=='-' ){ + re_append(p, RE_OP_CC_RANGE, c); + p->sIn.i++; + c = p->xNextChar(&p->sIn); + if( c=='\\' ) c = re_esc_char(p); + re_append(p, RE_OP_CC_RANGE, c); + }else{ + re_append(p, RE_OP_CC_VALUE, c); + } + if( rePeek(p)==']' ){ p->sIn.i++; break; } + } *** 35124 LINES SKIPPED ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202202070459.2174xAJ4062797>