Date: Thu, 28 Sep 2000 04:04:21 +0200 (CEST) From: Hubert Feyrer <feyrer@vivien.franken.de> To: FreeBSD-gnats-submit@freebsd.org, hubertf@netbsd.org Subject: bin/21605: make(1) lacks :C/// variable modifier (patch included) Message-ID: <200009280204.EAA90935@vivien.franken.de>
next in thread | raw e-mail | index | archive | help
>Number: 21605
>Category: bin
>Synopsis: make(1) lacks :C/// variable modifier (patch included)
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: freebsd-bugs
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: change-request
>Submitter-Id: current-users
>Arrival-Date: Wed Sep 27 19:10:01 PDT 2000
>Closed-Date:
>Last-Modified:
>Originator: Hubert Feyrer
>Release: FreeBSD-current as of 20000928
>Organization:
NetBSD.org
>Environment:
FreeBSD 4.0-STABLE, NetBSD-current
>Description:
make(1) lacks the :C/// variable modifier found in NetBSD and
OpenBSD:
C/pattern/replacement/[1g]
The C modifier is just like the S modifier except
that the old and new strings, instead of being simple
strings, are a regular expression (see regex(3)) and
an ed(1)-style replacement string. Normally, the
first occurrence of the pattern in each word of the
value is changed. The `1' modifier causes the sub-
stitution to apply to at most one word; the `g' modi-
fier causes the substitution to apply to as many in-
stances of the search pattern as occur in the word or
words it is found in. Note that `1' and `g' are or-
thogonal; the former specifies whether multiple words
are potentially affected, the latter whether multiple
substitutions can potentially occur within each af-
fected word.
Useful for string manipulations in bsd.port.mk and others,
instead of forking off sed(1) processes. See example below.
Contributed as a result of discussion on the bsdports mailing
list to merge the BSD ports/packages collections, see
http://www.openpackages.org/.
>How-To-Repeat:
$ cat /tmp/m
P= ports/cat/pkg
CAT= ${P:C/^.*\/([^\/]*)\/[^\/]*$/\1/}
PKG= ${P:C/^.*\/([^\/]*)$/\1/}
bla:
@echo P=${P}
@echo CAT=${CAT}
@echo PKG=${PKG}
$
$ make -f /tmp/m
P=pkgsrc/cat/pkg
Unknown modifier 'C'
CAT=
Unknown modifier 'C'
PKG=
$
$ nbmake -f /tmp/m
P=pkgsrc/cat/pkg
CAT=cat
PKG=pkg
The same thing can be applied in bsd.ports.mk to get first,
second and if needed third part of DEPENDS lines.
>Fix:
Apply the following patch:
Patch against FreeBSD-current as of 20000928 to teach it the :C///
variable modifier that's available in NetBSD. :C/// is similar to
the :S/// modifier, but it allows a regular expression as the
term that's being replaced.
Useful for string manipulations in bsd.port.mk and others, instead
of forking off sed(1) processes.
Contributed as a result of discussion on the bsdports mailing
list to merge the BSD ports/packages collections, see
http://www.openpackages.org/.
- Hubert Feyrer <hubertf@netbsd.org>
Index: make.1
===================================================================
RCS file: /home/ncvs/src/usr.bin/make/make.1,v
retrieving revision 1.33
diff -b -u -r1.33 make.1
--- make.1 2000/09/25 18:46:24 1.33
+++ make.1 2000/09/28 01:01:47
@@ -596,6 +596,34 @@
The colon may be escaped with a backslash
.Pq Ql \e .
.Bl -tag -width Cm E\&
+.Sm off
+.It Cm C No \&/ Ar pattern Xo
+.No \&/ Ar replacement
+.No \&/ Op Cm 1g
+.Xc
+.Sm on
+The
+.Cm C
+modifier is just like the
+.Cm S
+modifier except that the old and new strings, instead of being
+simple strings, are a regular expression (see
+.Xr regex 3 )
+and an
+.Xr ed 1 Ns \-style
+replacement string. Normally, the first occurrence of the pattern in
+each word of the value is changed. The
+.Ql 1
+modifier causes the substitution to apply to at most one word; the
+.Ql g
+modifier causes the substitution to apply to as many instances of the
+search pattern as occur in the word or words it is found in. Note that
+.Ql 1
+and
+.Ql g
+are orthogonal; the former specifies whether multiple words are
+potentially affected, the latter whether multiple substitutions can
+potentially occur within each affected word.
.It Cm E
Replaces each word in the variable with its suffix.
.It Cm H
Index: var.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/make/var.c,v
retrieving revision 1.17
diff -b -u -r1.17 var.c
--- var.c 2000/07/09 02:54:54 1.17
+++ var.c 2000/09/28 01:01:48
@@ -86,6 +86,10 @@
*/
#include <ctype.h>
+#ifndef NO_REGEX
+#include <sys/types.h>
+#include <regex.h>
+#endif
#include <stdlib.h>
#include "make.h"
#include "buf.h"
@@ -144,8 +148,11 @@
/* Var*Pattern flags */
#define VAR_SUB_GLOBAL 0x01 /* Apply substitution globally */
+#define VAR_SUB_ONE 0x02 /* Apply substitution to one word */
+#define VAR_SUB_MATCHED 0x04 /* There was a match */
#define VAR_MATCH_START 0x08 /* Match at start of word */
#define VAR_MATCH_END 0x10 /* Match at end of word */
+#define VAR_NOSUBST 0x20 /* don't expand vars in VarGetPattern */
typedef struct {
char *lhs; /* String to match */
@@ -155,6 +162,16 @@
int flags;
} VarPattern;
+#ifndef NO_REGEX
+typedef struct {
+ regex_t re;
+ int nsub;
+ regmatch_t *matches;
+ char *replace;
+ int flags;
+} VarREPattern;
+#endif
+
static int VarCmp __P((ClientData, ClientData));
static Var *VarFind __P((char *, GNode *, int));
static void VarAdd __P((char *, char *, GNode *));
@@ -168,7 +185,13 @@
static Boolean VarSYSVMatch __P((char *, Boolean, Buffer, ClientData));
#endif
static Boolean VarNoMatch __P((char *, Boolean, Buffer, ClientData));
+#ifndef NO_REGEX
+static void VarREError __P((int, regex_t *, const char *));
+static Boolean VarRESubstitute __P((char *, Boolean, Buffer, ClientData));
+#endif
static Boolean VarSubstitute __P((char *, Boolean, Buffer, ClientData));
+static char *VarGetPattern __P((GNode *, int, char **, int, int *, int *,
+ VarPattern *));
static char *VarQuote __P((char *));
static char *VarModify __P((char *, Boolean (*)(char *, Boolean, Buffer,
ClientData),
@@ -1046,8 +1069,172 @@
return(TRUE);
}
+#ifndef NO_REGEX
+/*-
+ *-----------------------------------------------------------------------
+ * VarREError --
+ * Print the error caused by a regcomp or regexec call.
+ *
+ * Results:
+ * None.
+ *
+ * Side Effects:
+ * An error gets printed.
+ *
+ *-----------------------------------------------------------------------
+ */
+static void
+VarREError(err, pat, str)
+ int err;
+ regex_t *pat;
+ const char *str;
+{
+ char *errbuf;
+ int errlen;
+
+ errlen = regerror(err, pat, 0, 0);
+ errbuf = emalloc(errlen);
+ regerror(err, pat, errbuf, errlen);
+ Error("%s: %s", str, errbuf);
+ free(errbuf);
+}
+
+
/*-
*-----------------------------------------------------------------------
+ * VarRESubstitute --
+ * Perform a regex substitution on the given word, placing the
+ * result in the passed buffer.
+ *
+ * Results:
+ * TRUE if a space is needed before more characters are added.
+ *
+ * Side Effects:
+ * None.
+ *
+ *-----------------------------------------------------------------------
+ */
+static Boolean
+VarRESubstitute(word, addSpace, buf, patternp)
+ char *word;
+ Boolean addSpace;
+ Buffer buf;
+ ClientData patternp;
+{
+ VarREPattern *pat;
+ int xrv;
+ char *wp;
+ char *rp;
+ int added;
+ int flags = 0;
+
+#define MAYBE_ADD_SPACE() \
+ if (addSpace && !added) \
+ Buf_AddByte(buf, ' '); \
+ added = 1
+
+ added = 0;
+ wp = word;
+ pat = patternp;
+
+ if ((pat->flags & (VAR_SUB_ONE|VAR_SUB_MATCHED)) ==
+ (VAR_SUB_ONE|VAR_SUB_MATCHED))
+ xrv = REG_NOMATCH;
+ else {
+ tryagain:
+ xrv = regexec(&pat->re, wp, pat->nsub, pat->matches, flags);
+ }
+
+ switch (xrv) {
+ case 0:
+ pat->flags |= VAR_SUB_MATCHED;
+ if (pat->matches[0].rm_so > 0) {
+ MAYBE_ADD_SPACE();
+ Buf_AddBytes(buf, pat->matches[0].rm_so, wp);
+ }
+
+ for (rp = pat->replace; *rp; rp++) {
+ if ((*rp == '\\') && ((rp[1] == '&') || (rp[1] == '\\'))) {
+ MAYBE_ADD_SPACE();
+ Buf_AddByte(buf,rp[1]);
+ rp++;
+ }
+ else if ((*rp == '&') ||
+ ((*rp == '\\') && isdigit((unsigned char)rp[1]))) {
+ int n;
+ char *subbuf;
+ int sublen;
+ char errstr[3];
+
+ if (*rp == '&') {
+ n = 0;
+ errstr[0] = '&';
+ errstr[1] = '\0';
+ } else {
+ n = rp[1] - '0';
+ errstr[0] = '\\';
+ errstr[1] = rp[1];
+ errstr[2] = '\0';
+ rp++;
+ }
+
+ if (n > pat->nsub) {
+ Error("No subexpression %s", &errstr[0]);
+ subbuf = "";
+ sublen = 0;
+ } else if ((pat->matches[n].rm_so == -1) &&
+ (pat->matches[n].rm_eo == -1)) {
+ Error("No match for subexpression %s", &errstr[0]);
+ subbuf = "";
+ sublen = 0;
+ } else {
+ subbuf = wp + pat->matches[n].rm_so;
+ sublen = pat->matches[n].rm_eo - pat->matches[n].rm_so;
+ }
+
+ if (sublen > 0) {
+ MAYBE_ADD_SPACE();
+ Buf_AddBytes(buf, sublen, subbuf);
+ }
+ } else {
+ MAYBE_ADD_SPACE();
+ Buf_AddByte(buf, *rp);
+ }
+ }
+ wp += pat->matches[0].rm_eo;
+ if (pat->flags & VAR_SUB_GLOBAL) {
+ flags |= REG_NOTBOL;
+ if (pat->matches[0].rm_so == 0 && pat->matches[0].rm_eo == 0) {
+ MAYBE_ADD_SPACE();
+ Buf_AddByte(buf, *wp);
+ wp++;
+
+ }
+ if (*wp)
+ goto tryagain;
+ }
+ if (*wp) {
+ MAYBE_ADD_SPACE();
+ Buf_AddBytes(buf, strlen(wp), wp);
+ }
+ break;
+ default:
+ VarREError(xrv, &pat->re, "Unexpected regex error");
+ /* fall through */
+ case REG_NOMATCH:
+ if (*wp) {
+ MAYBE_ADD_SPACE();
+ Buf_AddBytes(buf,strlen(wp),wp);
+ }
+ break;
+ }
+ return(addSpace||added);
+}
+#endif
+
+
+/*-
+ *-----------------------------------------------------------------------
* VarModify --
* Modify each of the words of the passed string using the given
* function. Used to implement all modifiers.
@@ -1090,6 +1277,135 @@
/*-
*-----------------------------------------------------------------------
+ * VarGetPattern --
+ * Pass through the tstr looking for 1) escaped delimiters,
+ * '$'s and backslashes (place the escaped character in
+ * uninterpreted) and 2) unescaped $'s that aren't before
+ * the delimiter (expand the variable substitution unless flags
+ * has VAR_NOSUBST set).
+ * Return the expanded string or NULL if the delimiter was missing
+ * If pattern is specified, handle escaped ampersands, and replace
+ * unescaped ampersands with the lhs of the pattern.
+ *
+ * Results:
+ * A string of all the words modified appropriately.
+ * If length is specified, return the string length of the buffer
+ * If flags is specified and the last character of the pattern is a
+ * $ set the VAR_MATCH_END bit of flags.
+ *
+ * Side Effects:
+ * None.
+ *-----------------------------------------------------------------------
+ */
+static char *
+VarGetPattern(ctxt, err, tstr, delim, flags, length, pattern)
+ GNode *ctxt;
+ int err;
+ char **tstr;
+ int delim;
+ int *flags;
+ int *length;
+ VarPattern *pattern;
+{
+ char *cp;
+ Buffer buf = Buf_Init(0);
+ int junk;
+ if (length == NULL)
+ length = &junk;
+
+#define IS_A_MATCH(cp, delim) \
+ ((cp[0] == '\\') && ((cp[1] == delim) || \
+ (cp[1] == '\\') || (cp[1] == '$') || (pattern && (cp[1] == '&'))))
+
+ /*
+ * Skim through until the matching delimiter is found;
+ * pick up variable substitutions on the way. Also allow
+ * backslashes to quote the delimiter, $, and \, but don't
+ * touch other backslashes.
+ */
+ for (cp = *tstr; *cp && (*cp != delim); cp++) {
+ if (IS_A_MATCH(cp, delim)) {
+ Buf_AddByte(buf, (Byte) cp[1]);
+ cp++;
+ } else if (*cp == '$') {
+ if (cp[1] == delim) {
+ if (flags == NULL)
+ Buf_AddByte(buf, (Byte) *cp);
+ else
+ /*
+ * Unescaped $ at end of pattern => anchor
+ * pattern at end.
+ */
+ *flags |= VAR_MATCH_END;
+ } else {
+ if (flags == NULL || (*flags & VAR_NOSUBST) == 0) {
+ char *cp2;
+ int len;
+ Boolean freeIt;
+
+ /*
+ * If unescaped dollar sign not before the
+ * delimiter, assume it's a variable
+ * substitution and recurse.
+ */
+ cp2 = Var_Parse(cp, ctxt, err, &len, &freeIt);
+ Buf_AddBytes(buf, strlen(cp2), (Byte *) cp2);
+ if (freeIt)
+ free(cp2);
+ cp += len - 1;
+ } else {
+ char *cp2 = &cp[1];
+
+ if (*cp2 == '(' || *cp2 == '{') {
+ /*
+ * Find the end of this variable reference
+ * and suck it in without further ado.
+ * It will be interperated later.
+ */
+ int have = *cp2;
+ int want = (*cp2 == '(') ? ')' : '}';
+ int depth = 1;
+
+ for (++cp2; *cp2 != '\0' && depth > 0; ++cp2) {
+ if (cp2[-1] != '\\') {
+ if (*cp2 == have)
+ ++depth;
+ if (*cp2 == want)
+ --depth;
+ }
+ }
+ Buf_AddBytes(buf, cp2 - cp, (Byte *)cp);
+ cp = --cp2;
+ } else
+ Buf_AddByte(buf, (Byte) *cp);
+ }
+ }
+ }
+ else if (pattern && *cp == '&')
+ Buf_AddBytes(buf, pattern->leftLen, (Byte *)pattern->lhs);
+ else
+ Buf_AddByte(buf, (Byte) *cp);
+ }
+
+ Buf_AddByte(buf, (Byte) '\0');
+
+ if (*cp != delim) {
+ *tstr = cp;
+ *length = 0;
+ return NULL;
+ }
+ else {
+ *tstr = ++cp;
+ cp = (char *) Buf_GetAll(buf, length);
+ *length -= 1; /* Don't count the NULL */
+ Buf_Destroy(buf, FALSE);
+ return cp;
+ }
+}
+
+
+/*-
+ *-----------------------------------------------------------------------
* VarQuote --
* Quote shell meta-characters in the string
*
@@ -1162,6 +1478,7 @@
int cnt; /* Used to count brace pairs when variable in
* in parens or braces */
char *start;
+ char delim;
Boolean dynamic; /* TRUE if the variable is local and we're
* expanding it in a non-local context. This
* is done to support dynamic sources. The
@@ -1420,6 +1737,8 @@
* wildcarding form.
* :S<d><pat1><d><pat2><d>[g]
* Substitute <pat2> for <pat1> in the value
+ * :C<d><pat1><d><pat2><d>[g]
+ * Substitute <pat2> for regex <pat1> in the value
* :H Substitute the head of each word
* :T Substitute the tail of each word
* :E Substitute the extension (minus '.') of
@@ -1667,6 +1986,83 @@
free(pattern.rhs);
break;
}
+#ifndef NO_REGEX
+ case 'C':
+ {
+ VarREPattern pattern;
+ char *re;
+ int error;
+
+ pattern.flags = 0;
+ delim = tstr[1];
+ tstr += 2;
+
+ cp = tstr;
+
+ if ((re = VarGetPattern(ctxt, err, &cp, delim, NULL,
+ NULL, NULL)) == NULL) {
+ /* was: goto cleanup */
+ *lengthPtr = cp - start + 1;
+ if (*freePtr)
+ free(str);
+ if (delim != '\0')
+ Error("Unclosed substitution for %s (%c missing)",
+ v->name, delim);
+ return (var_Error);
+ }
+
+ if ((pattern.replace = VarGetPattern(ctxt, err, &cp,
+ delim, NULL, NULL, NULL)) == NULL){
+ free(re);
+
+ /* was: goto cleanup */
+ *lengthPtr = cp - start + 1;
+ if (*freePtr)
+ free(str);
+ if (delim != '\0')
+ Error("Unclosed substitution for %s (%c missing)",
+ v->name, delim);
+ return (var_Error);
+ }
+
+ for (;; cp++) {
+ switch (*cp) {
+ case 'g':
+ pattern.flags |= VAR_SUB_GLOBAL;
+ continue;
+ case '1':
+ pattern.flags |= VAR_SUB_ONE;
+ continue;
+ }
+ break;
+ }
+
+ termc = *cp;
+
+ error = regcomp(&pattern.re, re, REG_EXTENDED);
+ free(re);
+ if (error) {
+ *lengthPtr = cp - start + 1;
+ VarREError(error, &pattern.re, "RE substitution error");
+ free(pattern.replace);
+ return (var_Error);
+ }
+
+ pattern.nsub = pattern.re.re_nsub + 1;
+ if (pattern.nsub < 1)
+ pattern.nsub = 1;
+ if (pattern.nsub > 10)
+ pattern.nsub = 10;
+ pattern.matches = emalloc(pattern.nsub *
+ sizeof(regmatch_t));
+ newStr = VarModify(str, VarRESubstitute,
+ (ClientData) &pattern);
+ regfree(&pattern.re);
+ free(pattern.replace);
+ free(pattern.matches);
+ break;
+ }
+#endif
case 'Q':
if (tstr[1] == endc || tstr[1] == ':') {
newStr = VarQuote (str);
>Release-Note:
>Audit-Trail:
>Unformatted:
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200009280204.EAA90935>
