From owner-freebsd-hackers@FreeBSD.ORG Thu Aug 7 07:08:37 2003 Return-Path: Delivered-To: freebsd-hackers@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 5BAD637B401 for ; Thu, 7 Aug 2003 07:08:37 -0700 (PDT) Received: from mailout.informatik.tu-muenchen.de (mailout.informatik.tu-muenchen.de [131.159.0.5]) by mx1.FreeBSD.org (Postfix) with ESMTP id 7145E43F75 for ; Thu, 7 Aug 2003 07:08:36 -0700 (PDT) (envelope-from barner@in.tum.de) Received: by zi025.glhnet.mhn.de (Postfix, from userid 1000) id 9F32D38C12; Thu, 7 Aug 2003 16:08:01 +0200 (CEST) Date: Thu, 7 Aug 2003 16:08:01 +0200 From: Simon Barner To: "mitrohin a.s." Message-ID: <20030807140801.GD1475@zi025.glhnet.mhn.de> References: <1059607242.64020.5.camel@mjtdev1.dand06.au.bytecraft.au.com> <3F285560.2090607@acm.org> <1059608748.64020.10.camel@mjtdev1.dand06.au.bytecraft.au.com> <002201c356fa$4a66a700$1200a8c0@gsicomp.on.ca> <20030731134343.GB1323@zi025.glhnet.mhn.de> <1059693358.64020.31.camel@mjtdev1.dand06.au.bytecraft.au.com> <20030801153142.GA487@zi025.glhnet.mhn.de> <3F2B75E2.FBC18052@mindspring.com> <20030804003331.GA408@zi025.glhnet.mhn.de> <20030805020736.GA1805@bspu.secna.ru> Mime-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="m51xatjYGsM+13rf" Content-Disposition: inline In-Reply-To: <20030805020736.GA1805@bspu.secna.ru> User-Agent: Mutt/1.5.4i X-Virus-Scanned: by amavisd-new at informatik.tu-muenchen.de cc: freebsd-hackers@freebsd.org Subject: Re: [patch] Re: getfsent(3) and spaces in fstab X-BeenThere: freebsd-hackers@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Technical Discussions relating to FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Aug 2003 14:08:37 -0000 --m51xatjYGsM+13rf Content-Type: multipart/mixed; boundary="O5XBE6gyVG5Rl6Rj" Content-Disposition: inline 
--O5XBE6gyVG5Rl6Rj Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable > imho - expensive algorithm... i want to see anything more simple...=20 > like "gtok()" instead "es_strsep() + remove_escapes()"? I have adapted my patch to use your neat gtok() function, but I came to the conclusion that a two-pass algorithm is necessary: The first pass detects whether a line from fstab is in the old or the new style format (old style lines may only have unescaped white spaces before a trailing #-comment). Then, the second pass extracts the information. I admit this is rather complicated, but I don't know how to handle two sets of delimiters (":\n" and " \n\r\t") with only one pass. Using gtok() to detect the style of a line is not an option IMO, since it would convert escape sequences. Now, the following lines can be processed: 1) old style: fs_spec:fs_file:fs_type:fs_freq:fs_passno([' ','\t']*#comment)* 2) new style format as described in fstab(5) + an optional #-comment at the end of the line 3) empty lines, white space lines, arbitrarily many white spaces + comment In both the old and the new style lines, white spaces can be written as escape sequences or in double quotes. Could somebody please review my patch - if there are no objections (but I am sure there are some more details that can be improved), I will write a PR. Regards, Simon --O5XBE6gyVG5Rl6Rj Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="patch-fstab.c" Content-Transfer-Encoding: quoted-printable --- fstab.c.orig Fri Aug 1 17:18:00 2003 +++ fstab.c Thu Aug 7 15:46:39 2003 @@ -84,6 +84,60 @@ _fs_fstab.fs_spec =3D buf; } =20 +/* + * Gets a token from a string *s, that is either empty or is separated by + * a set of delimiters *delim. + * Characters that are in *delim can occur in the token if they are escaped, + * i.e. have a '\' prepended. The character '\' itself is encoded as '\\'. 
+ * *s can have a trailing comment (indicated by a '#'), which will cause the + * characters after the '#' to be ignored. To encode a '#' within a token, + * use '\#'. + * + * If a token is found, gtok sets the character after its end + * to '\0' and returns a pointer to it. Otherwise the return value is NULL. + * As a side effect, the input string *s is modified and points to the next + * character after the end of the current token, i.e. after the '\0'. + */ +char *gtok(char **s, char const *delim) +{ + int quoted, escaped; + static char const esc_set[] =3D { 't', 'r', 'n', 'a', 0 }; + static char const esc_rep[] =3D { '\t', '\r', '\n', '\a', 0 }; + char *tok, *r, *w, *p; + + if (!s || !*s || !*(tok =3D *s + strspn(*s, delim)) || *tok =3D=3D '#') + return NULL; + + for (quoted =3D escaped =3D 0, r =3D w =3D tok; *r; r++) { + if (!escaped) { + if (*r =3D=3D '\\') { + escaped =3D 1; + continue; + } + if (*r =3D=3D '\"') { + quoted ^=3D -1; + continue; + } + if (!quoted) { + if (strchr(delim, *r)) { + r++; + break; + } + } + } else { + escaped =3D 0; + if ((p =3D strchr(esc_set, *r)) !=3D NULL) { + *w++ =3D esc_rep[p - esc_set]; + continue; + } + } + *w++ =3D *r; + } + *w =3D 0; + *s =3D r; + return tok; +} + static int fstabscan() { @@ -91,21 +145,73 @@ #define MAXLINELENGTH 1024 static char line[MAXLINELENGTH]; char subline[MAXLINELENGTH]; - int typexx; + int typexx, escaped=3D0, quoted=3D0, ws_sep=3D0; =20 for (;;) { =20 if (!(p =3D fgets(line, sizeof(line), _fs_fp))) return(0); -/* OLD_STYLE_FSTAB */ ++LineNo; - if (*line =3D=3D '#' || *line =3D=3D '\n') - continue; - if (!strpbrk(p, " \t")) { - _fs_fstab.fs_spec =3D strsep(&p, ":\n"); - _fs_fstab.fs_file =3D strsep(&p, ":\n"); + =09 + /* Detect whether line is in old or new fstab style */ + for (cp=3Dp; *cp !=3D '\n'; ++cp) { + if (*cp =3D=3D '\\') { + escaped =3D (escaped ? 0 : 1); + continue; + } + if (!escaped) { + /* Quotes */ + if (*cp =3D=3D '\"') { + quoted =3D (quoted ? 
0 : 1); + continue; + } + if (quoted) + continue; + /* new white space separator found */ + if (cp > p && strspn (cp, " \n\r\t") && + !strspn(cp-1, " \t")) + ++ws_sep; + =20 + /* #-comment found */ + if (*cp =3D=3D '#') { + *cp =3D '\0'; + /* ignore white spaces in front of a comment */ + if (cp > p && strspn(cp-1, " \t") &&=20 + ws_sep > 0) + ws_sep--; + break; + } + } else + escaped =3D 0; + } + /* open quotes and unfinished escape-sequences are bad */ + if (quoted || escaped) + goto bad; + /* ignore trailing white spaces */ + if (*(cp + strspn (cp, " \t")) =3D=3D '\n' && ws_sep > 0) + --ws_sep; + =20 + /* No white space separators found =3D> OLD_STYLE_FSTAB */ + if (ws_sep =3D=3D 0) { + /* + * line consists only of white spaces + * (possibly + #-comment) + */ + if (strspn (p, " \t")) + continue; + /* + * Now read the different values (gtok will convert + * escape seq.). Format is: + * fs_spec:fs_file:fs_type:fs_freq:fs_passno + * ':' itself can be encoded as '\:' + */ + if (!(_fs_fstab.fs_spec =3D gtok(&p, ":\n\r"))) + continue; + if (!(_fs_fstab.fs_file =3D gtok(&p, ":\n\r"))) { + goto bad; + } fixfsfile(); - _fs_fstab.fs_type =3D strsep(&p, ":\n"); + _fs_fstab.fs_type =3D gtok(&p, ":\n\r"); if (_fs_fstab.fs_type) { if (!strcmp(_fs_fstab.fs_type, FSTAB_XX)) continue; @@ -113,46 +219,43 @@ _fs_fstab.fs_vfstype =3D strcmp(_fs_fstab.fs_type, FSTAB_SW) ? "ufs" : "swap"; - if ((cp =3D strsep(&p, ":\n")) !=3D NULL) { + if ((cp =3D gtok(&p, ":\n\r")) !=3D NULL) { _fs_fstab.fs_freq =3D atoi(cp); - if ((cp =3D strsep(&p, ":\n")) !=3D NULL) { + if ((cp =3D gtok(&p, " \n\r\t")) !=3D NULL) { _fs_fstab.fs_passno =3D atoi(cp); + if (gtok (&p, " \n\r\t")) + goto bad; + =20 return(1); } } } goto bad; } -/* OLD_STYLE_FSTAB */ - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; - _fs_fstab.fs_spec =3D cp; - if (!_fs_fstab.fs_spec || *_fs_fstab.fs_spec =3D=3D '#') + =09 + /* At least one white space sep. 
found =3D> NEW_STYLE_FSTAB */ + if (!(_fs_fstab.fs_spec =3D gtok(&p, " \n\r\t"))) continue; - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; - _fs_fstab.fs_file =3D cp; + if (!(_fs_fstab.fs_file =3D gtok(&p, " \n\r\t"))) + goto bad; fixfsfile(); - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; - _fs_fstab.fs_vfstype =3D cp; - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; - _fs_fstab.fs_mntops =3D cp; - if (_fs_fstab.fs_mntops =3D=3D NULL) + if (!(_fs_fstab.fs_vfstype =3D gtok(&p, " \n\r\t"))) + goto bad; + if (!(_fs_fstab.fs_mntops =3D gtok(&p, " \n\r\t"))) goto bad; _fs_fstab.fs_freq =3D 0; _fs_fstab.fs_passno =3D 0; - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; + cp =3D gtok(&p, " \n\r\t"); if (cp !=3D NULL) { _fs_fstab.fs_freq =3D atoi(cp); - while ((cp =3D strsep(&p, " \t\n")) !=3D NULL && *cp =3D=3D '\0') - ; + cp =3D gtok(&p, " \n\r\t"); if (cp !=3D NULL) _fs_fstab.fs_passno =3D atoi(cp); } strcpy(subline, _fs_fstab.fs_mntops); + if (gtok (&p, " \n\r\t")) + goto bad; + p =3D subline; for (typexx =3D 0, cp =3D strsep(&p, ","); cp; cp =3D strsep(&p, ",")) { --O5XBE6gyVG5Rl6Rj-- --m51xatjYGsM+13rf Content-Type: application/pgp-signature; name="signature.asc" Content-Description: Digital signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.2.2 (FreeBSD) iD8DBQE/Ml1BCkn+/eutqCoRAtkbAKD4zdFKhcmU+oBM4O3piYxAINenrgCgzAFP xf9B3L1OcG/V6TXSY6Dy5HA= =Mr8w -----END PGP SIGNATURE----- --m51xatjYGsM+13rf--