Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 25 Jun 2010 22:12:42 +0200
From:      David Naylor <naylor.b.david@gmail.com>
To:        Garrett Cooper <yanefbsd@gmail.com>
Cc:        freebsd-hackers@freebsd.org
Subject:   Re: [RFC] mtree improvements
Message-ID:  <201006252212.46301.naylor.b.david@gmail.com>
In-Reply-To: <AANLkTilWRu63O48AotRODxnwo8EtnSHB27UucXzSJDf9@mail.gmail.com>
References:  <201006251852.30302.naylor.b.david@gmail.com> <AANLkTilWRu63O48AotRODxnwo8EtnSHB27UucXzSJDf9@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
--nextPart1715099.4f37riSuJ6
Content-Type: multipart/mixed;
  boundary="Boundary-01=_62QJMEjqHFP9f2f"
Content-Transfer-Encoding: 7bit


--Boundary-01=_62QJMEjqHFP9f2f
Content-Type: Text/Plain;
  charset="us-ascii"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

On Friday 25 June 2010 20:01:42 Garrett Cooper wrote:
> On Fri, Jun 25, 2010 at 9:52 AM, David Naylor <naylor.b.david@gmail.com>=
=20
wrote:
> > Hi,
> >=20
> > I've created a patch that increases the performance of mtree.  This is =
of
> > particular use during a port install.  In an extreme case I have
> > experienced a ~20% increase [1].
> >=20
> > For a full discussion see PR bin/143732.  This arose out of [2] where I
> > experienced the increase.
> >=20
> > For your convenience I have attached the patch.
> >=20
> > Please review this patch and if it is acceptable, commit it.
> >=20
> > Regards,
> >=20
> > David
> >=20
> > 1] http://markmail.org/message/iju3l6hyv7s7emrb
> > 2] http://markmail.org/message/gfztjpszl5dozzii
>=20
> Hmm... this has other interesting applications other than just ports,
> but unfortunately pkg_install won't really feel as much of a
> performance boost (because it uses mtree -e -U when +MTREE exists in
> the package).
>=20
> Comments follow.
> Thanks!
> -Garrett

I've included some more changes.  I do not think -[uU] needs to be set when=
=20
skipping vwalk.  I also move sflag so it will get called even if eflag is s=
et=20
(bug in my patch). =20

I'm not sure sflag will produce the same results as check() and vwalk() may=
=20
call compare() in different orders?  Will this impact on crc?

> --- /usr/src/usr.sbin/mtree/verify.c	2010-02-07 15:07:28.000000000 +0200
> +++ verify.c	2010-02-07 15:04:10.000000000 +0200
> @@ -50,17 +50,23 @@
>  static NODE *root;
>  static char path[MAXPATHLEN];
>=20
> -static void	miss(NODE *, char *);
> +static int	miss(NODE *, char *);
> +static int	check(NODE *, char *);
>  static int	vwalk(void);
>=20
>  int
>  mtree_verifyspec(FILE *fi)
>  {
> -	int rval;
> +	int rval =3D 0;
>=20
>  	root =3D mtree_readspec(fi);
> -	rval =3D vwalk();
> -	miss(root, path);
> +	/*
> +	 * No need to walk entire tree if we are only updating the structure
> +	 * and extra files are ignored.
> +	 */
> +	if (!(uflag && eflag))
> +		rval =3D vwalk();
>=20
> gcooper> This is where the performance boost is coming from as you're
> not walking the directory tree, correct?

Yes, if an update is requested without reporting extra files.  In some case=
s=20
with a well populated /usr/local (and a slowish HDD) it could take some tim=
e. =20

I think it is possible to even use this optimisation if uflag is not set? =
=20

> +	rval |=3D miss(root, path);
>  	return (rval);
>  }
>=20
> @@ -155,15 +161,47 @@
>  	return (rval);
>  }
>=20
> -static void
> +static int
> +check(NODE *p, char *tail)
> +{
> +	FTSENT fts;
> +	struct stat fts_stat;
> +
> +	strcpy(tail, p->name);
>=20
> gcooper> Dangerous. Please use strlcpy with appropriate bounds.

This is the same code used in miss.c:171. =20

It appears the code assumes that a path a mtree file describes will not exc=
eed=20
MAXPATHLEN and does not check to see if the buffer overflows.  I could crea=
te a=20
patch to fix that.  Perhaps a separate patch?

> +	/*
> +	 * It is assumed that compare() only requires fts_accpath and fts_statp
> +	 * fields in the FTSENT structure.
> +	 */
> +	fts.fts_accpath =3D path;
> +	fts.fts_statp =3D &fts_stat;
> +
> +	if (stat(path, fts.fts_statp))
> +		return (0);
>=20
> gcooper> What about symlink functionality? lstat(2)?

=46ixed.  This should now be consistent with fts_read. =20

> +	p->flags |=3D F_VISIT;
> +	if ((p->flags & F_NOCHANGE) =3D=3D 0 && compare(p->name, p, &fts))
> +		return (MISMATCHEXIT);
> +	else
> +		return (0);
> +
> +	/*
> +	 * tail is not restored to '\0' as the next time tail (or path) is used
> +	 * is with a strcpy (thus overriding the '\0').  See +19 lines below.
> +	 */
> +}
> +
> +static int
>  miss(NODE *p, char *tail)
>  {
>  	int create;
>  	char *tp;
>  	const char *type, *what;
> -	int serr;
> +	int serr, rval =3D 0;
>=20
> gcooper> This isn't correct as per-style(9). Please do:
> gcooper>
> gcooper> int rval =3D 0;
> gcooper> int serr;
> gcooper>
> gcooper> This reduces diff churn and is more style(9)-istically correct.

I didn't know that.  Good point. =20

>  	for (; p; p =3D p->next) {
> +		if (uflag && eflag)
> +			rval |=3D check(p, tail);
>  		if (p->flags & F_OPT && !(p->flags & F_VISIT))
>  			continue;
>  		if (p->type !=3D F_DIR && (dflag || p->flags & F_VISIT))
> @@ -256,4 +294,5 @@
>  			(void)printf("%s: file flags not set: %s\n",
>  			    path, strerror(errno));
>  	}
> +	return (rval);
>  }

--Boundary-01=_62QJMEjqHFP9f2f
Content-Type: text/x-patch;
  charset="ISO-8859-1";
  name="mtree.diff"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="mtree.diff"

=2D-- mtree/verify.c	2010-05-27 13:47:20.000000000 +0200
+++ verify.c	2010-06-25 22:04:40.000000000 +0200
@@ -50,17 +50,25 @@
 static NODE *root;
 static char path[MAXPATHLEN];
=20
=2Dstatic void	miss(NODE *, char *);
+static int	miss(NODE *, char *);
+static int	check(NODE *, char *);
 static int	vwalk(void);
=20
 int
 mtree_verifyspec(FILE *fi)
 {
=2D	int rval;
+	int rval =3D 0;
=20
 	root =3D mtree_readspec(fi);
=2D	rval =3D vwalk();
=2D	miss(root, path);
+	/* No need to walk tree if we are ignoring extra files. */
+	if (!eflag)
+		rval =3D vwalk();
+	rval |=3D miss(root, path);
+
+	/* Called here to make sure vwalk() and check() have been called */
+	if (sflag)
+		warnx("%s checksum: %lu", fullpath, (unsigned long)crc_total);
+
 	return (rval);
 }
=20
@@ -135,35 +143,72 @@
 		if (ep)
 			continue;
 extra:
=2D		if (!eflag) {
=2D			(void)printf("%s extra", RP(p));
=2D			if (rflag) {
=2D				if ((S_ISDIR(p->fts_statp->st_mode)
=2D				    ? rmdir : unlink)(p->fts_accpath)) {
=2D					(void)printf(", not removed: %s",
=2D					    strerror(errno));
=2D				} else
=2D					(void)printf(", removed");
=2D			}
=2D			(void)putchar('\n');
+		(void)printf("%s extra", RP(p));
+		if (rflag) {
+			if ((S_ISDIR(p->fts_statp->st_mode)
+			    ? rmdir : unlink)(p->fts_accpath)) {
+				(void)printf(", not removed: %s",
+				    strerror(errno));
+			} else
+				(void)printf(", removed");
 		}
+		(void)putchar('\n');
 		(void)fts_set(t, p, FTS_SKIP);
 	}
 	(void)fts_close(t);
=2D	if (sflag)
=2D		warnx("%s checksum: %lu", fullpath, (unsigned long)crc_total);
 	return (rval);
 }
=20
=2Dstatic void
+static int
+check(NODE *p, char *tail)
+{
+	FTSENT fts;
+	struct stat fts_stat;
+
+	strcpy(tail, p->name);
+
+	/*
+	 * It is assumed that compare() only requires fts_accpath and fts_statp
+	 * fields in the FTSENT structure.
+	 */
+	fts.fts_accpath =3D path;
+	fts.fts_statp =3D &fts_stat;
+
+	if (ftsoptions & FTS_LOGICAL) {
+		if (stat(path, fts.fts_statp) || lstat(path, fts.fts_statp))
+			return (0);
+	} else if (lstat(path, fts.fts_statp))
+		return (0);
+
+	p->flags |=3D F_VISIT;
+	if ((p->flags & F_NOCHANGE) =3D=3D 0 && compare(p->name, p, &fts))
+		return (MISMATCHEXIT);
+	else
+		return (0);
+
+	/*
+	 * tail is not restored to '\0' as the next time tail (or path) is used
+	 * is with a strcpy (thus overriding the '\0').  See +25 lines below.
+	 */
+}
+
+static int
 miss(NODE *p, char *tail)
 {
 	int create;
 	char *tp;
 	const char *type, *what;
+	int rval =3D 0;
 	int serr;
=20
 	for (; p; p =3D p->next) {
+		/*
+		 * If check() needs to be called (eflag set) and dflag is set=20
+		 * then only call for directories (as we are ignoring=20
+		 * everything else) otherwise always call.
+		 */
+		if (eflag && (!dflag || (p->type =3D=3D F_DIR)))
+			rval |=3D check(p, tail);
 		if (p->flags & F_OPT && !(p->flags & F_VISIT))
 			continue;
 		if (p->type !=3D F_DIR && (dflag || p->flags & F_VISIT))
@@ -256,4 +301,5 @@
 			(void)printf("%s: file flags not set: %s\n",
 			    path, strerror(errno));
 	}
+	return (rval);
 }

--Boundary-01=_62QJMEjqHFP9f2f--

--nextPart1715099.4f37riSuJ6
Content-Type: application/pgp-signature; name=signature.asc 
Content-Description: This is a digitally signed message part.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)

iEYEABECAAYFAkwlDb4ACgkQUaaFgP9pFrK33wCffHn+OJ7hDXbjdTIkP85c9I5s
djgAnjQnvYi2h5yooO2Pe/NeHOFqvTPa
=4b6w
-----END PGP SIGNATURE-----

--nextPart1715099.4f37riSuJ6--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006252212.46301.naylor.b.david>