Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 29 May 2005 16:12:46 +0100
From:      Dominic Marks <dom@goodforbusiness.co.uk>
To:        Bruce Evans <bde@zeta.org.au>
Cc:        freebsd-fs@FreeBSD.org, freebsd-gnats-submit@FreeBSD.org, banhalmi@field.hu
Subject:   Re: i386/68719: [usb] USB 2.0 mobil rack+ fat32 performance problem
Message-ID:  <200505291612.46941.dom@goodforbusiness.co.uk>
In-Reply-To: <200505281540.35116.dom@goodforbusiness.co.uk>
References:  <200505271328.58072.dom@goodforbusiness.co.uk> <200505281213.42118.dom@goodforbusiness.co.uk> <200505281540.35116.dom@goodforbusiness.co.uk>

next in thread | previous in thread | raw e-mail | index | archive | help
--Boundary-00=_uvdmCBeoNTBAj+g
Content-Type: text/plain;
  charset="iso-8859-1"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

On Saturday 28 May 2005 15:40, Dominic Marks wrote:
> On Saturday 28 May 2005 12:13, Dominic Marks wrote:
> > On Saturday 28 May 2005 11:36, Bruce Evans wrote:
>
> <snip>
>
> > > I use the following to improve transfer rates for msdosfs.  The patch
> > > is for an old version so it might not apply directly.
> > >

<snip>

> > Thanks! I'll try my three tests again with this patch.
>
> Index: msdosfs_vnops.c
> ===================================================================
> RCS file: /usr/cvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
> retrieving revision 1.149.2.1
> diff -u -r1.149.2.1 msdosfs_vnops.c
> --- msdosfs_vnops.c	31 Jan 2005 23:25:56 -0000	1.149.2.1
> +++ msdosfs_vnops.c	28 May 2005 14:26:59 -0000
> @@ -607,6 +607,7 @@
>  		struct uio *a_uio;
>  		int a_ioflag;
>  		struct ucred *a_cred;
> +		int seqcount;
>  	} */ *ap;
>  {
>  	int n;
> @@ -615,6 +616,7 @@
>  	u_long osize;
>  	int error = 0;
>  	u_long count;
> +	int seqcount;
>  	daddr_t bn, lastcn;
>  	struct buf *bp;
>  	int ioflag = ap->a_ioflag;
> @@ -692,7 +694,7 @@
>  	 */
>  	if (uio->uio_offset + resid > osize) {
>  		count = de_clcount(pmp, uio->uio_offset + resid) -
> -			de_clcount(pmp, osize);
> +		   	de_clcount(pmp, osize);
>  		error = extendfile(dep, count, NULL, NULL, 0);
>  		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
>  			goto errexit;
> @@ -700,6 +702,7 @@
>  	} else
>  		lastcn = de_clcount(pmp, osize) - 1;
>
> +	seqcount = ioflag >> IO_SEQSHIFT;
>  	do {
>  		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
>  			error = ENOSPC;
> @@ -725,7 +728,7 @@
>  			 * then no need to read data from disk.
>  			 */
>  			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
> -			clrbuf(bp);
> +			vfs_bio_clrbuf(bp);
>  			/*
>  			 * Do the bmap now, since pcbmap needs buffers
>  			 * for the fat table. (see msdosfs_strategy)
> @@ -775,6 +778,7 @@
>  		 * without delay.  Otherwise do a delayed write because we
>  		 * may want to write somemore into the block later.
>  		 */
> +		 /*
>  		if (ioflag & IO_SYNC)
>  			(void) bwrite(bp);
>  		else if (n + croffset == pmp->pm_bpcluster)
> @@ -782,6 +786,24 @@
>  		else
>  			bdwrite(bp);
>  		dep->de_flag |= DE_UPDATE;
> +		*/
> +		/*
> +		 * XXX Patch.
> +		 */
> +                if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
> +                       bp->b_flags |= B_CLUSTEROK;
> +                if (ioflag & IO_SYNC)
> +                       (void)bwrite(bp);
> +                else if (vm_page_count_severe() ||
> buf_dirty_count_severe()) +                       bawrite(bp);
> +                else if (n + croffset == pmp->pm_bpcluster) {
> +                       if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
> +                               cluster_write(bp, dep->de_FileSize,
> seqcount); +                       else
> +                               bawrite(bp);
> +               } else
> +                       bdwrite(bp);
> +                dep->de_flag |= DE_UPDATE;
>  	} while (error == 0 && uio->uio_resid > 0);
>
>  	/*
>
> Your patch works for me on 5.4-STABLE. It improves write performance
> dramatically. I did another test, reading and writing 1GB chunks of data.

<snip>

> Since the patch is to the _write function is it safe to assume the same
> method could be used to fix read performance if applied properly in the
> correct function?
>
> Cheers,

I have been experimenting in msdosfs_read and I have managed to come up with 
something that works, but I'm sure it is flawed. On large file reads it will 
improve read performance (see below) - but only after a long period of the 
file copy achieving only 3MB/s (see A1). During this time gstat reports the 
disc itself is reading at its maximum of around 28MB/s. After a long period 
of low throughput, the disc drops to 25MB/s but the actual transfer rate 
increases to 25MB/s (see A2).

I've tried to narrow it down to something but I'm mostly in the dark, so I'll 
just hand over what I found to work to review. I looked at Bruce's changes to 
msdosfs_write and tried to do the same (implement cluster_read) using the 
ext2 and ffs _read methods as a how-to. I think I'm reading ahead too far, or 
too early. I have been unable to interpret the gstat output during the first 
part of the transfer any further.

gstat(8) output at the start (slow, A1), and middle (fast, A2) of a large file 
copy between msdosfs/usb drive (da0s1) and ufs2/ata-100 (ad1).

# A1
dT: 0.501  flag_I 500000us  sizeof 240  i -1
 L(q)  ops/s    r/s   kBps   ms/r    w/s   kBps   ms/w   %busy Name
   14    445    445  28376   24.7      0      0    0.0   99.9| da0
    0     28      0      0    0.0     28   3578    1.7    4.8| ad1
    0     28      0      0    0.0     28   3578    1.7    4.8| ad1s1
   14    445    445  28376   24.9      0      0    0.0  100.0| da0s1

After 30-45 seconds (1GB test file):

# A2
dT: 0.501  flag_I 500000us  sizeof 240  i -1
 L(q)  ops/s    r/s   kBps   ms/r    w/s   kBps   ms/w   %busy Name
    1    403    403  25428    2.1      0      0    0.0   85.0| da0
    0    199      0      0    0.0    199  25532    1.7   34.0| ad1
    0    199      0      0    0.0    199  25532    1.7   34.1| ad1s1
    1    403    403  25428    2.1      0      0    0.0   85.9| da0s1

The patch which combines Bruce's original patch for msdosfs_write, revised for 
current text positions, and my attempts to do the same for msdosfs_read.

%%
Index: msdosfs_vnops.c
===================================================================
RCS file: /usr/cvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.149.2.1
diff -u -r1.149.2.1 msdosfs_vnops.c
--- msdosfs_vnops.c	31 Jan 2005 23:25:56 -0000	1.149.2.1
+++ msdosfs_vnops.c	29 May 2005 14:10:18 -0000
@@ -517,6 +517,8 @@
 	int blsize;
 	int isadir;
 	int orig_resid;
+	int nblks = 16; /* XXX should be defined, but not here */
+	int crsize;
 	u_int n;
 	u_long diff;
 	u_long on;
@@ -565,14 +567,21 @@
 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
 		} else {
 			blsize = pmp->pm_bpcluster;
-			rablock = lbn + 1;
-			if (seqcount > 1 &&
-			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
-				rasize = pmp->pm_bpcluster;
-				error = breadn(vp, lbn, blsize,
-				    &rablock, &rasize, 1, NOCRED, &bp);
+			/* XXX what is the best value for crsize? */
+ 			crsize = blsize * nblks > MAXBSIZE ? MAXBSIZE : blsize * nblks;
+			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+				error = cluster_read(vp, dep->de_FileSize, lbn,
+					crsize, NOCRED, uio->uio_resid, seqcount, &bp);
 			} else {
-				error = bread(vp, lbn, blsize, NOCRED, &bp);
+				rablock = lbn + 1;
+				if (seqcount > 1 &&
+					de_cn2off(pmp, rablock) < dep->de_FileSize) {
+						rasize = pmp->pm_bpcluster;
+						error = breadn(vp, lbn, blsize,
+						&rablock, &rasize, 1, NOCRED, &bp);
+				} else {
+					error = bread(vp, lbn, blsize, NOCRED, &bp);
+				}
 			}
 		}
 		if (error) {
@@ -580,14 +589,16 @@
 			break;
 		}
 		on = uio->uio_offset & pmp->pm_crbomask;
-		diff = pmp->pm_bpcluster - on;
-		n = diff > uio->uio_resid ? uio->uio_resid : diff;
+		diff = blsize * nblks - on;
+		n = blsize * nblks > uio->uio_resid ? uio->uio_resid : blsize * nblks;
 		diff = dep->de_FileSize - uio->uio_offset;
-		if (diff < n)
+		if (diff < n) {
 			n = diff;
-		diff = blsize - bp->b_resid;
-		if (diff < n)
+		}
+		diff = blsize * nblks - bp->b_resid;
+		if (diff < n) {
 			n = diff;
+		}
 		error = uiomove(bp->b_data + on, (int) n, uio);
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
@@ -607,6 +618,7 @@
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
+		int seqcount;
 	} */ *ap;
 {
 	int n;
@@ -615,6 +627,7 @@
 	u_long osize;
 	int error = 0;
 	u_long count;
+	int seqcount;
 	daddr_t bn, lastcn;
 	struct buf *bp;
 	int ioflag = ap->a_ioflag;
@@ -692,7 +705,7 @@
 	 */
 	if (uio->uio_offset + resid > osize) {
 		count = de_clcount(pmp, uio->uio_offset + resid) -
-			de_clcount(pmp, osize);
+		   	de_clcount(pmp, osize);
 		error = extendfile(dep, count, NULL, NULL, 0);
 		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
 			goto errexit;
@@ -700,6 +713,7 @@
 	} else
 		lastcn = de_clcount(pmp, osize) - 1;
 
+	seqcount = ioflag >> IO_SEQSHIFT;
 	do {
 		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
 			error = ENOSPC;
@@ -725,7 +739,7 @@
 			 * then no need to read data from disk.
 			 */
 			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
-			clrbuf(bp);
+			vfs_bio_clrbuf(bp);
 			/*
 			 * Do the bmap now, since pcbmap needs buffers
 			 * for the fat table. (see msdosfs_strategy)
@@ -775,6 +789,7 @@
 		 * without delay.  Otherwise do a delayed write because we
 		 * may want to write somemore into the block later.
 		 */
+		 /*
 		if (ioflag & IO_SYNC)
 			(void) bwrite(bp);
 		else if (n + croffset == pmp->pm_bpcluster)
@@ -782,6 +797,24 @@
 		else
 			bdwrite(bp);
 		dep->de_flag |= DE_UPDATE;
+		*/
+		/*
+		 * XXX Patch.
+		 */
+                if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+                       bp->b_flags |= B_CLUSTEROK;
+                if (ioflag & IO_SYNC)
+                       (void)bwrite(bp);
+                else if (vm_page_count_severe() || buf_dirty_count_severe())
+                       bawrite(bp);
+                else if (n + croffset == pmp->pm_bpcluster) {
+                       if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+                               cluster_write(bp, dep->de_FileSize, seqcount);
+                       else
+                               bawrite(bp);
+               } else
+                       bdwrite(bp);
+                dep->de_flag |= DE_UPDATE;
 	} while (error == 0 && uio->uio_resid > 0);
 
 	/*

%%

With this patch I can get the following transfer rates:

msdosfs reading

# ls -lh /mnt/random2.file 
-rwxr-xr-x  1 root  wheel   1.0G May 29 11:24 /mnt/random2.file

# /usr/bin/time -al cp /mnt/random2.file /vol
       59.61 real         0.05 user         6.79 sys
       632  maximum resident set size
        11  average shared memory size
        80  average unshared data size
       123  average unshared stack size
        88  page reclaims
         0  page faults
         0  swaps
     23757  block input operations **
      8192  block output operations
         0  messages sent
         0  messages received
         0  signals received
     16660  voluntary context switches
     10387  involuntary context switches

Average Rate: 15.31MB/s. (Would be higher if not for the slow start)

** This figure is 3x that of the UFS2 operations. This must be a indicator of 
what I'm doing wrong, but I don't know what.

msdosfs writing

# /usr/bin/time -al cp /vol/random2.file /mnt
       47.33 real         0.03 user         7.13 sys
       632  maximum resident set size
        12  average shared memory size
        85  average unshared data size
       130  average unshared stack size
        88  page reclaims
         0  page faults
         0  swaps
      8735  block input operations
     16385  block output operations
         0  messages sent
         0  messages received
         0  signals received
      8856  voluntary context switches
     29631  involuntary context switches

Average Rate: 18.79MB/s.

To compare with UFS2 + softupdates on the same system / disc.

ufs2 reading

# /usr/bin/time -al cp /mnt/random2.file /vol
       42.39 real         0.02 user         6.61 sys
       632  maximum resident set size
        12  average shared memory size
        87  average unshared data size
       133  average unshared stack size
        88  page reclaims
         0  page faults
         0  swaps
      8249  block input operations
      8193  block output operations
         0  messages sent
         0  messages received
         0  signals received
      8246  voluntary context switches
     24617  involuntary context switches

Average Rate: 20.89MB/s.

ufs2 writing

# /usr/bin/time -al cp /vol/random2.file /mnt/
       47.12 real         0.03 user         6.74 sys
       632  maximum resident set size
        12  average shared memory size
        85  average unshared data size
       130  average unshared stack size
        88  page reclaims
         0  page faults
         0  swaps
      8260  block input operations
      8192  block output operations
         0  messages sent
         0  messages received
         0  signals received
      8303  voluntary context switches
     24700  involuntary context switches

Average Rate: 19MB/s.

I'd hope that someone could point me in the right direction so I can clean the 
patch up, or finish this off themselves and commit it (or something else 
which will increase the read/write speed).

Thanks very much,
-- 
Dominic
GoodforBusiness.co.uk
I.T. Services for SMEs in the UK.

--Boundary-00=_uvdmCBeoNTBAj+g
Content-Type: text/x-diff; charset="iso-8859-1";
	name="msdos-perf-releng5.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
	filename="msdos-perf-releng5.patch"

Index: msdosfs_vnops.c
===================================================================
RCS file: /usr/cvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.149.2.1
diff -u -r1.149.2.1 msdosfs_vnops.c
--- msdosfs_vnops.c	31 Jan 2005 23:25:56 -0000	1.149.2.1
+++ msdosfs_vnops.c	29 May 2005 14:10:18 -0000
@@ -517,6 +517,8 @@
 	int blsize;
 	int isadir;
 	int orig_resid;
+	int nblks = 16; /* XXX should be defined, but not here */
+	int crsize;
 	u_int n;
 	u_long diff;
 	u_long on;
@@ -565,14 +567,21 @@
 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
 		} else {
 			blsize = pmp->pm_bpcluster;
-			rablock = lbn + 1;
-			if (seqcount > 1 &&
-			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
-				rasize = pmp->pm_bpcluster;
-				error = breadn(vp, lbn, blsize,
-				    &rablock, &rasize, 1, NOCRED, &bp);
+			/* XXX what is the best value for crsize? */
+ 			crsize = blsize * nblks > MAXBSIZE ? MAXBSIZE : blsize * nblks;
+			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+				error = cluster_read(vp, dep->de_FileSize, lbn,
+					crsize, NOCRED, uio->uio_resid, seqcount, &bp);
 			} else {
-				error = bread(vp, lbn, blsize, NOCRED, &bp);
+				rablock = lbn + 1;
+				if (seqcount > 1 &&
+					de_cn2off(pmp, rablock) < dep->de_FileSize) {
+						rasize = pmp->pm_bpcluster;
+						error = breadn(vp, lbn, blsize,
+						&rablock, &rasize, 1, NOCRED, &bp);
+				} else {
+					error = bread(vp, lbn, blsize, NOCRED, &bp);
+				}
 			}
 		}
 		if (error) {
@@ -580,14 +589,16 @@
 			break;
 		}
 		on = uio->uio_offset & pmp->pm_crbomask;
-		diff = pmp->pm_bpcluster - on;
-		n = diff > uio->uio_resid ? uio->uio_resid : diff;
+		diff = blsize * nblks - on;
+		n = blsize * nblks > uio->uio_resid ? uio->uio_resid : blsize * nblks;
 		diff = dep->de_FileSize - uio->uio_offset;
-		if (diff < n)
+		if (diff < n) {
 			n = diff;
-		diff = blsize - bp->b_resid;
-		if (diff < n)
+		}
+		diff = blsize * nblks - bp->b_resid;
+		if (diff < n) {
 			n = diff;
+		}
 		error = uiomove(bp->b_data + on, (int) n, uio);
 		brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
@@ -607,6 +618,7 @@
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
+		int seqcount;
 	} */ *ap;
 {
 	int n;
@@ -615,6 +627,7 @@
 	u_long osize;
 	int error = 0;
 	u_long count;
+	int seqcount;
 	daddr_t bn, lastcn;
 	struct buf *bp;
 	int ioflag = ap->a_ioflag;
@@ -692,7 +705,7 @@
 	 */
 	if (uio->uio_offset + resid > osize) {
 		count = de_clcount(pmp, uio->uio_offset + resid) -
-			de_clcount(pmp, osize);
+		   	de_clcount(pmp, osize);
 		error = extendfile(dep, count, NULL, NULL, 0);
 		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
 			goto errexit;
@@ -700,6 +713,7 @@
 	} else
 		lastcn = de_clcount(pmp, osize) - 1;
 
+	seqcount = ioflag >> IO_SEQSHIFT;
 	do {
 		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
 			error = ENOSPC;
@@ -725,7 +739,7 @@
 			 * then no need to read data from disk.
 			 */
 			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
-			clrbuf(bp);
+			vfs_bio_clrbuf(bp);
 			/*
 			 * Do the bmap now, since pcbmap needs buffers
 			 * for the fat table. (see msdosfs_strategy)
@@ -775,6 +789,7 @@
 		 * without delay.  Otherwise do a delayed write because we
 		 * may want to write somemore into the block later.
 		 */
+		 /*
 		if (ioflag & IO_SYNC)
 			(void) bwrite(bp);
 		else if (n + croffset == pmp->pm_bpcluster)
@@ -782,6 +797,24 @@
 		else
 			bdwrite(bp);
 		dep->de_flag |= DE_UPDATE;
+		*/
+		/*
+		 * XXX Patch.
+		 */
+                if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+                       bp->b_flags |= B_CLUSTEROK;
+                if (ioflag & IO_SYNC)
+                       (void)bwrite(bp);
+                else if (vm_page_count_severe() || buf_dirty_count_severe())
+                       bawrite(bp);
+                else if (n + croffset == pmp->pm_bpcluster) {
+                       if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
+                               cluster_write(bp, dep->de_FileSize, seqcount);
+                       else
+                               bawrite(bp);
+               } else
+                       bdwrite(bp);
+                dep->de_flag |= DE_UPDATE;
 	} while (error == 0 && uio->uio_resid > 0);
 
 	/*

--Boundary-00=_uvdmCBeoNTBAj+g--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200505291612.46941.dom>