From owner-freebsd-fs@FreeBSD.ORG  Mon May 30 10:11:59 2005
Return-Path: <owner-freebsd-fs@FreeBSD.ORG>
X-Original-To: freebsd-fs@freebsd.org
Delivered-To: freebsd-fs@freebsd.org
Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125])
	by hub.freebsd.org (Postfix) with ESMTP id BF14E16A41C;
	Mon, 30 May 2005 10:11:59 +0000 (GMT) (envelope-from bde@zeta.org.au)
Received: from mailout2.pacific.net.au (mailout2.pacific.net.au [61.8.0.85])
	by mx1.FreeBSD.org (Postfix) with ESMTP id 11F6943D54;
	Mon, 30 May 2005 10:11:58 +0000 (GMT) (envelope-from bde@zeta.org.au)
Received: from mailproxy2.pacific.net.au (mailproxy2.pacific.net.au
	[61.8.0.87])
	by mailout2.pacific.net.au (8.12.3/8.12.3/Debian-7.1) with ESMTP id
	j4UABmkG005619; Mon, 30 May 2005 20:11:48 +1000
Received: from epsplex.bde.org (katana.zip.com.au [61.8.7.246])
	by mailproxy2.pacific.net.au (8.12.3/8.12.3/Debian-7.1) with ESMTP id
	j4UABiMC012521; Mon, 30 May 2005 20:11:44 +1000
Date: Mon, 30 May 2005 20:11:45 +1000 (EST)
From: Bruce Evans <bde@zeta.org.au>
X-X-Sender: bde@epsplex.bde.org
To: Dominic Marks <dom@goodforbusiness.co.uk>
In-Reply-To: <20050530155609.Q1473@epsplex.bde.org>
Message-ID: <20050530193711.I843@epsplex.bde.org>
References: <200505271328.58072.dom@goodforbusiness.co.uk>
	<200505281213.42118.dom@goodforbusiness.co.uk>
	<200505281540.35116.dom@goodforbusiness.co.uk>
	<200505291612.46941.dom@goodforbusiness.co.uk>
	<20050530155609.Q1473@epsplex.bde.org>
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed
Cc: freebsd-fs@freebsd.org, freebsd-gnats-submit@freebsd.org, banhalmi@field.hu
Subject: Re: i386/68719: [usb] USB 2.0 mobil rack+ fat32 performance problem
X-BeenThere: freebsd-fs@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: Filesystems <freebsd-fs.freebsd.org>
List-Unsubscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-fs>,
	<mailto:freebsd-fs-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/freebsd-fs>
List-Post: <mailto:freebsd-fs@freebsd.org>
List-Help: <mailto:freebsd-fs-request@freebsd.org?subject=help>
List-Subscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-fs>,
	<mailto:freebsd-fs-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Mon, 30 May 2005 10:11:59 -0000

On Mon, 30 May 2005, Bruce Evans wrote:

> On Sun, 29 May 2005, Dominic Marks wrote:

>> I have been experimenting in msdosfs_read and I have managed to come up 
>> with
>> something that works, but I'm sure it is flawed. On large file reads it 
>> will
>> ...
>> %%
>> Index: msdosfs_vnops.c
>> ===================================================================
>> RCS file: /usr/cvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
>> retrieving revision 1.149.2.1
>> diff -u -r1.149.2.1 msdosfs_vnops.c
>> --- msdosfs_vnops.c	31 Jan 2005 23:25:56 -0000	1.149.2.1
>> +++ msdosfs_vnops.c	29 May 2005 14:10:18 -0000
>> @@ -565,14 +567,21 @@
>> 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, 
>> &bp);
>> 		} else {
>> 			blsize = pmp->pm_bpcluster;
>> -			rablock = lbn + 1;
>> -			if (seqcount > 1 &&
>> -			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
>> -				rasize = pmp->pm_bpcluster;
>> -				error = breadn(vp, lbn, blsize,
>> -				    &rablock, &rasize, 1, NOCRED, &bp);
>> +			/* XXX what is the best value for crsize? */
>> + 			crsize = blsize * nblks > MAXBSIZE ? MAXBSIZE : 
>> blsize * nblks;
>> +			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
>> +				error = cluster_read(vp, dep->de_FileSize, 
>> lbn,
>> +					crsize, NOCRED, uio->uio_resid, 
>> seqcount, &bp);
>
> crsize should be just the block size (cluster size in msdosfs and
> blsize variable here) according to this code in all other file systems.
> ...

The main problem is that VOP_BMAP() is not fully implemented for msdosfs.
msdosfs_bmap() only has a stub which pretends that clustering ins never
possible:

% /*
%  * vp  - address of vnode file the file
%  * bn  - which cluster we are interested in mapping to a filesystem block number.
%  * vpp - returns the vnode for the block special file holding the filesystem
%  *	 containing the file of interest
%  * bnp - address of where to return the filesystem relative block number
%  */

This comment rotted in 1994 when 4.4BSD packed the args into a struct
and added the a_runp and a_runb args to support clustering.

% static int
% msdosfs_bmap(ap)
% 	struct vop_bmap_args /* {
% 		struct vnode *a_vp;
% 		daddr_t a_bn;
% 		struct vnode **a_vpp;
% 		daddr_t *a_bnp;
% 		int *a_runp;
% 		int *a_runb;
% 	} */ *ap;
% {
% 	struct denode *dep = VTODE(ap->a_vp);
% 	daddr_t blkno;
% 	int error;
% 
% 	if (ap->a_vpp != NULL)
% 		*ap->a_vpp = dep->de_devvp;
% 	if (ap->a_bnp == NULL)
% 		return (0);
% 	if (ap->a_runp) {
% 		/*
% 		 * Sequential clusters should be counted here.
   		                       ^^^^^^^^^
% 		 */
% 		*ap->a_runp = 0;
% 	}
% 	if (ap->a_runb) {
% 		*ap->a_runb = 0;
% 	}
% 	error = pcbmap(dep, ap->a_bn, &blkno, 0, 0);
% 	*ap->a_bnp = blkno;
% 	return (error);
% }

Here is a cleaned up version of the patch to add (not actually working)
clustering to msdosfs_read().

%%%
Index: msdosfs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.147
diff -u -2 -r1.147 msdosfs_vnops.c
--- msdosfs_vnops.c	4 Feb 2004 21:52:53 -0000	1.147
+++ msdosfs_vnops.c	30 May 2005 08:57:02 -0000
@@ -541,5 +555,7 @@
  		if (uio->uio_offset >= dep->de_FileSize)
  			break;
+		blsize = pmp->pm_bpcluster;
  		lbn = de_cluster(pmp, uio->uio_offset);
+		rablock = lbn + 1;
  		/*
  		 * If we are operating on a directory file then be sure to
@@ -556,15 +573,15 @@
  				break;
  			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
+			error = bread(vp, lbn, blsize, NOCRED, &bp);
+		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
+			    NOCRED, uio->uio_resid, seqcount, &bp);
+		} else if (seqcount > 1) {
+			rasize = blsize;
+			error = breadn(vp, lbn,
+			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
  		} else {
-			blsize = pmp->pm_bpcluster;
-			rablock = lbn + 1;
-			if (seqcount > 1 &&
-			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
-				rasize = pmp->pm_bpcluster;
-				error = breadn(vp, lbn, blsize,
-				    &rablock, &rasize, 1, NOCRED, &bp);
-			} else {
-				error = bread(vp, lbn, blsize, NOCRED, &bp);
-			}
+			error = bread(vp, lbn, blsize, NOCRED, &bp);
  		}
  		if (error) {
%%%

I rearranged the code to be almost lexically identical with that in
ffs_read().

I only tested this on a relatively fast ATA drive.  It made little
difference.  Most writes were clustered to give a block size of 64K
and write speed of over 40+MB/s until the disk is nearly full, but
reads weren't clustered with or without the patch so the block size
remained at the fs block size (4K); the drive handles this block size
mediocrely and gave a read speed of 20+MB/sec.  (The drive is a WDC
1200JB-00CRA1.  This drive has the interesting behaviour of giving
almost the same mediocre read speed for all block sizes between 2.5K
and 19.5K.  A block size 20K gives maximal speed which is about twice
as fast as the speed for a block size of 19.5K.)

Both reading and writing a 1GB file to/from msdosfs caused noticable
buffer resource problems.  Accesses to other file systems on the same
disk sometimes blocked for many seconds.  I have debugging code in
getblk().  It reported that a process waited 17 seconds in or near
getblk().  The process only stopped waiting because I suspended the
process accessing msdosfs.  This may be a local bug.

Bruce