Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 19 Aug 2010 10:36:36 +0800
From:      gnehzuil <gnehzuil@gmail.com>
To:        fs@freebsd.org
Cc:        jhb@FreeBSD.org
Subject:   [patch] ext2fs + preallocation
Message-ID:  <4C6C98B4.4070909@gmail.com>

next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------070305070503050500020300
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Hi all,

There is a patch in attachment which implements a preallocation 
algorithm in ext2fs. I implement this algorithm during FreeBSD SoC 2010.

This patch implements the in-memory ext2/3 block preallocation algorithm 
from reservation window. It uses a RB-tree to index block allocation 
request and reserve a number of blocks for each file which has requested 
to allocate a block. When a file request to allocate a block, it will 
find a block to allocate to this file. When it find the block to 
allocate, it will try to allocate a block, which is in the same cylinder 
group with inode and is not in other reservation window in RB-tree. 
Meanwhile there are some contiguous free blocks after this block. It 
uses a data structure to store this block's position and the length of 
contiguous free blocks. Then it inserts this data structure into 
RB-tree. When this file request to allocate a block again, It will find 
corresponding data structure in RB-tree. If it can find, the next free 
block will be allocated to this file directly. Otherwise, it will search 
a new block again.

I have run some benchmarks to test this algorithm. Please review it in 
wiki page (' http://wiki.freebsd.org/SOC2010ZhengLiu'). The performance 
is better when the number of threads is smaller than 4. When the number 
of threads is greater than 4, the performance can be increased a little.

Please test it.


Thanks and best regards,

lz


--------------070305070503050500020300
Content-Type: text/x-patch;
 name="ext2fs_prealloc.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="ext2fs_prealloc.patch"

diff -urN /usr/src/sys/fs/ext2fs/ext2_alloc.c new/ext2_alloc.c
--- /usr/src/sys/fs/ext2fs/ext2_alloc.c	2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_alloc.c	2010-08-19 02:47:29.000000000 +0800
@@ -50,6 +50,9 @@
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
+
+#define phy_blk(cg, fs) (((cg) * (fs->e2fs->e2fs_fpg)) + fs->e2fs->e2fs_first_dblock)
 
 static daddr_t	ext2_alloccg(struct inode *, int, daddr_t, int);
 static u_long	ext2_dirpref(struct inode *);
@@ -59,37 +62,524 @@
 						int));
 static daddr_t	ext2_nodealloccg(struct inode *, int, daddr_t, int);
 static daddr_t  ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
+
+/* For reservation window */
+static u_long   ext2_alloc_blk(struct inode *, int, struct buf *, int32_t, struct ext2_rsv_win *);
+static int      ext2_alloc_new_rsv(struct inode *, int, struct buf *, int32_t);
+static int      ext2_bpref_in_rsv(struct ext2_rsv_win *, int32_t);
+static int      ext2_find_rsv(struct ext2_rsv_win *, struct ext2_rsv_win *,
+                              struct m_ext2fs *, int32_t, int);
+static void	ext2_remove_rsv_win(struct m_ext2fs *, struct ext2_rsv_win *);
+static u_long   ext2_rsvalloc(struct m_ext2fs *, struct inode *,
+                              int, struct buf *, int32_t, int);
+static daddr_t  ext2_search_next_block(struct m_ext2fs *, char *, int, int);
+static struct ext2_rsv_win *ext2_search_rsv(struct ext2_rsv_win_tree *, int32_t);
+
+RB_GENERATE(ext2_rsv_win_tree, ext2_rsv_win, rsv_link, ext2_rsv_win_cmp);
+
 /*
  * Allocate a block in the file system.
  *
- * A preference may be optionally specified. If a preference is given
- * the following hierarchy is used to allocate a block:
- *   1) allocate the requested block.
- *   2) allocate a rotationally optimal block in the same cylinder.
- *   3) allocate a block in the same cylinder group.
- *   4) quadradically rehash into other cylinder groups, until an
- *        available block is located.
- * If no block preference is given the following hierarchy is used
- * to allocate a block:
- *   1) allocate a block in the cylinder group that contains the
- *        inode for the file.
- *   2) quadradically rehash into other cylinder groups, until an
- *        available block is located.
- *
- * A preference may be optionally specified. If a preference is given
- * the following hierarchy is used to allocate a block:
- *   1) allocate the requested block.
- *   2) allocate a rotationally optimal block in the same cylinder.
- *   3) allocate a block in the same cylinder group.
- *   4) quadradically rehash into other cylinder groups, until an
- *        available block is located.
- * If no block preference is given the following hierarchy is used
- * to allocate a block:
- *   1) allocate a block in the cylinder group that contains the
- *        inode for the file.
- *   2) quadradically rehash into other cylinder groups, until an
- *        available block is located.
+ * By given preference:
+ *   Check whether inode has a reservation window and preference
+ *   is within it and try to allocate a free block from
+ *   this reservation window.
+ *   If not, traverse RB tree to find a place, which is not in
+ *   any window and insert it to RB tree to try to allocate a
+ *   free block again.
+ *   If it fails, try to allocate a free block in other cylinder
+ *   groups without preference.
+ */
+
+/*
+ * Allocate a free block.
+ *
+ * First check whether reservation window is used.
+ * If reservation window is used, try to allocate a free
+ * block from the reservation window. If it fails, traverse
+ * the bitmap to find a free block.
+ * If reservation window is not used, try to allocate
+ * a free block by bpref. If it fails, traverse the bitmap
+ * to find a free block.
  */
+static u_long
+ext2_alloc_blk(struct inode *ip, int cg, struct buf *bp,
+    int32_t bpref, struct ext2_rsv_win *rp)
+{
+	struct m_ext2fs *fs;
+	struct ext2mount *ump;
+	int bno, start, end;
+	char *bbp;
+
+	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+	bbp = (char *)bp->b_data;
+
+	if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+		return (0);
+
+        if (bpref < 0)
+                bpref = 0;
+
+        /* Check whether it use reservation window */
+        if (rp != NULL) {
+                /*
+                 * If window's start is not in this cylinder group,
+                 * try to allocate from the beginning, otherwise
+                 * try to allocate from the beginning of the
+                 * window.
+                 */
+                if (dtog(fs, rp->rsv_start) < cg)
+                        start = 0;
+                else
+                        start = rp->rsv_start;
+
+                /*
+                 * If window's end crosses the end of this group,
+                 * set end variable to the end of this group.
+                 * Otherwise, set it to the window's end.
+                 */
+                if (dtog(fs, rp->rsv_end) > cg)
+                        end = phy_blk(cg + 1, fs) - 1;
+                else
+                        end = rp->rsv_end;
+
+                /* If preference block is within the window, try to allocate it. */
+                if (start <= bpref && bpref <= end) {
+                        bpref = dtogd(fs, bpref);
+                        if (isclr(bbp, bpref)) {
+                                rp->rsv_alloc_hit++;
+                                bno = bpref;
+                                goto gotit;
+                        }
+                } else
+                        if (dtog(fs, rp->rsv_start) == cg)
+                                bpref = dtogd(fs, rp->rsv_start);
+                        else
+                                bpref = 0;
+        } else {
+                if (dtog(fs, bpref) != cg)
+                        bpref = 0;
+                if (bpref != 0) {
+                        bpref = dtogd(fs, bpref);
+                        if (isclr(bbp, bpref)) {
+                                bno = bpref;
+                                goto gotit;
+                        }
+                }
+        }
+
+	bno = ext2_mapsearch(fs, bbp, bpref);
+	if (bno < 0)
+		return (0);
+
+gotit:
+	setbit(bbp, (daddr_t)bno);
+	EXT2_LOCK(ump);
+	fs->e2fs->e2fs_fbcount--;
+	fs->e2fs_gd[cg].ext2bgd_nbfree--;
+	fs->e2fs_fmod = 1;
+	EXT2_UNLOCK(ump);
+	bdwrite(bp);
+	bno = phy_blk(cg, fs) + bno;
+        return (bno);
+}
+
+/*
+ * Initialize reservation window per inode.
+ */
+void
+ext2_init_rsv(struct inode *ip)
+{
+	struct ext2_rsv_win *rp;
+
+	rp = malloc(sizeof(struct ext2_rsv_win),
+	    M_EXT2NODE, M_WAITOK | M_ZERO);
+
+	/* 
+         * If malloc failed, we just do not use the
+	 * reservation window mechanism.
+	 */
+	if (rp == NULL)
+		return;
+
+	rp->rsv_start = EXT2_RSV_NOT_ALLOCATED;
+	rp->rsv_end = EXT2_RSV_NOT_ALLOCATED;
+
+	rp->rsv_goal_size = EXT2_RSV_DEFAULT_RESERVE_BLKS;
+	rp->rsv_alloc_hit = 0;
+
+	ip->i_rsv = rp;
+} 
+
+/*
+ * Discard reservation window.
+ *
+ * It is called during the following situations:
+ * 1. free an inode
+ * 2. sync inode
+ * 3. truncate a file
+ */
+void
+ext2_discard_rsv(struct inode *ip)
+{
+	struct ext2_rsv_win *rp;
+
+	if (ip->i_rsv == NULL) 
+                return;
+
+	rp = ip->i_rsv;
+
+        /* If reservation window is empty, nothing to do */
+	if (rp->rsv_end == EXT2_RSV_NOT_ALLOCATED)
+                return;
+
+        EXT2_TREE_LOCK(ip->i_e2fs);
+        ext2_remove_rsv_win(ip->i_e2fs, rp);
+        EXT2_TREE_UNLOCK(ip->i_e2fs);
+        rp->rsv_goal_size = EXT2_RSV_DEFAULT_RESERVE_BLKS;
+}
+
+/*
+ * Remove a ext2_rsv_win structure from RB tree.
+ */
+static void
+ext2_remove_rsv_win(struct m_ext2fs *fs, struct ext2_rsv_win *rp)
+{
+	RB_REMOVE(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+	rp->rsv_start = EXT2_RSV_NOT_ALLOCATED;
+	rp->rsv_end = EXT2_RSV_NOT_ALLOCATED;
+	rp->rsv_alloc_hit = 0;
+}
+
+/*
+ * Check bpref is in the reservation window.
+ */
+static int
+ext2_bpref_in_rsv(struct ext2_rsv_win *rp, int32_t bpref)
+{
+        if (bpref >= 0 && (bpref < rp->rsv_start || bpref > rp->rsv_end))
+                return (0);
+
+        return (1);
+}
+
+/*
+ * Search a tree node from RB tree. It includes the bpref or
+ * the previous one if bpref is not in any window.
+ */
+static struct ext2_rsv_win *
+ext2_search_rsv(struct ext2_rsv_win_tree *root, int32_t start)
+{
+        struct ext2_rsv_win *prev, *next;
+
+        if (RB_EMPTY(root))
+                return (NULL);
+
+        next = RB_ROOT(root);
+        do {
+                prev = next;
+                if (start < next->rsv_start)
+                        next = RB_LEFT(next, rsv_link);
+                else if (start > next->rsv_end)
+                        next = RB_RIGHT(next, rsv_link);
+                else
+                        return (next);
+        } while (next != NULL);
+
+        if (prev->rsv_start > start) {
+                next = RB_PREV(ext2_rsv_win_tree, root, prev);
+                if (next != NULL)
+                        prev = next;
+        }
+
+        return (prev);
+}
+
+/*
+ * Find a reservation window by given range from start to
+ * the end of this cylinder group.
+ */
+static int
+ext2_find_rsv(struct ext2_rsv_win *search, struct ext2_rsv_win *rp,
+    struct m_ext2fs *fs, int32_t start, int cg)
+{
+        struct ext2_rsv_win *rsv, *prev;
+        int32_t cur;
+        int size = rp->rsv_goal_size;
+
+        if (search == NULL) {
+                rp->rsv_start = start & ~7;
+                rp->rsv_end = start + size - 1;
+                rp->rsv_alloc_hit = 0;
+
+                RB_INSERT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+
+                return (0);
+        }
+
+        /*
+         * Make the start of reservation window byte-aligned
+         * in order to can find a free block with bit operations
+         * in the ext2_search_next_block() function.
+         */
+        cur = start & ~7;
+        rsv = search;
+        prev = NULL;
+
+        while (1) {
+                if (cur <= rsv->rsv_end)
+                        cur = rsv->rsv_end + 1;
+
+                if (dtog(fs, cur) != cg)
+                        return (-1);
+
+                prev = rsv;
+                rsv = RB_NEXT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rsv);
+
+                if (rsv == NULL)
+                        break;
+
+                if (cur + size <= rsv->rsv_start)
+                        break;
+        }
+
+        if (prev != rp && rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+                ext2_remove_rsv_win(fs, rp);
+
+        rp->rsv_start = cur;
+        rp->rsv_end = cur + size - 1;
+        rp->rsv_alloc_hit = 0;
+
+        if (prev != rp)
+                RB_INSERT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+
+        return (0);
+}
+
+/*
+ * Find a free block by given range from bpref to
+ * the end of this cylinder group.
+ */
+static daddr_t
+ext2_search_next_block(struct m_ext2fs *fs, char *bbp, int bpref, int cg)
+{
+        daddr_t bno;
+        int start, loc, len, map, i;
+
+        start = bpref / NBBY;
+        len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+        loc = skpc(0xff, len, &bbp[start]);
+        if (loc == 0)
+                return (-1);
+
+        i = start + len - loc;
+        map = bbp[i];
+        bno = i * NBBY;
+        for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
+                if ((map & i) == 0)
+                        return (bno);
+        }
+
+        return (-1);
+}
+
+/*
+ * Allocate a new reservation window.
+ */
+static int
+ext2_alloc_new_rsv(struct inode *ip, int cg, struct buf *bp, int32_t bpref)
+{
+        struct m_ext2fs *fs;
+        struct ext2_rsv_win *rp, *search;
+        char *bbp;
+        int start, size, ret;
+
+        fs = ip->i_e2fs;
+        rp = ip->i_rsv;
+        bbp = bp->b_data;
+        size = rp->rsv_goal_size;
+
+        if (bpref <= 0)
+                start = phy_blk(cg, fs);
+        else
+                start = bpref;
+
+        /* Dynamically increase the size of window */
+        if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED) {
+                if (rp->rsv_alloc_hit >
+                    ((rp->rsv_end - rp->rsv_start + 1) / 2)) {
+                        size = size * 2;
+                        if (size > EXT2_RSV_MAX_RESERVE_BLKS)
+                                size = EXT2_RSV_MAX_RESERVE_BLKS;
+                        rp->rsv_goal_size = size;
+                }
+        }
+
+        EXT2_TREE_LOCK(fs);
+
+        search = ext2_search_rsv(fs->e2fs_rsv_tree, start);
+
+repeat:
+        ret = ext2_find_rsv(search, rp, fs, start, cg);
+        if (ret < 0) {
+                if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+                        ext2_remove_rsv_win(fs, rp);
+                EXT2_TREE_UNLOCK(fs);
+                return (-1);
+        }
+        EXT2_TREE_UNLOCK(fs);
+
+        start = dtogd(fs, rp->rsv_start);
+        start = ext2_search_next_block(fs, bbp, start, cg);
+        if (start < 0) {
+                EXT2_TREE_LOCK(fs);
+                if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+                        ext2_remove_rsv_win(fs, rp);
+                EXT2_TREE_UNLOCK(fs);
+                return (-1);
+        }
+
+        start = phy_blk(cg, fs) + start;
+        if (start >= rp->rsv_start && start <= rp->rsv_end)
+                return (0);
+
+        search = rp;
+        EXT2_TREE_LOCK(fs);
+        goto repeat;
+}
+
+/*
+ * Allocate a free block from reservation window.
+ */
+static u_long
+ext2_rsvalloc(struct m_ext2fs *fs, struct inode *ip, int cg,
+    struct buf *bp, int32_t bpref, int size)
+{
+        struct ext2_rsv_win *rp;
+        int ret;
+
+        rp = ip->i_rsv;
+        if (rp == NULL)
+                return (ext2_alloc_blk(ip, cg, bp, bpref, NULL));
+
+        if (rp->rsv_end == EXT2_RSV_NOT_ALLOCATED ||
+            !ext2_bpref_in_rsv(rp, bpref)) {
+                ret = ext2_alloc_new_rsv(ip, cg, bp, bpref);
+                if (ret < 0)
+                        return (0);
+        }
+
+        return (ext2_alloc_blk(ip, cg, bp, bpref, rp));
+}
+
+/*
+ * Allocate a block using reservation window in ext2 file system.
+ *
+ * NOTE: This function will replace the ext2_alloc() function.
+ */
+int
+ext2_alloc_rsv(struct inode *ip, int32_t lbn, int32_t bpref,
+    int size, struct ucred *cred, int32_t *bnp)
+{
+	struct m_ext2fs *fs;
+	struct ext2mount *ump;
+        struct buf *bp;
+	int32_t bno = 0;
+	int i, cg, error;
+
+	*bnp = 0;
+	fs = ip->i_e2fs;
+	ump = ip->i_ump;
+	mtx_assert(EXT2_MTX(ump), MA_OWNED);
+
+	if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
+		goto nospace;
+	if (cred->cr_uid != 0 && 
+	    fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
+		goto nospace;
+
+	if (bpref >= fs->e2fs->e2fs_bcount)
+		bpref = 0;
+	if (bpref == 0)
+		cg = ino_to_cg(fs, ip->i_number);
+	else
+		cg = dtog(fs, bpref);
+
+        /* If cg has some free blocks, then try to allocate a free block from this cg */
+        if (fs->e2fs_gd[cg].ext2bgd_nbfree > 0) {
+                /* Read block bitmap from buffer */
+                EXT2_UNLOCK(ump);
+                error = bread(ip->i_devvp,
+                    fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+                    (int)fs->e2fs_bsize, NOCRED, &bp);
+                if (error) {
+                        brelse(bp);
+                        goto ioerror;
+                }
+
+                EXT2_RSV_LOCK(ip);
+                /* Try to allocate from reservation window */
+                bno = ext2_rsvalloc(fs, ip, cg, bp, bpref, size);
+                EXT2_RSV_UNLOCK(ip);
+                if (bno > 0)
+                        goto allocated;
+
+                brelse(bp);
+                EXT2_LOCK(ump);
+        }
+
+        /* Just need to try to allocate a free block from rest groups. */
+        cg = (cg + 1) % fs->e2fs_gcount;
+        for (i = 1; i < fs->e2fs_gcount; i++) {
+                if (fs->e2fs_gd[cg].ext2bgd_nbfree > 0) {
+                        /* Read block bitmap from buffer */
+                        EXT2_UNLOCK(ump);
+                        error = bread(ip->i_devvp,
+                            fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+                            (int)fs->e2fs_bsize, NOCRED, &bp);
+                        if (error) {
+                                brelse(bp);
+                                goto ioerror;
+                        }
+
+                        EXT2_RSV_LOCK(ip);
+                        bno = ext2_rsvalloc(fs, ip, cg, bp, -1, size);
+                        EXT2_RSV_UNLOCK(ip);
+                        if (bno > 0)
+                                goto allocated;
+
+                        brelse(bp);
+                        EXT2_LOCK(ump);
+                }
+
+                cg++;
+                if (cg == fs->e2fs_gcount)
+                        cg = 0;
+        }
+
+allocated:
+        if (bno > 0) {
+                ip->i_next_alloc_block = lbn;
+                ip->i_next_alloc_goal = bno;
+
+                ip->i_blocks += btodb(fs->e2fs_bsize);
+                ip->i_flag |= IN_CHANGE | IN_UPDATE;
+                *bnp = bno;
+                return (0);
+        }
+
+nospace:
+	EXT2_UNLOCK(ump);
+	ext2_fserr(fs, cred->cr_uid, "file system full");
+	uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
+	return (ENOSPC);
+
+ioerror:
+        ext2_fserr(fs, cred->cr_uid, "file system IO error");
+        uprintf("\n%s: write failed, file system IO error\n", fs->e2fs_fsmnt);
+        return (EIO);
+}
 
 int
 ext2_alloc(ip, lbn, bpref, size, cred, bnp)
@@ -923,9 +1413,11 @@
 		start = 0;
 		loc = skpc(0xff, len, &bbp[start]);
 		if (loc == 0) {
-			printf("start = %d, len = %d, fs = %s\n",
-				start, len, fs->e2fs_fsmnt);
-			panic("ext2fs_alloccg: map corrupted");
+                        /* XXX: just for reservation window */
+                        return -1;
+			/*printf("start = %d, len = %d, fs = %s\n",*/
+				/*start, len, fs->e2fs_fsmnt);*/
+			/*panic("ext2fs_alloccg: map corrupted");*/
 			/* NOTREACHED */
 		}
 	}
diff -urN /usr/src/sys/fs/ext2fs/ext2_balloc.c new/ext2_balloc.c
--- /usr/src/sys/fs/ext2fs/ext2_balloc.c	2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_balloc.c	2010-08-19 02:47:29.000000000 +0800
@@ -49,6 +49,7 @@
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
 /*
  * Balloc defines the structure of file system storage
  * by allocating the physical blocks on a device given
@@ -78,6 +79,9 @@
 	fs = ip->i_e2fs;
 	ump = ip->i_ump;
 
+        if (ip->i_rsv == NULL)
+                ext2_init_rsv(ip);
+
 	/*
 	 * check if this is a sequential block allocation. 
 	 * If so, increment next_alloc fields to allow ext2_blkpref 
@@ -136,9 +140,9 @@
 			else
 				nsize = fs->e2fs_bsize;
 			EXT2_LOCK(ump);
-			error = ext2_alloc(ip, lbn,
-			    ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
-			    nsize, cred, &newb);
+			error = ext2_alloc_rsv(ip, lbn,
+				    ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
+				    nsize, cred, &newb);
 			if (error)
 				return (error);
 			bp = getblk(vp, lbn, nsize, 0, 0, 0);
@@ -170,9 +174,9 @@
 		EXT2_LOCK(ump);
 		pref = ext2_blkpref(ip, lbn, indirs[0].in_off + 
 					     EXT2_NDIR_BLOCKS, &ip->i_db[0], 0);
-	        if ((error = ext2_alloc(ip, lbn, pref, 
-			(int)fs->e2fs_bsize, cred, &newb)))
-			return (error);
+	        if ((error = ext2_alloc_rsv(ip, lbn, pref, 
+				(int)fs->e2fs_bsize, cred, &newb)))
+				return (error);
 		nb = newb;
 		bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0);
 		bp->b_blkno = fsbtodb(fs, newb);
@@ -211,7 +215,7 @@
 		if (pref == 0)
 			pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap,
 						bp->b_lblkno);
-		error =  ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
+		error =  ext2_alloc_rsv(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
 		if (error) {
 			brelse(bp);
 			return (error);
@@ -250,8 +254,8 @@
 		EXT2_LOCK(ump);
 		pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], 
 				bp->b_lblkno);
-		if ((error = ext2_alloc(ip,
-		    lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) {
+		if ((error = ext2_alloc_rsv(ip, lbn, pref,
+				(int)fs->e2fs_bsize, cred, &newb)) != 0) {
 			brelse(bp);
 			return (error);
 		}
diff -urN /usr/src/sys/fs/ext2fs/ext2_inode.c new/ext2_inode.c
--- /usr/src/sys/fs/ext2fs/ext2_inode.c	2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_inode.c	2010-08-19 02:47:29.000000000 +0800
@@ -52,6 +52,7 @@
 #include <fs/ext2fs/ext2fs.h>
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
 
 static int ext2_indirtrunc(struct inode *, int32_t, int32_t, int32_t, int,
 	    long *);
@@ -153,6 +154,11 @@
 	}
 	fs = oip->i_e2fs;
 	osize = oip->i_size;
+
+        EXT2_RSV_LOCK(oip);
+	ext2_discard_rsv(oip);
+        EXT2_RSV_UNLOCK(oip);
+
 	/*
 	 * Lengthen the size of the file. We must ensure that the
 	 * last byte of the file is allocated. Since the smallest
@@ -484,6 +490,10 @@
 	if (prtactive && vrefcnt(vp) != 0)
 		vprint("ext2_inactive: pushing active", vp);
 
+        EXT2_RSV_LOCK(ip);
+        ext2_discard_rsv(ip);
+        EXT2_RSV_UNLOCK(ip);
+
 	/*
 	 * Ignore inodes related to stale file handles.
 	 */
@@ -525,11 +535,21 @@
 	if (prtactive && vrefcnt(vp) != 0)
 		vprint("ufs_reclaim: pushing active", vp);
 	ip = VTOI(vp);
+
 	if (ip->i_flag & IN_LAZYMOD) {
 		ip->i_flag |= IN_MODIFIED;
 		ext2_update(vp, 0);
 	}
 	vfs_hash_remove(vp);
+
+        EXT2_RSV_LOCK(ip);
+        if (ip->i_rsv != NULL) {
+                free(ip->i_rsv, M_EXT2NODE);
+                ip->i_rsv = NULL;
+        }
+        EXT2_RSV_UNLOCK(ip);
+        mtx_destroy(&ip->i_rsv_lock);
+
 	free(vp->v_data, M_EXT2NODE);
 	vp->v_data = 0;
 	vnode_destroy_vobject(vp);
diff -urN /usr/src/sys/fs/ext2fs/ext2_rsv_win.h new/ext2_rsv_win.h
--- /usr/src/sys/fs/ext2fs/ext2_rsv_win.h	1970-01-01 08:00:00.000000000 +0800
+++ new/ext2_rsv_win.h	2010-08-19 02:47:29.000000000 +0800
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2010, 2010 Zheng Liu <lz@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/fs/ext2fs/ext2_rsv_win.h,v 0.1 2010/05/08 12:41:51 lz Exp $
+ */
+#ifndef _FS_EXT2FS_EXT2_RSV_WIN_H_
+#define _FS_EXT2FS_EXT2_RSV_WIN_H_
+
+#include <sys/tree.h>
+
+#define EXT2_RSV_DEFAULT_RESERVE_BLKS 8
+#define EXT2_RSV_MAX_RESERVE_BLKS     1024
+#define EXT2_RSV_NOT_ALLOCATED        0
+
+#define EXT2_RSV_LOCK(ip)   mtx_lock(&ip->i_rsv_lock)
+#define EXT2_RSV_UNLOCK(ip) mtx_unlock(&ip->i_rsv_lock)
+
+#define EXT2_TREE_LOCK(fs)   mtx_lock(&fs->e2fs_rsv_lock);
+#define EXT2_TREE_UNLOCK(fs) mtx_unlock(&fs->e2fs_rsv_lock);
+
+/*
+ * Reservation window entry
+ */
+struct ext2_rsv_win {
+	RB_ENTRY(ext2_rsv_win) rsv_link; /* RB tree links */
+
+	int32_t rsv_goal_size; /* Default reservation window size */
+	int32_t rsv_alloc_hit; /* Number of allocated windows */
+
+	int32_t rsv_start; /* First bytes of window */
+	int32_t rsv_end;   /* End bytes of window */
+};
+
+RB_HEAD(ext2_rsv_win_tree, ext2_rsv_win);
+
+static __inline int
+ext2_rsv_win_cmp(const struct ext2_rsv_win *a,
+		 const struct ext2_rsv_win *b)
+{
+	if (a->rsv_start < b->rsv_start)
+		return (-1);
+	if (a->rsv_start == b->rsv_start)
+		return (0);
+
+	return (1);
+}
+RB_PROTOTYPE(ext2_rsv_win_tree, ext2_rsv_win, rsv_link, ext2_rsv_win_cmp);
+
+/* predefine */
+struct inode;
+/* ext2_alloc.c */
+void    ext2_init_rsv(struct inode *ip);
+void    ext2_discard_rsv(struct inode *ip);
+int     ext2_alloc_rsv(struct inode *, int32_t, int32_t, int, struct ucred *, int32_t *);
+
+#endif /* !_FS_EXT2FS_EXT2_RSV_WIN_H_ */
diff -urN /usr/src/sys/fs/ext2fs/ext2_vfsops.c new/ext2_vfsops.c
--- /usr/src/sys/fs/ext2fs/ext2_vfsops.c	2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_vfsops.c	2010-08-19 02:47:29.000000000 +0800
@@ -1,4 +1,4 @@
-/*-
+/*
  *  modified for EXT2FS support in Lites 1.1
  *
  *  Aug 1995, Godmar Back (gback@cs.utah.edu)
@@ -61,6 +61,7 @@
 #include <fs/ext2fs/fs.h>
 #include <fs/ext2fs/ext2_extern.h>
 #include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
 
 static int	ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
 static int	ext2_mountfs(struct vnode *, struct mount *);
@@ -95,9 +96,9 @@
 static int	compute_sb_data(struct vnode * devvp,
 		    struct ext2fs * es, struct m_ext2fs * fs);
 
-static const char *ext2_opts[] = { "from", "export", "acls", "noexec",
-    "noatime", "union", "suiddir", "multilabel", "nosymfollow",
-    "noclusterr", "noclusterw", "force", NULL };
+static const char *ext2_opts[] = { "acls", "async", "export", "force",
+    "from", "multilabel", "noatime", "noclusterr", "noclusterw",
+    "noexec", "nosymfollow", "suiddir", "union", NULL };
 
 /*
  * VFS Operations.
@@ -581,6 +582,14 @@
 	if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
 		goto out;
 
+	/* Initial reservation window index and lock */
+	bzero(&ump->um_e2fs->e2fs_rsv_lock, sizeof(struct mtx));
+	mtx_init(&ump->um_e2fs->e2fs_rsv_lock,
+            "rsv tree lock", NULL, MTX_DEF);
+        ump->um_e2fs->e2fs_rsv_tree = malloc(sizeof(struct ext2_rsv_win_tree),
+            M_EXT2MNT, M_WAITOK | M_ZERO);
+	RB_INIT(ump->um_e2fs->e2fs_rsv_tree);
+
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_e2fs;
@@ -680,6 +689,8 @@
 	g_topology_unlock();
 	PICKUP_GIANT();
 	vrele(ump->um_devvp);
+        free(fs->e2fs_rsv_tree, M_EXT2MNT);
+	mtx_destroy(&fs->e2fs_rsv_lock);
 	free(fs->e2fs_gd, M_EXT2MNT);
 	free(fs->e2fs_contigdirs, M_EXT2MNT);
 	free(fs->e2fs, M_EXT2MNT);
@@ -919,6 +930,10 @@
 	ip->i_prealloc_count = 0;
 	ip->i_prealloc_block = 0;
 
+	bzero(&ip->i_rsv_lock, sizeof(struct mtx));
+	mtx_init(&ip->i_rsv_lock, "inode rsv lock", NULL, MTX_DEF);
+        ip->i_rsv = NULL;
+
 	/*
 	 * Now we want to make sure that block pointers for unused
 	 * blocks are zeroed out - ext2_balloc depends on this
diff -urN /usr/src/sys/fs/ext2fs/ext2fs.h new/ext2fs.h
--- /usr/src/sys/fs/ext2fs/ext2fs.h	2010-01-14 22:30:54.000000000 +0800
+++ new/ext2fs.h	2010-08-19 02:47:29.000000000 +0800
@@ -38,6 +38,7 @@
 #define _FS_EXT2FS_EXT2_FS_H
 
 #include <sys/types.h>
+#include <sys/lock.h>
 
 /*
  * Special inode numbers
@@ -174,6 +175,9 @@
 	char e2fs_wasvalid;       /* valid at mount time */
 	off_t e2fs_maxfilesize;
 	struct ext2_gd *e2fs_gd; /* Group Descriptors */
+
+	struct mtx e2fs_rsv_lock;                /* Protect reservation window RB tree */
+	struct ext2_rsv_win_tree *e2fs_rsv_tree; /* Reservation window index */
 };
 
 /*
diff -urN /usr/src/sys/fs/ext2fs/inode.h new/inode.h
--- /usr/src/sys/fs/ext2fs/inode.h	2010-01-14 22:30:54.000000000 +0800
+++ new/inode.h	2010-08-19 02:47:29.000000000 +0800
@@ -100,6 +100,10 @@
 	int32_t		i_gen;		/* Generation number. */
 	u_int32_t	i_uid;		/* File owner. */
 	u_int32_t	i_gid;		/* File group. */
+
+        /* Fields for reservation window */
+        struct mtx          i_rsv_lock; /* Protects i_rsv */
+	struct ext2_rsv_win *i_rsv;     /* Reservation window */
 };
 
 /*


--------------070305070503050500020300--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4C6C98B4.4070909>