Date: Thu, 19 Aug 2010 10:36:36 +0800 From: gnehzuil <gnehzuil@gmail.com> To: fs@freebsd.org Cc: jhb@FreeBSD.org Subject: [patch] ext2fs + preallocation Message-ID: <4C6C98B4.4070909@gmail.com>
next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------070305070503050500020300
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Hi all,
There is a patch in attachment which implements a preallocation
algorithm in ext2fs. I implement this algorithm during FreeBSD SoC 2010.
This patch implements the in-memory ext2/3 block preallocation algorithm
from reservation window. It uses a RB-tree to index block allocation
request and reserve a number of blocks for each file which has requested
to allocate a block. When a file request to allocate a block, it will
find a block to allocate to this file. When it find the block to
allocate, it will try to allocate a block, which is in the same cylinder
group with inode and is not in other reservation window in RB-tree.
Meanwhile there are some contiguous free blocks after this block. It
uses a data structure to store this block's position and the length of
contiguous free blocks. Then it inserts this data structure into
RB-tree. When this file request to allocate a block again, It will find
corresponding data structure in RB-tree. If it can find, the next free
block will be allocated to this file directly. Otherwise, it will search
a new block again.
I have run some benchmarks to test this algorithm. Please review it in
wiki page (' http://wiki.freebsd.org/SOC2010ZhengLiu'). The performance
is better when the number of threads is smaller than 4. When the number
of threads is greater than 4, the performance can be increased a little.
Please test it.
Thanks and best regards,
lz
--------------070305070503050500020300
Content-Type: text/x-patch;
name="ext2fs_prealloc.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="ext2fs_prealloc.patch"
diff -urN /usr/src/sys/fs/ext2fs/ext2_alloc.c new/ext2_alloc.c
--- /usr/src/sys/fs/ext2fs/ext2_alloc.c 2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_alloc.c 2010-08-19 02:47:29.000000000 +0800
@@ -50,6 +50,9 @@
#include <fs/ext2fs/ext2fs.h>
#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
+
+#define phy_blk(cg, fs) (((cg) * (fs->e2fs->e2fs_fpg)) + fs->e2fs->e2fs_first_dblock)
static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int);
static u_long ext2_dirpref(struct inode *);
@@ -59,37 +62,524 @@
int));
static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int);
static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t);
+
+/* For reservation window */
+static u_long ext2_alloc_blk(struct inode *, int, struct buf *, int32_t, struct ext2_rsv_win *);
+static int ext2_alloc_new_rsv(struct inode *, int, struct buf *, int32_t);
+static int ext2_bpref_in_rsv(struct ext2_rsv_win *, int32_t);
+static int ext2_find_rsv(struct ext2_rsv_win *, struct ext2_rsv_win *,
+ struct m_ext2fs *, int32_t, int);
+static void ext2_remove_rsv_win(struct m_ext2fs *, struct ext2_rsv_win *);
+static u_long ext2_rsvalloc(struct m_ext2fs *, struct inode *,
+ int, struct buf *, int32_t, int);
+static daddr_t ext2_search_next_block(struct m_ext2fs *, char *, int, int);
+static struct ext2_rsv_win *ext2_search_rsv(struct ext2_rsv_win_tree *, int32_t);
+
+RB_GENERATE(ext2_rsv_win_tree, ext2_rsv_win, rsv_link, ext2_rsv_win_cmp);
+
/*
* Allocate a block in the file system.
*
- * A preference may be optionally specified. If a preference is given
- * the following hierarchy is used to allocate a block:
- * 1) allocate the requested block.
- * 2) allocate a rotationally optimal block in the same cylinder.
- * 3) allocate a block in the same cylinder group.
- * 4) quadradically rehash into other cylinder groups, until an
- * available block is located.
- * If no block preference is given the following hierarchy is used
- * to allocate a block:
- * 1) allocate a block in the cylinder group that contains the
- * inode for the file.
- * 2) quadradically rehash into other cylinder groups, until an
- * available block is located.
- *
- * A preference may be optionally specified. If a preference is given
- * the following hierarchy is used to allocate a block:
- * 1) allocate the requested block.
- * 2) allocate a rotationally optimal block in the same cylinder.
- * 3) allocate a block in the same cylinder group.
- * 4) quadradically rehash into other cylinder groups, until an
- * available block is located.
- * If no block preference is given the following hierarchy is used
- * to allocate a block:
- * 1) allocate a block in the cylinder group that contains the
- * inode for the file.
- * 2) quadradically rehash into other cylinder groups, until an
- * available block is located.
+ * By given preference:
+ * Check whether inode has a reservation window and preference
+ * is within it and try to allocate a free block from
+ * this reservation window.
+ * If not, traverse RB tree to find a place, which is not in
+ * any window and insert it to RB tree to try to allocate a
+ * free block again.
+ * If it fails, try to allocate a free block in other cylinder
+ * groups without preference.
+ */
+
+/*
+ * Allocate a free block.
+ *
+ * First check whether reservation window is used.
+ * If reservation window is used, try to allocate a free
+ * block from the reservation window. If it fails, traverse
+ * the bitmap to find a free block.
+ * If reservation window is not used, try to allocate
+ * a free block by bpref. If it fails, traverse the bitmap
+ * to find a free block.
*/
+static u_long
+ext2_alloc_blk(struct inode *ip, int cg, struct buf *bp,
+ int32_t bpref, struct ext2_rsv_win *rp)
+{
+ struct m_ext2fs *fs;
+ struct ext2mount *ump;
+ int bno, start, end;
+ char *bbp;
+
+ fs = ip->i_e2fs;
+ ump = ip->i_ump;
+ bbp = (char *)bp->b_data;
+
+ if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0)
+ return (0);
+
+ if (bpref < 0)
+ bpref = 0;
+
+ /* Check whether it use reservation window */
+ if (rp != NULL) {
+ /*
+ * If window's start is not in this cylinder group,
+ * try to allocate from the beginning, otherwise
+ * try to allocate from the beginning of the
+ * window.
+ */
+ if (dtog(fs, rp->rsv_start) < cg)
+ start = 0;
+ else
+ start = rp->rsv_start;
+
+ /*
+ * If window's end crosses the end of this group,
+ * set end variable to the end of this group.
+ * Otherwise, set it to the window's end.
+ */
+ if (dtog(fs, rp->rsv_end) > cg)
+ end = phy_blk(cg + 1, fs) - 1;
+ else
+ end = rp->rsv_end;
+
+ /* If preference block is within the window, try to allocate it. */
+ if (start <= bpref && bpref <= end) {
+ bpref = dtogd(fs, bpref);
+ if (isclr(bbp, bpref)) {
+ rp->rsv_alloc_hit++;
+ bno = bpref;
+ goto gotit;
+ }
+ } else
+ if (dtog(fs, rp->rsv_start) == cg)
+ bpref = dtogd(fs, rp->rsv_start);
+ else
+ bpref = 0;
+ } else {
+ if (dtog(fs, bpref) != cg)
+ bpref = 0;
+ if (bpref != 0) {
+ bpref = dtogd(fs, bpref);
+ if (isclr(bbp, bpref)) {
+ bno = bpref;
+ goto gotit;
+ }
+ }
+ }
+
+ bno = ext2_mapsearch(fs, bbp, bpref);
+ if (bno < 0)
+ return (0);
+
+gotit:
+ setbit(bbp, (daddr_t)bno);
+ EXT2_LOCK(ump);
+ fs->e2fs->e2fs_fbcount--;
+ fs->e2fs_gd[cg].ext2bgd_nbfree--;
+ fs->e2fs_fmod = 1;
+ EXT2_UNLOCK(ump);
+ bdwrite(bp);
+ bno = phy_blk(cg, fs) + bno;
+ return (bno);
+}
+
+/*
+ * Initialize reservation window per inode.
+ */
+void
+ext2_init_rsv(struct inode *ip)
+{
+ struct ext2_rsv_win *rp;
+
+ rp = malloc(sizeof(struct ext2_rsv_win),
+ M_EXT2NODE, M_WAITOK | M_ZERO);
+
+ /*
+ * If malloc failed, we just do not use the
+ * reservation window mechanism.
+ */
+ if (rp == NULL)
+ return;
+
+ rp->rsv_start = EXT2_RSV_NOT_ALLOCATED;
+ rp->rsv_end = EXT2_RSV_NOT_ALLOCATED;
+
+ rp->rsv_goal_size = EXT2_RSV_DEFAULT_RESERVE_BLKS;
+ rp->rsv_alloc_hit = 0;
+
+ ip->i_rsv = rp;
+}
+
+/*
+ * Discard reservation window.
+ *
+ * It is called during the following situations:
+ * 1. free an inode
+ * 2. sync inode
+ * 3. truncate a file
+ */
+void
+ext2_discard_rsv(struct inode *ip)
+{
+ struct ext2_rsv_win *rp;
+
+ if (ip->i_rsv == NULL)
+ return;
+
+ rp = ip->i_rsv;
+
+ /* If reservation window is empty, nothing to do */
+ if (rp->rsv_end == EXT2_RSV_NOT_ALLOCATED)
+ return;
+
+ EXT2_TREE_LOCK(ip->i_e2fs);
+ ext2_remove_rsv_win(ip->i_e2fs, rp);
+ EXT2_TREE_UNLOCK(ip->i_e2fs);
+ rp->rsv_goal_size = EXT2_RSV_DEFAULT_RESERVE_BLKS;
+}
+
+/*
+ * Remove a ext2_rsv_win structure from RB tree.
+ */
+static void
+ext2_remove_rsv_win(struct m_ext2fs *fs, struct ext2_rsv_win *rp)
+{
+ RB_REMOVE(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+ rp->rsv_start = EXT2_RSV_NOT_ALLOCATED;
+ rp->rsv_end = EXT2_RSV_NOT_ALLOCATED;
+ rp->rsv_alloc_hit = 0;
+}
+
+/*
+ * Check bpref is in the reservation window.
+ */
+static int
+ext2_bpref_in_rsv(struct ext2_rsv_win *rp, int32_t bpref)
+{
+ if (bpref >= 0 && (bpref < rp->rsv_start || bpref > rp->rsv_end))
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Search a tree node from RB tree. It includes the bpref or
+ * the previous one if bpref is not in any window.
+ */
+static struct ext2_rsv_win *
+ext2_search_rsv(struct ext2_rsv_win_tree *root, int32_t start)
+{
+ struct ext2_rsv_win *prev, *next;
+
+ if (RB_EMPTY(root))
+ return (NULL);
+
+ next = RB_ROOT(root);
+ do {
+ prev = next;
+ if (start < next->rsv_start)
+ next = RB_LEFT(next, rsv_link);
+ else if (start > next->rsv_end)
+ next = RB_RIGHT(next, rsv_link);
+ else
+ return (next);
+ } while (next != NULL);
+
+ if (prev->rsv_start > start) {
+ next = RB_PREV(ext2_rsv_win_tree, root, prev);
+ if (next != NULL)
+ prev = next;
+ }
+
+ return (prev);
+}
+
+/*
+ * Find a reservation window by given range from start to
+ * the end of this cylinder group.
+ */
+static int
+ext2_find_rsv(struct ext2_rsv_win *search, struct ext2_rsv_win *rp,
+ struct m_ext2fs *fs, int32_t start, int cg)
+{
+ struct ext2_rsv_win *rsv, *prev;
+ int32_t cur;
+ int size = rp->rsv_goal_size;
+
+ if (search == NULL) {
+ rp->rsv_start = start & ~7;
+ rp->rsv_end = start + size - 1;
+ rp->rsv_alloc_hit = 0;
+
+ RB_INSERT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+
+ return (0);
+ }
+
+ /*
+ * Make the start of reservation window byte-aligned
+ * in order to can find a free block with bit operations
+ * in the ext2_search_next_block() function.
+ */
+ cur = start & ~7;
+ rsv = search;
+ prev = NULL;
+
+ while (1) {
+ if (cur <= rsv->rsv_end)
+ cur = rsv->rsv_end + 1;
+
+ if (dtog(fs, cur) != cg)
+ return (-1);
+
+ prev = rsv;
+ rsv = RB_NEXT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rsv);
+
+ if (rsv == NULL)
+ break;
+
+ if (cur + size <= rsv->rsv_start)
+ break;
+ }
+
+ if (prev != rp && rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+ ext2_remove_rsv_win(fs, rp);
+
+ rp->rsv_start = cur;
+ rp->rsv_end = cur + size - 1;
+ rp->rsv_alloc_hit = 0;
+
+ if (prev != rp)
+ RB_INSERT(ext2_rsv_win_tree, fs->e2fs_rsv_tree, rp);
+
+ return (0);
+}
+
+/*
+ * Find a free block by given range from bpref to
+ * the end of this cylinder group.
+ */
+static daddr_t
+ext2_search_next_block(struct m_ext2fs *fs, char *bbp, int bpref, int cg)
+{
+ daddr_t bno;
+ int start, loc, len, map, i;
+
+ start = bpref / NBBY;
+ len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start;
+ loc = skpc(0xff, len, &bbp[start]);
+ if (loc == 0)
+ return (-1);
+
+ i = start + len - loc;
+ map = bbp[i];
+ bno = i * NBBY;
+ for (i = 1; i < (1 << NBBY); i <<= 1, bno++) {
+ if ((map & i) == 0)
+ return (bno);
+ }
+
+ return (-1);
+}
+
+/*
+ * Allocate a new reservation window.
+ */
+static int
+ext2_alloc_new_rsv(struct inode *ip, int cg, struct buf *bp, int32_t bpref)
+{
+ struct m_ext2fs *fs;
+ struct ext2_rsv_win *rp, *search;
+ char *bbp;
+ int start, size, ret;
+
+ fs = ip->i_e2fs;
+ rp = ip->i_rsv;
+ bbp = bp->b_data;
+ size = rp->rsv_goal_size;
+
+ if (bpref <= 0)
+ start = phy_blk(cg, fs);
+ else
+ start = bpref;
+
+ /* Dynamically increase the size of window */
+ if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED) {
+ if (rp->rsv_alloc_hit >
+ ((rp->rsv_end - rp->rsv_start + 1) / 2)) {
+ size = size * 2;
+ if (size > EXT2_RSV_MAX_RESERVE_BLKS)
+ size = EXT2_RSV_MAX_RESERVE_BLKS;
+ rp->rsv_goal_size = size;
+ }
+ }
+
+ EXT2_TREE_LOCK(fs);
+
+ search = ext2_search_rsv(fs->e2fs_rsv_tree, start);
+
+repeat:
+ ret = ext2_find_rsv(search, rp, fs, start, cg);
+ if (ret < 0) {
+ if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+ ext2_remove_rsv_win(fs, rp);
+ EXT2_TREE_UNLOCK(fs);
+ return (-1);
+ }
+ EXT2_TREE_UNLOCK(fs);
+
+ start = dtogd(fs, rp->rsv_start);
+ start = ext2_search_next_block(fs, bbp, start, cg);
+ if (start < 0) {
+ EXT2_TREE_LOCK(fs);
+ if (rp->rsv_end != EXT2_RSV_NOT_ALLOCATED)
+ ext2_remove_rsv_win(fs, rp);
+ EXT2_TREE_UNLOCK(fs);
+ return (-1);
+ }
+
+ start = phy_blk(cg, fs) + start;
+ if (start >= rp->rsv_start && start <= rp->rsv_end)
+ return (0);
+
+ search = rp;
+ EXT2_TREE_LOCK(fs);
+ goto repeat;
+}
+
+/*
+ * Allocate a free block from reservation window.
+ */
+static u_long
+ext2_rsvalloc(struct m_ext2fs *fs, struct inode *ip, int cg,
+ struct buf *bp, int32_t bpref, int size)
+{
+ struct ext2_rsv_win *rp;
+ int ret;
+
+ rp = ip->i_rsv;
+ if (rp == NULL)
+ return (ext2_alloc_blk(ip, cg, bp, bpref, NULL));
+
+ if (rp->rsv_end == EXT2_RSV_NOT_ALLOCATED ||
+ !ext2_bpref_in_rsv(rp, bpref)) {
+ ret = ext2_alloc_new_rsv(ip, cg, bp, bpref);
+ if (ret < 0)
+ return (0);
+ }
+
+ return (ext2_alloc_blk(ip, cg, bp, bpref, rp));
+}
+
+/*
+ * Allocate a block using reservation window in ext2 file system.
+ *
+ * NOTE: This function will replace the ext2_alloc() function.
+ */
+int
+ext2_alloc_rsv(struct inode *ip, int32_t lbn, int32_t bpref,
+ int size, struct ucred *cred, int32_t *bnp)
+{
+ struct m_ext2fs *fs;
+ struct ext2mount *ump;
+ struct buf *bp;
+ int32_t bno = 0;
+ int i, cg, error;
+
+ *bnp = 0;
+ fs = ip->i_e2fs;
+ ump = ip->i_ump;
+ mtx_assert(EXT2_MTX(ump), MA_OWNED);
+
+ if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0)
+ goto nospace;
+ if (cred->cr_uid != 0 &&
+ fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount)
+ goto nospace;
+
+ if (bpref >= fs->e2fs->e2fs_bcount)
+ bpref = 0;
+ if (bpref == 0)
+ cg = ino_to_cg(fs, ip->i_number);
+ else
+ cg = dtog(fs, bpref);
+
+ /* If cg has some free blocks, then try to allocate a free block from this cg */
+ if (fs->e2fs_gd[cg].ext2bgd_nbfree > 0) {
+ /* Read block bitmap from buffer */
+ EXT2_UNLOCK(ump);
+ error = bread(ip->i_devvp,
+ fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+ (int)fs->e2fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ goto ioerror;
+ }
+
+ EXT2_RSV_LOCK(ip);
+ /* Try to allocate from reservation window */
+ bno = ext2_rsvalloc(fs, ip, cg, bp, bpref, size);
+ EXT2_RSV_UNLOCK(ip);
+ if (bno > 0)
+ goto allocated;
+
+ brelse(bp);
+ EXT2_LOCK(ump);
+ }
+
+ /* Just need to try to allocate a free block from rest groups. */
+ cg = (cg + 1) % fs->e2fs_gcount;
+ for (i = 1; i < fs->e2fs_gcount; i++) {
+ if (fs->e2fs_gd[cg].ext2bgd_nbfree > 0) {
+ /* Read block bitmap from buffer */
+ EXT2_UNLOCK(ump);
+ error = bread(ip->i_devvp,
+ fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap),
+ (int)fs->e2fs_bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ goto ioerror;
+ }
+
+ EXT2_RSV_LOCK(ip);
+ bno = ext2_rsvalloc(fs, ip, cg, bp, -1, size);
+ EXT2_RSV_UNLOCK(ip);
+ if (bno > 0)
+ goto allocated;
+
+ brelse(bp);
+ EXT2_LOCK(ump);
+ }
+
+ cg++;
+ if (cg == fs->e2fs_gcount)
+ cg = 0;
+ }
+
+allocated:
+ if (bno > 0) {
+ ip->i_next_alloc_block = lbn;
+ ip->i_next_alloc_goal = bno;
+
+ ip->i_blocks += btodb(fs->e2fs_bsize);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ *bnp = bno;
+ return (0);
+ }
+
+nospace:
+ EXT2_UNLOCK(ump);
+ ext2_fserr(fs, cred->cr_uid, "file system full");
+ uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt);
+ return (ENOSPC);
+
+ioerror:
+ ext2_fserr(fs, cred->cr_uid, "file system IO error");
+ uprintf("\n%s: write failed, file system IO error\n", fs->e2fs_fsmnt);
+ return (EIO);
+}
int
ext2_alloc(ip, lbn, bpref, size, cred, bnp)
@@ -923,9 +1413,11 @@
start = 0;
loc = skpc(0xff, len, &bbp[start]);
if (loc == 0) {
- printf("start = %d, len = %d, fs = %s\n",
- start, len, fs->e2fs_fsmnt);
- panic("ext2fs_alloccg: map corrupted");
+ /* XXX: just for reservation window */
+ return -1;
+ /*printf("start = %d, len = %d, fs = %s\n",*/
+ /*start, len, fs->e2fs_fsmnt);*/
+ /*panic("ext2fs_alloccg: map corrupted");*/
/* NOTREACHED */
}
}
diff -urN /usr/src/sys/fs/ext2fs/ext2_balloc.c new/ext2_balloc.c
--- /usr/src/sys/fs/ext2fs/ext2_balloc.c 2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_balloc.c 2010-08-19 02:47:29.000000000 +0800
@@ -49,6 +49,7 @@
#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/ext2_extern.h>
#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
/*
* Balloc defines the structure of file system storage
* by allocating the physical blocks on a device given
@@ -78,6 +79,9 @@
fs = ip->i_e2fs;
ump = ip->i_ump;
+ if (ip->i_rsv == NULL)
+ ext2_init_rsv(ip);
+
/*
* check if this is a sequential block allocation.
* If so, increment next_alloc fields to allow ext2_blkpref
@@ -136,9 +140,9 @@
else
nsize = fs->e2fs_bsize;
EXT2_LOCK(ump);
- error = ext2_alloc(ip, lbn,
- ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
- nsize, cred, &newb);
+ error = ext2_alloc_rsv(ip, lbn,
+ ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0),
+ nsize, cred, &newb);
if (error)
return (error);
bp = getblk(vp, lbn, nsize, 0, 0, 0);
@@ -170,9 +174,9 @@
EXT2_LOCK(ump);
pref = ext2_blkpref(ip, lbn, indirs[0].in_off +
EXT2_NDIR_BLOCKS, &ip->i_db[0], 0);
- if ((error = ext2_alloc(ip, lbn, pref,
- (int)fs->e2fs_bsize, cred, &newb)))
- return (error);
+ if ((error = ext2_alloc_rsv(ip, lbn, pref,
+ (int)fs->e2fs_bsize, cred, &newb)))
+ return (error);
nb = newb;
bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0);
bp->b_blkno = fsbtodb(fs, newb);
@@ -211,7 +215,7 @@
if (pref == 0)
pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap,
bp->b_lblkno);
- error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
+ error = ext2_alloc_rsv(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb);
if (error) {
brelse(bp);
return (error);
@@ -250,8 +254,8 @@
EXT2_LOCK(ump);
pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0],
bp->b_lblkno);
- if ((error = ext2_alloc(ip,
- lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) {
+ if ((error = ext2_alloc_rsv(ip, lbn, pref,
+ (int)fs->e2fs_bsize, cred, &newb)) != 0) {
brelse(bp);
return (error);
}
diff -urN /usr/src/sys/fs/ext2fs/ext2_inode.c new/ext2_inode.c
--- /usr/src/sys/fs/ext2fs/ext2_inode.c 2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_inode.c 2010-08-19 02:47:29.000000000 +0800
@@ -52,6 +52,7 @@
#include <fs/ext2fs/ext2fs.h>
#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
static int ext2_indirtrunc(struct inode *, int32_t, int32_t, int32_t, int,
long *);
@@ -153,6 +154,11 @@
}
fs = oip->i_e2fs;
osize = oip->i_size;
+
+ EXT2_RSV_LOCK(oip);
+ ext2_discard_rsv(oip);
+ EXT2_RSV_UNLOCK(oip);
+
/*
* Lengthen the size of the file. We must ensure that the
* last byte of the file is allocated. Since the smallest
@@ -484,6 +490,10 @@
if (prtactive && vrefcnt(vp) != 0)
vprint("ext2_inactive: pushing active", vp);
+ EXT2_RSV_LOCK(ip);
+ ext2_discard_rsv(ip);
+ EXT2_RSV_UNLOCK(ip);
+
/*
* Ignore inodes related to stale file handles.
*/
@@ -525,11 +535,21 @@
if (prtactive && vrefcnt(vp) != 0)
vprint("ufs_reclaim: pushing active", vp);
ip = VTOI(vp);
+
if (ip->i_flag & IN_LAZYMOD) {
ip->i_flag |= IN_MODIFIED;
ext2_update(vp, 0);
}
vfs_hash_remove(vp);
+
+ EXT2_RSV_LOCK(ip);
+ if (ip->i_rsv != NULL) {
+ free(ip->i_rsv, M_EXT2NODE);
+ ip->i_rsv = NULL;
+ }
+ EXT2_RSV_UNLOCK(ip);
+ mtx_destroy(&ip->i_rsv_lock);
+
free(vp->v_data, M_EXT2NODE);
vp->v_data = 0;
vnode_destroy_vobject(vp);
diff -urN /usr/src/sys/fs/ext2fs/ext2_rsv_win.h new/ext2_rsv_win.h
--- /usr/src/sys/fs/ext2fs/ext2_rsv_win.h 1970-01-01 08:00:00.000000000 +0800
+++ new/ext2_rsv_win.h 2010-08-19 02:47:29.000000000 +0800
@@ -0,0 +1,78 @@
+/*-
+ * Copyright (c) 2010, 2010 Zheng Liu <lz@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD: src/sys/fs/ext2fs/ext2_rsv_win.h,v 0.1 2010/05/08 12:41:51 lz Exp $
+ */
+#ifndef _FS_EXT2FS_EXT2_RSV_WIN_H_
+#define _FS_EXT2FS_EXT2_RSV_WIN_H_
+
+#include <sys/tree.h>
+
+#define EXT2_RSV_DEFAULT_RESERVE_BLKS 8
+#define EXT2_RSV_MAX_RESERVE_BLKS 1024
+#define EXT2_RSV_NOT_ALLOCATED 0
+
+#define EXT2_RSV_LOCK(ip) mtx_lock(&ip->i_rsv_lock)
+#define EXT2_RSV_UNLOCK(ip) mtx_unlock(&ip->i_rsv_lock)
+
+#define EXT2_TREE_LOCK(fs) mtx_lock(&fs->e2fs_rsv_lock);
+#define EXT2_TREE_UNLOCK(fs) mtx_unlock(&fs->e2fs_rsv_lock);
+
+/*
+ * Reservation window entry
+ */
+struct ext2_rsv_win {
+ RB_ENTRY(ext2_rsv_win) rsv_link; /* RB tree links */
+
+ int32_t rsv_goal_size; /* Default reservation window size */
+ int32_t rsv_alloc_hit; /* Number of allocated windows */
+
+ int32_t rsv_start; /* First bytes of window */
+ int32_t rsv_end; /* End bytes of window */
+};
+
+RB_HEAD(ext2_rsv_win_tree, ext2_rsv_win);
+
+static __inline int
+ext2_rsv_win_cmp(const struct ext2_rsv_win *a,
+ const struct ext2_rsv_win *b)
+{
+ if (a->rsv_start < b->rsv_start)
+ return (-1);
+ if (a->rsv_start == b->rsv_start)
+ return (0);
+
+ return (1);
+}
+RB_PROTOTYPE(ext2_rsv_win_tree, ext2_rsv_win, rsv_link, ext2_rsv_win_cmp);
+
+/* predefine */
+struct inode;
+/* ext2_alloc.c */
+void ext2_init_rsv(struct inode *ip);
+void ext2_discard_rsv(struct inode *ip);
+int ext2_alloc_rsv(struct inode *, int32_t, int32_t, int, struct ucred *, int32_t *);
+
+#endif /* !_FS_EXT2FS_EXT2_RSV_WIN_H_ */
diff -urN /usr/src/sys/fs/ext2fs/ext2_vfsops.c new/ext2_vfsops.c
--- /usr/src/sys/fs/ext2fs/ext2_vfsops.c 2010-01-14 22:30:54.000000000 +0800
+++ new/ext2_vfsops.c 2010-08-19 02:47:29.000000000 +0800
@@ -1,4 +1,4 @@
-/*-
+/*
* modified for EXT2FS support in Lites 1.1
*
* Aug 1995, Godmar Back (gback@cs.utah.edu)
@@ -61,6 +61,7 @@
#include <fs/ext2fs/fs.h>
#include <fs/ext2fs/ext2_extern.h>
#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_rsv_win.h>
static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td);
static int ext2_mountfs(struct vnode *, struct mount *);
@@ -95,9 +96,9 @@
static int compute_sb_data(struct vnode * devvp,
struct ext2fs * es, struct m_ext2fs * fs);
-static const char *ext2_opts[] = { "from", "export", "acls", "noexec",
- "noatime", "union", "suiddir", "multilabel", "nosymfollow",
- "noclusterr", "noclusterw", "force", NULL };
+static const char *ext2_opts[] = { "acls", "async", "export", "force",
+ "from", "multilabel", "noatime", "noclusterr", "noclusterw",
+ "noexec", "nosymfollow", "suiddir", "union", NULL };
/*
* VFS Operations.
@@ -581,6 +582,14 @@
if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs)))
goto out;
+ /* Initial reservation window index and lock */
+ bzero(&ump->um_e2fs->e2fs_rsv_lock, sizeof(struct mtx));
+ mtx_init(&ump->um_e2fs->e2fs_rsv_lock,
+ "rsv tree lock", NULL, MTX_DEF);
+ ump->um_e2fs->e2fs_rsv_tree = malloc(sizeof(struct ext2_rsv_win_tree),
+ M_EXT2MNT, M_WAITOK | M_ZERO);
+ RB_INIT(ump->um_e2fs->e2fs_rsv_tree);
+
brelse(bp);
bp = NULL;
fs = ump->um_e2fs;
@@ -680,6 +689,8 @@
g_topology_unlock();
PICKUP_GIANT();
vrele(ump->um_devvp);
+ free(fs->e2fs_rsv_tree, M_EXT2MNT);
+ mtx_destroy(&fs->e2fs_rsv_lock);
free(fs->e2fs_gd, M_EXT2MNT);
free(fs->e2fs_contigdirs, M_EXT2MNT);
free(fs->e2fs, M_EXT2MNT);
@@ -919,6 +930,10 @@
ip->i_prealloc_count = 0;
ip->i_prealloc_block = 0;
+ bzero(&ip->i_rsv_lock, sizeof(struct mtx));
+ mtx_init(&ip->i_rsv_lock, "inode rsv lock", NULL, MTX_DEF);
+ ip->i_rsv = NULL;
+
/*
* Now we want to make sure that block pointers for unused
* blocks are zeroed out - ext2_balloc depends on this
diff -urN /usr/src/sys/fs/ext2fs/ext2fs.h new/ext2fs.h
--- /usr/src/sys/fs/ext2fs/ext2fs.h 2010-01-14 22:30:54.000000000 +0800
+++ new/ext2fs.h 2010-08-19 02:47:29.000000000 +0800
@@ -38,6 +38,7 @@
#define _FS_EXT2FS_EXT2_FS_H
#include <sys/types.h>
+#include <sys/lock.h>
/*
* Special inode numbers
@@ -174,6 +175,9 @@
char e2fs_wasvalid; /* valid at mount time */
off_t e2fs_maxfilesize;
struct ext2_gd *e2fs_gd; /* Group Descriptors */
+
+ struct mtx e2fs_rsv_lock; /* Protect reservation window RB tree */
+ struct ext2_rsv_win_tree *e2fs_rsv_tree; /* Reservation window index */
};
/*
diff -urN /usr/src/sys/fs/ext2fs/inode.h new/inode.h
--- /usr/src/sys/fs/ext2fs/inode.h 2010-01-14 22:30:54.000000000 +0800
+++ new/inode.h 2010-08-19 02:47:29.000000000 +0800
@@ -100,6 +100,10 @@
int32_t i_gen; /* Generation number. */
u_int32_t i_uid; /* File owner. */
u_int32_t i_gid; /* File group. */
+
+ /* Fields for reservation window */
+ struct mtx i_rsv_lock; /* Protects i_rsv */
+ struct ext2_rsv_win *i_rsv; /* Reservation window */
};
/*
--------------070305070503050500020300--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4C6C98B4.4070909>
