Date: Thu, 30 Dec 2010 11:05:07 +0200 From: Kostik Belousov <kostikbel@gmail.com> To: Boris Kochergin <spawk@acm.poly.edu> Cc: freebsd-fs@freebsd.org, Jeff Roberson <jroberson@jroberson.net>, Kirk McKusick <mckusick@mckusick.com> Subject: Re: SUJ doesn't work with GELI? Message-ID: <20101230090507.GD90883@deviant.kiev.zoral.com.ua> In-Reply-To: <4D1BF45D.7070703@acm.poly.edu>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
On Wed, Dec 29, 2010 at 09:54:21PM -0500, Boris Kochergin wrote:
> On 12/29/10 21:44, Lucius Windschuh wrote:
> >2010/12/29 Boris Kochergin<spawk@acm.poly.edu>:
> >>Ahoy. I'm having trouble enabling SUJ on a UFS filesystem that lives on
> >>top of a GELI device. The system is an i386 -CURRENT build from December
> >>24th. I first noticed the problem when it happened with an actual disk,
> >>but here is an example with a memory disk for ease of reproduction:
> >Hi Boris.
> >The problem is, as you may have already noticed, the "non-standard"
> >block size of 4k of the GELI device.
> >If you specify the traditional block size of 512 bytes when
> >initializing the GELI provider, it should work.
> >
> >I sent an email to Jeff, the answer on April 27 was:
> >>Thanks for the repro. This is an interesting case. I'll have to
> >>slightly rewrite the directory handling code in tunefs but it should not
> >>take long.
> >But it seems I forgot to remind Jeff about this issue. So I'll try
> >again with this mail...
> >
> >Regards,
> >
> >Lucius
>
> Aha. I hadn't thought of that. Thanks for the insight.
I have had the following patch lurking around for some time. It covers only
the kernel; I did not have time to go through the usermode tools.
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 002911c..4f4e020 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -747,7 +747,7 @@ static void handle_written_jnewblk(struct jnewblk *);
static void handle_written_jfreeblk(struct jfreeblk *);
static void handle_written_jfreefrag(struct jfreefrag *);
static void complete_jseg(struct jseg *);
-static void jseg_write(struct fs *, struct jblocks *, struct jseg *,
+static void jseg_write(struct ufsmount *ump, struct jblocks *, struct jseg *,
uint8_t *);
static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
@@ -886,7 +886,8 @@ static void handle_jwork(struct workhead *);
static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *,
struct mkdir **);
static struct jblocks *jblocks_create(void);
-static ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *);
+static ufs2_daddr_t jblocks_alloc(struct ufsmount *, struct jblocks *, int,
+ int *);
static void jblocks_free(struct jblocks *, struct mount *, int);
static void jblocks_destroy(struct jblocks *);
static void jblocks_add(struct jblocks *, ufs2_daddr_t, int);
@@ -2135,7 +2136,8 @@ jblocks_create(void)
}
static ufs2_daddr_t
-jblocks_alloc(jblocks, bytes, actual)
+jblocks_alloc(ump, jblocks, bytes, actual)
+ struct ufsmount *ump;
struct jblocks *jblocks;
int bytes;
int *actual;
@@ -2145,7 +2147,7 @@ jblocks_alloc(jblocks, bytes, actual)
int freecnt;
int blocks;
- blocks = bytes / DEV_BSIZE;
+ blocks = bytes / ump->um_devvp->v_bufobj.bo_bsize;
jext = &jblocks->jb_extent[jblocks->jb_head];
freecnt = jext->je_blocks - jblocks->jb_off;
if (freecnt == 0) {
@@ -2157,7 +2159,7 @@ jblocks_alloc(jblocks, bytes, actual)
}
if (freecnt > blocks)
freecnt = blocks;
- *actual = freecnt * DEV_BSIZE;
+ *actual = freecnt * ump->um_devvp->v_bufobj.bo_bsize;
daddr = jext->je_daddr + jblocks->jb_off;
jblocks->jb_off += freecnt;
jblocks->jb_free -= freecnt;
@@ -2172,7 +2174,7 @@ jblocks_free(jblocks, mp, bytes)
int bytes;
{
- jblocks->jb_free += bytes / DEV_BSIZE;
+ jblocks->jb_free += bytes / VFSTOUFS(mp)->um_devvp->v_bufobj.bo_bsize;
if (jblocks->jb_suspended)
worklist_speedup();
wakeup(jblocks);
@@ -2422,7 +2424,8 @@ journal_space(ump, thresh)
thresh = jblocks->jb_min;
else
thresh = jblocks->jb_low;
- avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE;
+ avail = (ump->softdep_on_journal * JREC_SIZE) /
+ ump->um_devvp->v_bufobj.bo_bsize;
avail = jblocks->jb_free - avail;
return (avail > thresh);
@@ -2555,8 +2558,8 @@ softdep_prelink(dvp, vp)
}
static void
-jseg_write(fs, jblocks, jseg, data)
- struct fs *fs;
+jseg_write(ump, jblocks, jseg, data)
+ struct ufsmount *ump;
struct jblocks *jblocks;
struct jseg *jseg;
uint8_t *data;
@@ -2567,9 +2570,9 @@ jseg_write(fs, jblocks, jseg, data)
rec->jsr_seq = jseg->js_seq;
rec->jsr_oldest = jblocks->jb_oldestseq;
rec->jsr_cnt = jseg->js_cnt;
- rec->jsr_blocks = jseg->js_size / DEV_BSIZE;
+ rec->jsr_blocks = jseg->js_size / ump->um_devvp->v_bufobj.bo_bsize;
rec->jsr_crc = 0;
- rec->jsr_time = fs->fs_mtime;
+ rec->jsr_time = ump->um_fs->fs_mtime;
}
static inline void
@@ -2719,19 +2722,21 @@ softdep_process_journal(mp, flags)
int size;
int cnt;
int off;
+ int devbsize;
if ((mp->mnt_kern_flag & MNTK_SUJ) == 0)
return;
ump = VFSTOUFS(mp);
fs = ump->um_fs;
jblocks = ump->softdep_jblocks;
+ devbsize = ump->um_devvp->v_bufobj.bo_bsize;
/*
* We write anywhere between a disk block and fs block. The upper
* bound is picked to prevent buffer cache fragmentation and limit
* processing time per I/O.
*/
- jrecmin = (DEV_BSIZE / JREC_SIZE) - 1; /* -1 for seg header */
- jrecmax = (fs->fs_bsize / DEV_BSIZE) * jrecmin;
+ jrecmin = (devbsize / JREC_SIZE) - 1; /* -1 for seg header */
+ jrecmax = (fs->fs_bsize / devbsize) * jrecmin;
segwritten = 0;
while ((cnt = ump->softdep_on_journal) != 0) {
/*
@@ -2786,7 +2791,7 @@ softdep_process_journal(mp, flags)
*/
cnt = ump->softdep_on_journal;
if (cnt < jrecmax)
- size = howmany(cnt, jrecmin) * DEV_BSIZE;
+ size = howmany(cnt, jrecmin) * devbsize;
else
size = fs->fs_bsize;
/*
@@ -2794,9 +2799,10 @@ softdep_process_journal(mp, flags)
* for truncation of the requested size if enough contiguous
* space was not available.
*/
- bp->b_blkno = jblocks_alloc(jblocks, size, &size);
+ bp->b_blkno = jblocks_alloc(ump, jblocks, size, &size) *
+ btodb(devbsize);
bp->b_lblkno = bp->b_blkno;
- bp->b_offset = bp->b_blkno * DEV_BSIZE;
+ bp->b_offset = bp->b_blkno * devbsize;
bp->b_bcount = size;
bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
@@ -2806,7 +2812,7 @@ softdep_process_journal(mp, flags)
* sequence number to it and link it in-order.
*/
cnt = MIN(ump->softdep_on_journal,
- (size / DEV_BSIZE) * jrecmin);
+ (size / devbsize) * jrecmin);
jseg->js_buf = bp;
jseg->js_cnt = cnt;
jseg->js_refs = cnt + 1; /* Self ref. */
@@ -2825,8 +2831,8 @@ softdep_process_journal(mp, flags)
while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
!= NULL) {
/* Place a segment header on every device block. */
- if ((off % DEV_BSIZE) == 0) {
- jseg_write(fs, jblocks, jseg, data);
+ if ((off % devbsize) == 0) {
+ jseg_write(ump, jblocks, jseg, data);
off += JREC_SIZE;
data = bp->b_data + off;
}
[-- Attachment #2 --]
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (FreeBSD)
iEYEARECAAYFAk0cS0IACgkQC3+MBN1Mb4g8bACgqesddF3TRiNB/4pZ0V693ZAh
ggAAn3+XCUz+bba8rE/1tBT85JCW0p6n
=Tk3e
-----END PGP SIGNATURE-----
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20101230090507.GD90883>
