Date: Sun, 22 Aug 2010 19:17:48 +0800 From: gnehzuil <gnehzuil@gmail.com> To: fs@freebsd.org Cc: jhb@FreeBSD.org Subject: [patch] ext4fs read only mode Message-ID: <4C71075C.9010802@gmail.com>
next in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------000709010802090406030102 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Hi all, This patch lets ext2fs read ext4 filesystems in read-only mode. There are two files attached. 'ext4fs_ro_makefile.patch' is for the Makefile in modules/ext2fs/. 'ext4fs_ro_src.patch' is for the source code in fs/ext2fs/. Please use the following command to mount a disk: 'mount -t ext2fs -r /dev/XXX /YYY'. You can now use it to read data from ext4 filesystems that use the following features: + HAS_JOURNAL(*) + FILE_TYPE + SPARSE_SUPER + HUGE_FILE + EXTENTS + DIR_NLINK + UNINIT_BG + FLEX_BG(*) + EXTRA_ISIZE(*) + DIR_INDEX(**) * I have not implemented these features. However, you don't need to worry about them because they are not used in read-only mode. ** I have implemented a hash directory index in the ext2_lookup() function, but there are two functions that I think may be contaminated by kernel source code, so this patch doesn't include the hash directory index. Please test it. 
Best regards, lz --------------000709010802090406030102 Content-Type: text/x-patch; name="ext4fs_ro_makefile.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="ext4fs_ro_makefile.patch" --- /usr/src/sys/modules/ext2fs/Makefile 2010-01-14 22:30:54.000000000 +0800 +++ Makefile 2010-08-22 22:53:28.000000000 +0800 @@ -3,8 +3,8 @@ .PATH: ${.CURDIR}/../../fs/ext2fs KMOD= ext2fs SRCS= opt_ddb.h opt_quota.h opt_suiddir.h vnode_if.h \ - ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_inode.c \ - ext2_inode_cnv.c ext2_lookup.c ext2_subr.c ext2_vfsops.c \ - ext2_vnops.c + ext2_alloc.c ext2_balloc.c ext2_bmap.c ext2_extents.c \ + ext2_inode.c ext2_inode_cnv.c ext2_lookup.c ext2_subr.c \ + ext2_vfsops.c ext2_vnops.c .include <bsd.kmod.mk> --------------000709010802090406030102 Content-Type: text/x-patch; name="ext4fs_ro_src.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="ext4fs_ro_src.patch" diff -urN /usr/src/sys/fs/ext2fs/ext2_alloc.c src/ext2_alloc.c --- /usr/src/sys/fs/ext2fs/ext2_alloc.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_alloc.c 2010-08-22 22:41:52.000000000 +0800 @@ -116,12 +116,12 @@ if (cred == NOCRED) panic("ext2_alloc: missing credential"); #endif /* DIAGNOSTIC */ - if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0) + if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount_lo == 0) goto nospace; if (cred->cr_uid != 0 && - fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount) + fs->e2fs->e2fs_fbcount_lo < fs->e2fs->e2fs_rbcount_lo) goto nospace; - if (bpref >= fs->e2fs->e2fs_bcount) + if (bpref >= fs->e2fs->e2fs_bcount_lo) bpref = 0; if (bpref == 0) cg = ino_to_cg(fs, ip->i_number); @@ -443,7 +443,7 @@ fs = pip->i_e2fs; avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; - avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount; + avgbfree = fs->e2fs->e2fs_fbcount_lo / fs->e2fs_gcount; avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; /* @@ -455,18 +455,18 @@ mincg = prefcg; minndir = fs->e2fs_ipg; for (cg 
= prefcg; cg < fs->e2fs_gcount; cg++) - if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && - fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && - fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { + if (fs->e2fs_gd[cg].ext2bgd_ndirs_lo < minndir && + fs->e2fs_gd[cg].ext2bgd_nifree_lo >= avgifree && + fs->e2fs_gd[cg].ext2bgd_nbfree_lo >= avgbfree) { mincg = cg; - minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; + minndir = fs->e2fs_gd[cg].ext2bgd_ndirs_lo; } for (cg = 0; cg < prefcg; cg++) - if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && - fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && - fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { + if (fs->e2fs_gd[cg].ext2bgd_ndirs_lo < minndir && + fs->e2fs_gd[cg].ext2bgd_nifree_lo >= avgifree && + fs->e2fs_gd[cg].ext2bgd_nbfree_lo >= avgbfree) { mincg = cg; - minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; + minndir = fs->e2fs_gd[cg].ext2bgd_ndirs_lo; } return (mincg); @@ -503,16 +503,16 @@ */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->e2fs_gcount; cg++) - if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && - fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && - fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { + if (fs->e2fs_gd[cg].ext2bgd_ndirs_lo < maxndir && + fs->e2fs_gd[cg].ext2bgd_nifree_lo >= minifree && + fs->e2fs_gd[cg].ext2bgd_nbfree_lo >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } for (cg = 0; cg < prefcg; cg++) - if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && - fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && - fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { + if (fs->e2fs_gd[cg].ext2bgd_ndirs_lo < maxndir && + fs->e2fs_gd[cg].ext2bgd_nifree_lo >= minifree && + fs->e2fs_gd[cg].ext2bgd_nbfree_lo >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } @@ -520,10 +520,10 @@ * This is a backstop when we have deficit in space. 
*/ for (cg = prefcg; cg < fs->e2fs_gcount; cg++) - if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) + if (fs->e2fs_gd[cg].ext2bgd_nifree_lo >= avgifree) return (cg); for (cg = 0; cg < prefcg; cg++) - if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) + if (fs->e2fs_gd[cg].ext2bgd_nifree_lo >= avgifree) break; return (cg); } @@ -644,11 +644,11 @@ /* XXX ondisk32 */ fs = ip->i_e2fs; ump = ip->i_ump; - if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) + if (fs->e2fs_gd[cg].ext2bgd_nbfree_lo == 0) return (0); EXT2_UNLOCK(ump); error = bread(ip->i_devvp, fsbtodb(fs, - fs->e2fs_gd[cg].ext2bgd_b_bitmap), + fs->e2fs_gd[cg].ext2bgd_b_bitmap_lo), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); @@ -709,8 +709,8 @@ #endif setbit(bbp, (daddr_t)bno); EXT2_LOCK(ump); - fs->e2fs->e2fs_fbcount--; - fs->e2fs_gd[cg].ext2bgd_nbfree--; + fs->e2fs->e2fs_fbcount_lo--; + fs->e2fs_gd[cg].ext2bgd_nbfree_lo--; fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); bdwrite(bp); @@ -736,11 +736,11 @@ ipref = 0; fs = ip->i_e2fs; ump = ip->i_ump; - if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) + if (fs->e2fs_gd[cg].ext2bgd_nifree_lo == 0) return (0); EXT2_UNLOCK(ump); error = bread(ip->i_devvp, fsbtodb(fs, - fs->e2fs_gd[cg].ext2bgd_i_bitmap), + fs->e2fs_gd[cg].ext2bgd_i_bitmap_lo), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); @@ -781,11 +781,11 @@ gotit: setbit(ibp, ipref); EXT2_LOCK(ump); - fs->e2fs_gd[cg].ext2bgd_nifree--; + fs->e2fs_gd[cg].ext2bgd_nifree_lo--; fs->e2fs->e2fs_ficount--; fs->e2fs_fmod = 1; if ((mode & IFMT) == IFDIR) { - fs->e2fs_gd[cg].ext2bgd_ndirs++; + fs->e2fs_gd[cg].ext2bgd_ndirs_lo++; fs->e2fs_total_dir++; } EXT2_UNLOCK(ump); @@ -812,14 +812,14 @@ fs = ip->i_e2fs; ump = ip->i_ump; cg = dtog(fs, bno); - if ((u_int)bno >= fs->e2fs->e2fs_bcount) { + if ((u_int)bno >= fs->e2fs->e2fs_bcount_lo) { printf("bad block %lld, ino %llu\n", (long long)bno, (unsigned long long)ip->i_number); ext2_fserr(fs, ip->i_uid, "bad block"); return; } error = bread(ip->i_devvp, - fsbtodb(fs, 
fs->e2fs_gd[cg].ext2bgd_b_bitmap), + fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap_lo), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); @@ -834,8 +834,8 @@ } clrbit(bbp, bno); EXT2_LOCK(ump); - fs->e2fs->e2fs_fbcount++; - fs->e2fs_gd[cg].ext2bgd_nbfree++; + fs->e2fs->e2fs_fbcount_lo++; + fs->e2fs_gd[cg].ext2bgd_nbfree_lo++; fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); bdwrite(bp); @@ -868,7 +868,7 @@ cg = ino_to_cg(fs, ino); error = bread(pip->i_devvp, - fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), + fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap_lo), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); @@ -885,9 +885,9 @@ clrbit(ibp, ino); EXT2_LOCK(ump); fs->e2fs->e2fs_ficount++; - fs->e2fs_gd[cg].ext2bgd_nifree++; + fs->e2fs_gd[cg].ext2bgd_nifree_lo++; if ((mode & IFMT) == IFDIR) { - fs->e2fs_gd[cg].ext2bgd_ndirs--; + fs->e2fs_gd[cg].ext2bgd_ndirs_lo--; fs->e2fs_total_dir--; } fs->e2fs_fmod = 1; diff -urN /usr/src/sys/fs/ext2fs/ext2_bmap.c src/ext2_bmap.c --- /usr/src/sys/fs/ext2fs/ext2_bmap.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_bmap.c 2010-08-22 22:41:52.000000000 +0800 @@ -46,9 +46,62 @@ #include <sys/stat.h> #include <fs/ext2fs/inode.h> +#include <fs/ext2fs/fs.h> #include <fs/ext2fs/ext2fs.h> #include <fs/ext2fs/ext2_mount.h> #include <fs/ext2fs/ext2_extern.h> +#include <fs/ext2fs/ext2_dinode.h> + +static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); + +/* + * This function converts the logical block number of a file to + * its physical block number on the disk within ext4 extents. + */ +static int +ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) +{ + struct inode *ip; + struct m_ext2fs *fs; + struct ext4_extent *ep; + struct ext4_extent_header *ehp; + struct ext4_extent_path path; + daddr_t lbn; + int bsize; + int depth; + + ip = VTOI(vp); + fs = ip->i_e2fs; + lbn = bn; + bsize = blksize(fs, ip, lbn); + + /* + * TODO: need to implement read ahead to improve the performance. 
+ */ + if (runp != NULL) + *runp = 0; + + if (runb != NULL) + *runb = 0; + + ext4_ext_find_extent(fs, ip, lbn, &path); + depth = ((struct ext4_extent_header *)(ip->i_db))->eh_depth; + if (path.ep_ext == NULL && depth != 0) + return (EIO); + + ehp = path.ep_header; + ep = path.ep_ext; + if (ep == NULL) + return (EIO); + + *bnp = fsbtodb(fs, (lbn - ep->e_blk + + (ep->e_start_lo | ((daddr_t)(ep->e_start_hi) << 31) << 1))); + + if (*bnp == 0) + *bnp = -1; + + return (0); +} /* * Bmap converts the logical block number of a file to its physical block @@ -66,7 +119,7 @@ int *a_runb; } */ *ap; { - int32_t blkno; + int64_t blkno; int error; /* @@ -78,8 +131,12 @@ if (ap->a_bnp == NULL) return (0); - error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, - ap->a_runp, ap->a_runb); + if (VTOI(ap->a_vp)->i_flags & EXT4_EXTENTS) + error = ext4_bmapext(ap->a_vp, ap->a_bn, &blkno, + ap->a_runp, ap->a_runb); + else + error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, + ap->a_runp, ap->a_runb); *ap->a_bnp = blkno; return (error); } @@ -102,7 +159,7 @@ ext2_bmaparray(vp, bn, bnp, runp, runb) struct vnode *vp; int32_t bn; - int32_t *bnp; + int64_t *bnp; int *runp; int *runb; { diff -urN /usr/src/sys/fs/ext2fs/ext2_dinode.h src/ext2_dinode.h --- /usr/src/sys/fs/ext2fs/ext2_dinode.h 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_dinode.h 2010-08-22 22:41:52.000000000 +0800 @@ -29,8 +29,6 @@ #ifndef _FS_EXT2FS_EXT2_DINODE_H_ #define _FS_EXT2FS_EXT2_DINODE_H_ -#define e2di_size_high e2di_dacl - /* * Inode flags * The current implementation uses only EXT2_IMMUTABLE and EXT2_APPEND flags @@ -43,7 +41,21 @@ #define EXT2_APPEND 0x00000020 /* writes to file may only append */ #define EXT2_NODUMP 0x00000040 /* do not dump file */ #define EXT2_NOATIME 0x00000080 /* do not update atime */ - +/* NOT implementation. Reserved for compression usage. 
*/ +#define EXT4_DIRTY 0x00000100 +#define EXT4_COMPRBLK 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR 0x00000800 /* Compression error */ +/* End compression flags */ +#define EXT4_INDEX 0x00001000 /* Hash-indexed directory */ +#define EXT4_IMAGIC 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA 0x00004000 /* File data should be journaled */ +#define EXT4_NOTAIL 0x00008000 /* File tail should not be merged */ +#define EXT4_DIRSYNC 0x00010000 /* dirsync behavior */ +#define EXT4_TOPDIR 0x00020000 /* top of directory hierarchies */ +#define EXT4_HUGE_FILE 0x00040000 /* Set to each huge file */ +#define EXT4_EXTENTS 0x00080000 /* Inode uses extents */ +#define EXT4_RESERVED 0x80000000 /* Reserved for ext4 lib */ /* * Structure of an inode on the disk @@ -51,27 +63,62 @@ struct ext2fs_dinode { u_int16_t e2di_mode; /* 0: IFMT, permissions; see below. */ u_int16_t e2di_uid; /* 2: Owner UID */ - u_int32_t e2di_size; /* 4: Size (in bytes) */ + u_int32_t e2di_size_lo; /* 4: Size (in bytes) */ u_int32_t e2di_atime; /* 8: Access time */ u_int32_t e2di_ctime; /* 12: Create time */ u_int32_t e2di_mtime; /* 16: Modification time */ u_int32_t e2di_dtime; /* 20: Deletion time */ u_int16_t e2di_gid; /* 24: Owner GID */ u_int16_t e2di_nlink; /* 26: File link count */ - u_int32_t e2di_nblock; /* 28: Blocks count */ + u_int32_t e2di_nblock_lo; /* 28: Blocks count */ u_int32_t e2di_flags; /* 32: Status flags (chflags) */ - u_int32_t e2di_linux_reserved1; /* 36 */ + union { + struct { + u_int32_t e2di_version; + } linux1; + struct { + u_int32_t e2di_translator; + } hurd1; + struct { + u_int32_t e2di_reserved1; + } masix1; + } osd1; /* 36: */ u_int32_t e2di_blocks[EXT2_N_BLOCKS]; /* 40: disk blocks */ u_int32_t e2di_gen; /* 100: generation number */ u_int32_t e2di_facl; /* 104: file ACL (not implemented) */ u_int32_t e2di_dacl; /* 108: dir ACL (not implemented) */ +#define e2di_size_high e2di_dacl 
u_int32_t e2di_faddr; /* 112: fragment address */ - u_int8_t e2di_nfrag; /* 116: fragment number */ - u_int8_t e2di_fsize; /* 117: fragment size */ - u_int16_t e2di_linux_reserved2; /* 118 */ - u_int16_t e2di_uid_high; /* 120: Owner UID top 16 bits */ - u_int16_t e2di_gid_high; /* 122: Owner GID top 16 bits */ - u_int32_t e2di_linux_reserved3; /* 124 */ + union { + struct { + u_int16_t e2di_l_blk_high; +#define e2di_nblock_high osd2.linux2.e2di_l_blk_high + u_int16_t e2di_l_facl_high; + u_int16_t e2di_l_uid_high; + u_int16_t e2di_l_gid_high; + u_int32_t e2di_l_reserved2; + } linux2; + struct { + u_int16_t e2di_h_reserved1; + u_int16_t e2di_h_mode_high; + u_int16_t e2di_h_uid_high; + u_int16_t e2di_h_gid_high; + u_int32_t e2di_h_author; + } hurd2; + struct { + u_int16_t e2di_m_reserved1; + u_int16_t e2di_m_facl_high; + u_int32_t e2di_reserved2[2]; + } masix2; + } osd2; + u_int16_t e2di_extra_isize; + u_int16_t e2di_pad; + u_int32_t e2di_ctime_extra; + u_int32_t e2di_mtime_extra; + u_int32_t e2di_atime_extra; + u_int32_t e2di_crtime; + u_int32_t e2di_crtime_extra; + u_int32_t e2di_version_hi; }; #endif /* _FS_EXT2FS_EXT2_DINODE_H_ */ diff -urN /usr/src/sys/fs/ext2fs/ext2_extents.c src/ext2_extents.c --- /usr/src/sys/fs/ext2fs/ext2_extents.c 1970-01-01 08:00:00.000000000 +0800 +++ src/ext2_extents.c 2010-08-22 22:41:52.000000000 +0800 @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2010, 2010 Zheng Liu <lz@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/fs/ext2fs/ext2_extents.c,v 0.1 2010/07/02 17:22:00 lz Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/conf.h> + +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_extents.h> +#include <fs/ext2fs/ext2_extern.h> + +static void ext4_ext_binsearch_index(struct inode *, struct ext4_extent_path *, daddr_t); +static void ext4_ext_binsearch(struct inode *, struct ext4_extent_path *, daddr_t); + +static void +ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) +{ + struct ext4_extent_header *ehp = path->ep_header; + struct ext4_extent_index *l, *r, *m; + + l = (struct ext4_extent_index *)(((char *)(ehp) + + sizeof(struct ext4_extent_header))); + r = (struct ext4_extent_index *)(((char *)(ehp) + + sizeof(struct ext4_extent_header))) + ehp->eh_ecount - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (lbn < m->ei_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_index = l - 1; 
+} + +static void +ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn) +{ + struct ext4_extent_header *ehp = path->ep_header; + struct ext4_extent *l, *r, *m; + + if (ehp->eh_ecount == 0) + return; + + l = (struct ext4_extent *)(((char *)(ehp) + + sizeof(struct ext4_extent_header))); + r = (struct ext4_extent *)(((char *)(ehp) + + sizeof(struct ext4_extent_header))) + ehp->eh_ecount - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (lbn < m->e_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_ext = l - 1; +} + +/* + * find a block in ext4 extent cache. + */ +int +ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) +{ + struct ext4_extent_cache *ecp; + int ret = EXT4_EXT_CACHE_NO; + + ecp = &ip->i_ext_cache; + + /* cache is invalid */ + if (ecp->ec_type == EXT4_EXT_CACHE_NO) + return (ret); + + if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) { + ep->e_blk = ecp->ec_blk; + ep->e_start_lo = (ecp->ec_start & 0xffffffff); + ep->e_start_hi = (((ecp->ec_start >> 31) >> 1) & 0xffff); + ep->e_len = ecp->ec_len; + ret = ecp->ec_type; + } + + return (ret); +} + +/* + * put a ext4_extent structure in ext4 cache. + */ +void +ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type) +{ + struct ext4_extent_cache *ecp; + + ecp = &ip->i_ext_cache; + ecp->ec_type = type; + ecp->ec_blk = ep->e_blk; + ecp->ec_len = ep->e_len; + ecp->ec_start = (((daddr_t)(ep->e_start_hi) << 31) << 1) | ep->e_start_lo; +} + +/* + * find a extent. 
+ */ +struct ext4_extent_path * +ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip, + daddr_t lbn, struct ext4_extent_path *path) +{ + struct vnode *vp; + struct ext4_extent_header *ehp; + int depth, i, error, size; + daddr_t nblk; + + vp = ITOV(ip); + ehp = (struct ext4_extent_header *)((char *)ip->i_db); + depth = ehp->eh_depth; + + if (ehp->eh_magic != EXT4_EXT_MAGIC) + return (NULL); + + path->ep_header = ehp; + + i = depth; + while (i) { + ext4_ext_binsearch_index(ip, path, lbn); + path->ep_blk = (((daddr_t)(path->ep_index->ei_leaf_hi) << 31) << 1) | + path->ep_index->ei_leaf_lo; + path->ep_depth = i; + path->ep_ext = NULL; + + size = blksize(fs, ip, path->ep_blk); + nblk = path->ep_blk; + if (path->ep_bp != NULL) { + brelse(path->ep_bp); + path->ep_bp = NULL; + } + error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED, &path->ep_bp); + if (error) { + brelse(path->ep_bp); + path->ep_bp = NULL; + return (NULL); + } + ehp = (struct ext4_extent_header *)path->ep_bp->b_data; + path->ep_header = ehp; + i--; + } + + path->ep_depth = i; + path->ep_ext = NULL; + path->ep_index = NULL; + + ext4_ext_binsearch(ip, path, lbn); + if (path->ep_ext != NULL) + path->ep_blk = (((daddr_t)(path->ep_ext->e_start_hi) << 31) << 1) | + path->ep_ext->e_start_lo; + + return (path); +} diff -urN /usr/src/sys/fs/ext2fs/ext2_extents.h src/ext2_extents.h --- /usr/src/sys/fs/ext2fs/ext2_extents.h 1970-01-01 08:00:00.000000000 +0800 +++ src/ext2_extents.h 2010-08-22 22:41:52.000000000 +0800 @@ -0,0 +1,103 @@ +/*- + * Copyright (c) 2010, 2010 Zheng Liu <lz@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/fs/ext2fs/ext2_extents.h,v 0.1 2010/06/22 18:01:51 lz Exp $ + */ +#ifndef _FS_EXT2FS_EXT2_EXTENTS_H_ +#define _FS_EXT2FS_EXT2_EXTENTS_H_ + +#include <sys/types.h> + +#define EXT4_EXT_MAGIC 0xf30a + +/* lock/unlock ext lock */ +#define EXT4_EXT_LOCK(ip) mtx_lock(&(ip)->i_ext_lock) +#define EXT4_EXT_UNLOCK(ip) mtx_unlock(&(ip)->i_ext_lock) + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_IN 2 + +/* + * ext4 file system extent on disk + */ +struct ext4_extent { + u_int32_t e_blk; /* first logical block */ + u_int16_t e_len; /* number of blocks */ + u_int16_t e_start_hi; /* high 16 bits of physical block */ + u_int32_t e_start_lo; /* low 32 bits of physical block */ +}; + +/* + * extent index on disk + */ +struct ext4_extent_index { + u_int32_t ei_blk; /* indexes logical blocks */ + u_int32_t ei_leaf_lo; /* pointes to physical block of the next level */ + u_int16_t ei_leaf_hi; /* high 16 bits of physical block */ + u_int16_t 
ei_unused; +}; + +/* + * extent tree header + */ +struct ext4_extent_header { + u_int16_t eh_magic; /* magic number: 0xf30a */ + u_int16_t eh_ecount; /* number of valid entries */ + u_int16_t eh_max; /* capacity of store in entries */ + u_int16_t eh_depth; /* the depth of extent tree */ + u_int32_t eh_gen; /* generation of extent tree */ +}; + +/* + * save cached extent + */ +struct ext4_extent_cache { + daddr_t ec_start; /* extent start */ + u_int32_t ec_blk; /* logical block */ + u_int32_t ec_len; + u_int32_t ec_type; +}; + +/* + * save path to some extent. + */ +struct ext4_extent_path { + daddr_t ep_blk; + u_int16_t ep_depth; + struct buf *ep_bp; + struct ext4_extent *ep_ext; + struct ext4_extent_index *ep_index; + struct ext4_extent_header *ep_header; +}; + +struct inode; +struct m_ext2fs; +int ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *); +void ext4_ext_put_cache(struct inode *, struct ext4_extent *, int); +struct ext4_extent_path *ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *, + daddr_t, struct ext4_extent_path *); + +#endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */ diff -urN /usr/src/sys/fs/ext2fs/ext2_extern.h src/ext2_extern.h --- /usr/src/sys/fs/ext2fs/ext2_extern.h 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_extern.h 2010-08-22 22:41:52.000000000 +0800 @@ -54,7 +54,7 @@ void ext2_blkfree(struct inode *, int32_t, long); int32_t ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t); int ext2_bmap(struct vop_bmap_args *); -int ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *); +int ext2_bmaparray(struct vnode *, int32_t, int64_t *, int *, int *); void ext2_dirbad(struct inode *ip, doff_t offset, char *how); void ext2_ei2i(struct ext2fs_dinode *, struct inode *); int ext2_getlbns(struct vnode *, int32_t, struct indir *, int *); diff -urN /usr/src/sys/fs/ext2fs/ext2_inode.c src/ext2_inode.c --- /usr/src/sys/fs/ext2fs/ext2_inode.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_inode.c 2010-08-22 
22:41:52.000000000 +0800 @@ -153,6 +153,7 @@ } fs = oip->i_e2fs; osize = oip->i_size; + /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest @@ -525,11 +526,15 @@ if (prtactive && vrefcnt(vp) != 0) vprint("ufs_reclaim: pushing active", vp); ip = VTOI(vp); + if (ip->i_flag & IN_LAZYMOD) { ip->i_flag |= IN_MODIFIED; ext2_update(vp, 0); } vfs_hash_remove(vp); + + mtx_destroy(&ip->i_ext_lock); + free(vp->v_data, M_EXT2NODE); vp->v_data = 0; vnode_destroy_vobject(vp); diff -urN /usr/src/sys/fs/ext2fs/ext2_inode_cnv.c src/ext2_inode_cnv.c --- /usr/src/sys/fs/ext2fs/ext2_inode_cnv.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_inode_cnv.c 2010-08-22 22:50:00.000000000 +0800 @@ -35,12 +35,15 @@ #include <fs/ext2fs/ext2fs.h> #include <fs/ext2fs/ext2_extern.h> #include <fs/ext2fs/ext2_dinode.h> +#include <fs/ext2fs/ext2_extents.h> void ext2_print_inode( in ) struct inode *in; { int i; + struct ext4_extent_header *ehp; + struct ext4_extent *ep; printf( "Inode: %5d", in->i_number); printf( /* "Inode: %5d" */ @@ -49,7 +52,7 @@ printf( "User: %5lu Group: %5lu Size: %lu\n", (unsigned long)in->i_uid, (unsigned long)in->i_gid, (unsigned long)in->i_size); - printf( "Links: %3d Blockcount: %d\n", + printf( "Links: %3d Blockcount: %lld\n", in->i_nlink, in->i_blocks); printf( "ctime: 0x%x", in->i_ctime); printf( "atime: 0x%x", in->i_atime); @@ -57,6 +60,15 @@ printf( "BLOCKS: "); for(i=0; i < (in->i_blocks <= 24 ? 
((in->i_blocks+1)/2): 12); i++) printf("%d ", in->i_db[i]); + printf( "\n"); + + printf( "Extents:\n"); + ehp = (struct ext4_extent_header *)in->i_db; + printf( "Header (magic 0x%x entries %d max %d depth %d gen %d)\n", + ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, ehp->eh_gen); + ep = (struct ext4_extent *)((char *)(in->i_db) + sizeof(struct ext4_extent_header)); + printf( "Index (blk %d len %d start_lo %d start_hi %d)\n", + ep->e_blk, ep->e_len, ep->e_start_lo, ep->e_start_hi); printf("\n"); } @@ -77,17 +89,18 @@ I can see that this might lead to problems in an undelete. */ ip->i_mode = ei->e2di_nlink ? ei->e2di_mode : 0; - ip->i_size = ei->e2di_size; + ip->i_size = ei->e2di_size_lo; if (S_ISREG(ip->i_mode)) ip->i_size |= ((u_int64_t)ei->e2di_size_high) << 32; ip->i_atime = ei->e2di_atime; ip->i_mtime = ei->e2di_mtime; ip->i_ctime = ei->e2di_ctime; - ip->i_flags = 0; - ip->i_flags |= (ei->e2di_flags & EXT2_APPEND) ? SF_APPEND : 0; - ip->i_flags |= (ei->e2di_flags & EXT2_IMMUTABLE) ? SF_IMMUTABLE : 0; - ip->i_flags |= (ei->e2di_flags & EXT2_NODUMP) ? UF_NODUMP : 0; - ip->i_blocks = ei->e2di_nblock; + ip->i_flags = ei->e2di_flags; /* we need to entire flags to check new features */ + ip->i_gen = ei->e2di_gen; + if (ip->i_e2fs->e2fs->e2fs_features_incompat & EXT4F_ROCOMPAT_HUGE_FILE) + ip->i_blocks = ((int64_t)(ei->e2di_nblock_high)) << 32 | ei->e2di_nblock_lo; + else + ip->i_blocks = ei->e2di_nblock_lo; ip->i_gen = ei->e2di_gen; ip->i_uid = ei->e2di_uid; ip->i_gid = ei->e2di_gid; @@ -115,7 +128,7 @@ has been deleted, this would correspond to a zero link count */ ei->e2di_dtime = ei->e2di_nlink ? 0 : ip->i_mtime; - ei->e2di_size = ip->i_size; + ei->e2di_size_lo = ip->i_size; if (S_ISREG(ip->i_mode)) ei->e2di_size_high = ip->i_size >> 32; ei->e2di_atime = ip->i_atime; @@ -126,7 +139,7 @@ ei->e2di_flags |= (ip->i_flags & SF_APPEND) ? EXT2_APPEND: 0; ei->e2di_flags |= (ip->i_flags & SF_IMMUTABLE) ? 
EXT2_IMMUTABLE: 0; ei->e2di_flags |= (ip->i_flags & UF_NODUMP) ? EXT2_NODUMP: 0; - ei->e2di_nblock = ip->i_blocks; + ei->e2di_nblock_lo = ip->i_blocks; ei->e2di_gen = ip->i_gen; ei->e2di_uid = ip->i_uid; ei->e2di_gid = ip->i_gid; diff -urN /usr/src/sys/fs/ext2fs/ext2_readwrite.c src/ext2_readwrite.c --- /usr/src/sys/fs/ext2fs/ext2_readwrite.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_readwrite.c 2010-08-22 22:41:52.000000000 +0800 @@ -36,6 +36,9 @@ * $FreeBSD: src/sys/fs/ext2fs/ext2_readwrite.c,v 1.1 2010/01/14 14:30:54 lulf Exp $ */ +#include <fs/ext2fs/ext2_dinode.h> +#include <fs/ext2fs/ext2_extents.h> + /* XXX TODO: remove these obfuscations (as in ffs_vnops.c). */ #define BLKSIZE(a, b, c) blksize(a, b, c) #define FS struct m_ext2fs @@ -45,17 +48,124 @@ #define WRITE ext2_write #define WRITE_S "ext2_write" +static int ext4_ext_read(struct vop_read_args *); +static int ext2_ind_read(struct vop_read_args *); + /* - * Vnode op for reading. + * this function handles ext4 extents block mapping */ static int -READ(ap) - struct vop_read_args /* { - struct vnode *a_vp; - struct uio *a_uio; - int a_ioflag; - struct ucred *a_cred; - } */ *ap; +ext4_ext_read(struct vop_read_args *ap) +{ + struct vnode *vp; + struct inode *ip; + struct uio *uio; + struct m_ext2fs *fs; + struct buf *bp; + struct ext4_extent nex, *ep; + struct ext4_extent_header *ehp; + struct ext4_extent_path path; + daddr_t lbn, nextlbn, newblk = 0; + off_t bytesinfile; + u_short mode; + int cache_type; + int orig_resid; + int error = 0; + int depth = 0; + long size, xfersize, blkoffset; + + vp = ap->a_vp; + ip = VTOI(vp); + mode = ip->i_mode; + uio = ap->a_uio; + memset(&path, 0, sizeof(path)); + + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); + fs = ip->I_FS; + if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) + return 
(EOVERFLOW); + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = BLKSIZE(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + xfersize = fs->e2fs_fsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + /* get block from ext4 extent cache */ + cache_type = ext4_ext_in_cache(ip, lbn, &nex); + if (cache_type != 0) { + /* block does not be allocated yet */ + if (cache_type == EXT4_EXT_CACHE_GAP) + return (error); + else if (cache_type == EXT4_EXT_CACHE_IN) + newblk = lbn - nex.e_blk + + (nex.e_start_lo | ((daddr_t)(nex.e_start_hi) << 31) << 1); + } else { + ext4_ext_find_extent(fs, ip, lbn, &path); + depth = ((struct ext4_extent_header *)(ip->i_db))->eh_depth; + if (path.ep_ext == NULL && depth != 0) + return (EIO); + + ehp = path.ep_header; + ep = path.ep_ext; + if (ep == NULL) + return (EIO); + + ext4_ext_put_cache(ip, ep, EXT4_EXT_CACHE_IN); + + newblk = lbn - ep->e_blk + + (ep->e_start_lo | ((daddr_t)(ep->e_start_hi) << 31) << 1); + + if (path.ep_bp != NULL) { + brelse(path.ep_bp); + path.ep_bp = NULL; + } + } + + error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, NOCRED, &bp); + if (error) { + brelse(bp); + bp = NULL; + break; + } + + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + error = uiomove((char *)bp->b_data + blkoffset, + (int)xfersize, uio); + if (error) + break; + + bqrelse(bp); + } + + if (bp != NULL) + bqrelse(bp); + + return (error); +} + +/* + * this function handles traditional block mapping + */ +static int +ext2_ind_read(struct vop_read_args *ap) { struct vnode *vp; struct inode *ip; @@ -152,6 +262,35 @@ } /* + * Vnode op for reading. 
+ */ +static int +READ(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct inode *ip; + int error; + + vp = ap->a_vp; + ip = VTOI(vp); + + /*EXT4_EXT_LOCK(ip);*/ + if (ip->i_flags & EXT4_EXTENTS) + error = ext4_ext_read(ap); + else + error = ext2_ind_read(ap); + /*EXT4_EXT_UNLOCK(ip);*/ + + return (error); +} + +/* * Vnode op for writing. */ static int diff -urN /usr/src/sys/fs/ext2fs/ext2_subr.c src/ext2_subr.c --- /usr/src/sys/fs/ext2fs/ext2_subr.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_subr.c 2010-08-22 22:41:52.000000000 +0800 @@ -50,6 +50,7 @@ #include <fs/ext2fs/ext2_extern.h> #include <fs/ext2fs/ext2fs.h> #include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extents.h> #ifdef KDB void ext2_checkoverlap(struct buf *, struct inode *); @@ -70,22 +71,57 @@ struct inode *ip; struct m_ext2fs *fs; struct buf *bp; + struct ext4_extent *ep; + struct ext4_extent_header *ehp; + struct ext4_extent_path path; int32_t lbn; int bsize, error; + int depth; + daddr_t newblk; ip = VTOI(vp); fs = ip->i_e2fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); + memset(&path, 0, sizeof(path)); *bpp = NULL; - if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { - brelse(bp); - return (error); - } - if (res) - *res = (char *)bp->b_data + blkoff(fs, offset); - *bpp = bp; + if (ext4_ext_find_extent(fs, ip, lbn, &path) == NULL) + goto normal; + depth = ((struct ext4_extent_header *)(ip->i_db))->eh_depth; + if (path.ep_ext == NULL && depth != 0) + goto normal; + ehp = path.ep_header; + ep = path.ep_ext; + if (ep == NULL) + goto normal; + + newblk = lbn - ep->e_blk + + (ep->e_start_lo | ((daddr_t)(ep->e_start_hi) << 31) << 1); + + if (path.ep_bp != NULL) { + brelse(path.ep_bp); + path.ep_bp = NULL; + } + if ((error = bread(ip->i_devvp, fsbtodb(fs, newblk), bsize, NOCRED, &bp)) != 0) { + brelse(bp); + return (error); + } + if (res) + *res = (char *)bp->b_data + 
blkoff(fs, offset); + *bpp = bp; + return (0); + +normal: + if (*bpp == NULL) { + if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { + brelse(bp); + return (error); + } + if (res) + *res = (char *)bp->b_data + blkoff(fs, offset); + *bpp = bp; + } return (0); } diff -urN /usr/src/sys/fs/ext2fs/ext2_vfsops.c src/ext2_vfsops.c --- /usr/src/sys/fs/ext2fs/ext2_vfsops.c 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2_vfsops.c 2010-08-22 22:41:52.000000000 +0800 @@ -51,6 +51,9 @@ #include <sys/malloc.h> #include <sys/stat.h> #include <sys/mutex.h> +#include <sys/types.h> + +#include <machine/atomic.h> #include <geom/geom.h> #include <geom/geom_vfs.h> @@ -61,6 +64,7 @@ #include <fs/ext2fs/fs.h> #include <fs/ext2fs/ext2_extern.h> #include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_dinode.h> static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); static int ext2_mountfs(struct vnode *, struct mount *); @@ -288,7 +292,8 @@ return (1); } if (es->e2fs_rev > E2FS_REV0) { - if (es->e2fs_features_incompat & ~EXT2F_INCOMPAT_SUPP) { + /*if (es->e2fs_features_incompat & ~EXT2F_INCOMPAT_SUPP) {*/ + if (es->e2fs_features_incompat & ~EXT4F_INCOMPAT_SUPP) { printf( "WARNING: mount of %s denied due to unsupported optional features\n", devtoname(dev)); @@ -334,7 +339,6 @@ } else { fs->e2fs_first_inode = es->e2fs_first_ino; fs->e2fs_isize = es->e2fs_inode_size; - /* * Simple sanity check for superblock inode size value. */ @@ -350,8 +354,9 @@ fs->e2fs_itpg = fs->e2fs_ipg /fs->e2fs_ipb; fs->e2fs_descpb = fs->e2fs_bsize / sizeof (struct ext2_gd); /* s_resuid / s_resgid ? 
*/ - fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock + - EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); + fs->e2fs_gcount = (((int64_t)(es->e2fs_bcount_hi) << 32 | es->e2fs_bcount_lo) + - es->e2fs_first_dblock + EXT2_BLOCKS_PER_GROUP(fs) - 1) / + EXT2_BLOCKS_PER_GROUP(fs); db_count = (fs->e2fs_gcount + EXT2_DESC_PER_BLOCK(fs) - 1) / EXT2_DESC_PER_BLOCK(fs); fs->e2fs_gdbcount = db_count; @@ -383,9 +388,10 @@ brelse(bp); bp = NULL; } + fs->e2fs_total_dir = 0; for (i=0; i < fs->e2fs_gcount; i++){ - fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs; + fs->e2fs_total_dir += (fs->e2fs_gd[i].ext2bgd_ndirs_lo); fs->e2fs_contigdirs[i] = 0; } if (es->e2fs_rev == E2FS_REV0 || @@ -393,6 +399,25 @@ fs->e2fs_maxfilesize = 0x7fffffff; else fs->e2fs_maxfilesize = 0x7fffffffffffffff; + + /* check inode size */ + if (fs->e2fs_isize > E2FS_REV0_INODE_SIZE) { + fs->e2fs_want_extra_isize = sizeof(struct ext2fs_dinode) - + E2FS_REV0_INODE_SIZE; + + if (es->e2fs_features_rocompat & EXT4F_ROCOMPAT_EXTRA_ISIZE) { + if (fs->e2fs_want_extra_isize < es->e2fs_want_extra_isize) + fs->e2fs_want_extra_isize = es->e2fs_want_extra_isize; + if (fs->e2fs_want_extra_isize < es->e2fs_min_extra_isize) + fs->e2fs_want_extra_isize = es->e2fs_min_extra_isize; + } + } + + if (E2FS_REV0_INODE_SIZE + fs->e2fs_want_extra_isize > + fs->e2fs_isize) + printf("EXT2-fs: no space for extra inode.\n"); + + return (0); } @@ -745,9 +770,12 @@ sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); - sbp->f_blocks = fs->e2fs->e2fs_bcount - overhead; - sbp->f_bfree = fs->e2fs->e2fs_fbcount; - sbp->f_bavail = sbp->f_bfree - fs->e2fs->e2fs_rbcount; + sbp->f_blocks = ((int64_t)(fs->e2fs->e2fs_bcount_hi) << 32 | + fs->e2fs->e2fs_bcount_lo) - overhead; + sbp->f_bfree = ((int64_t)(fs->e2fs->e2fs_fbcount_hi) << 32 | + fs->e2fs->e2fs_fbcount_lo); + sbp->f_bavail = sbp->f_bfree - ((int64_t)(fs->e2fs->e2fs_rbcount_hi) << 32 | + fs->e2fs->e2fs_rbcount_lo); sbp->f_files = 
fs->e2fs->e2fs_icount; sbp->f_ffree = fs->e2fs->e2fs_ficount; return (0); @@ -853,8 +881,8 @@ struct vnode *vp; struct cdev *dev; struct thread *td; - int i, error; - int used_blocks; + int error; + int i, used_blocks; td = curthread; error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); @@ -910,6 +938,7 @@ *vpp = NULL; return (error); } + /* convert ext2 inode to dinode */ ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); @@ -919,19 +948,31 @@ ip->i_prealloc_count = 0; ip->i_prealloc_block = 0; + /* initialize ext lock */ + bzero(&ip->i_ext_lock, sizeof(struct mtx)); + mtx_init(&ip->i_ext_lock, "inode ext lock", NULL, MTX_DEF); + /* * Now we want to make sure that block pointers for unused * blocks are zeroed out - ext2_balloc depends on this * although for regular files and directories only */ - if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { - used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize; - for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) - ip->i_db[i] = 0; - } -/* - ext2_print_inode(ip); -*/ + + /* + * NOTE: When ext4 file system use extents, we don't zero + * block pointers. 
+ */ + if (!(fs->e2fs->e2fs_features_incompat & EXT4F_INCOMPAT_EXTENTS)) { + if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { + used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize; + for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + } + } + + + /*ext2_print_inode(ip);*/ + bqrelse(bp); /* diff -urN /usr/src/sys/fs/ext2fs/ext2_vnops.c src/ext2_vnops.c --- /usr/src/sys/fs/ext2fs/ext2_vnops.c 2010-02-20 18:19:19.000000000 +0800 +++ src/ext2_vnops.c 2010-08-22 22:41:52.000000000 +0800 @@ -1419,7 +1419,7 @@ struct vnode *vp = ap->a_vp; struct inode *ip; struct bufobj *bo; - int32_t blkno; + int64_t blkno; int error; ip = VTOI(vp); diff -urN /usr/src/sys/fs/ext2fs/ext2fs.h src/ext2fs.h --- /usr/src/sys/fs/ext2fs/ext2fs.h 2010-01-14 22:30:54.000000000 +0800 +++ src/ext2fs.h 2010-08-22 22:50:30.000000000 +0800 @@ -38,6 +38,7 @@ #define _FS_EXT2FS_EXT2_FS_H #include <sys/types.h> +#include <sys/lock.h> /* * Special inode numbers @@ -71,7 +72,7 @@ /* * Maximal count of links to a file */ -#define EXT2_LINK_MAX 32000 +#define EXT2_LINK_MAX 65000 /* * Constants relative to the data blocks @@ -95,9 +96,9 @@ */ struct ext2fs { u_int32_t e2fs_icount; /* Inode count */ - u_int32_t e2fs_bcount; /* blocks count */ - u_int32_t e2fs_rbcount; /* reserved blocks count */ - u_int32_t e2fs_fbcount; /* free blocks count */ + u_int32_t e2fs_bcount_lo; /* blocks count */ + u_int32_t e2fs_rbcount_lo; /* reserved blocks count */ + u_int32_t e2fs_fbcount_lo; /* free blocks count */ u_int32_t e2fs_ficount; /* free inodes count */ u_int32_t e2fs_first_dblock; /* first data block */ u_int32_t e2fs_log_bsize; /* block size = 1024*(2^e2fs_log_bsize) */ @@ -130,8 +131,36 @@ char e2fs_vname[16]; /* volume name */ char e2fs_fsmnt[64]; /* name mounted on */ u_int32_t e2fs_algo; /* For comcate for dir */ - u_int16_t e2fs_reserved_ngdb; /* # of reserved gd blocks for resize */ - u_int32_t reserved2[204]; + u_int8_t e2fs_prealloc_blk; /* number of blocks to try to preallocate */ 
+ u_int8_t e2fs_prealloc_dblk; /* number of dirs to preallocate */ + u_int16_t e2fs_reserved_ngdb; /* # of reserved gd blocks for resize */ + u_int8_t e2fs_journal_uuid[16]; /* uuid of journal superblock */ + u_int32_t e2fs_journal_inum; /* inode number of journal file */ + u_int32_t e2fs_journal_dev; /* device number of journal file */ + u_int32_t e2fs_last_orphan; /* start of list of inodes to delete */ + u_int32_t e2fs_hash_seed[4]; /* HTREE hash seed */ + u_int8_t e2fs_def_hash_ver; /* default hash version to use */ + u_int8_t e2fs_char_pad; + u_int16_t e2fs_desc_size; /* size of group descriptor */ + u_int32_t e2fs_def_mnt_opts; + u_int32_t e2fs_first_meta_bg; /* first metablock block group */ + u_int32_t e2fs_mkfs_time; /* when the fs was created */ + u_int32_t e2fs_jnl_blks[17]; /* backup of the journal inode */ + u_int32_t e2fs_bcount_hi; /* block count */ + u_int32_t e2fs_rbcount_hi; /* reserved blocks count */ + u_int32_t e2fs_fbcount_hi; /* free blocks count */ + u_int16_t e2fs_min_extra_isize;/* all inodes have at least some bytes */ + u_int16_t e2fs_want_extra_isize; /* new inodes should reserve some bytes */ + u_int32_t e2fs_flags; /* miscellaneous flags */ + u_int16_t e2fs_raid_stride; /* RAID stride */ + u_int16_t e2fs_mmpintv; /* number of seconds to wait in MMP checking */ + u_int64_t e2fs_mmpblk; /* block for multi-mount protection */ + u_int32_t e2fs_raid_stripe_wid;/* blocks on all data disks (N * stride) */ + u_int8_t e2fs_log_gpf; /* FLEX_BG group size */ + u_int8_t e2fs_char_pad2; + u_int16_t e2fs_pad; + u_int64_t e2fs_kbytes_written; /* number of lifetime kilobytes written */ + u_int32_t reserved2[160]; }; @@ -173,7 +202,10 @@ uint8_t *e2fs_contigdirs; char e2fs_wasvalid; /* valid at mount time */ off_t e2fs_maxfilesize; - struct ext2_gd *e2fs_gd; /* Group Descriptors */ + struct ext2_gd *e2fs_gd; /* Group Descriptors */ + + u_int16_t e2fs_min_extra_isize; /* all inodes have at least some bytes */ + u_int16_t e2fs_want_extra_isize; /* new 
inodes should reserve some bytes */ }; /* @@ -198,13 +230,28 @@ */ #define EXT2F_COMPAT_PREALLOC 0x0001 #define EXT2F_COMPAT_RESIZE 0x0010 +#define EXT4F_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4F_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4F_COMPAT_EXT_ATTR 0x0008 +#define EXT4F_COMPAT_DIR_INDEX 0x0020 #define EXT2F_ROCOMPAT_SPARSESUPER 0x0001 #define EXT2F_ROCOMPAT_LARGEFILE 0x0002 #define EXT2F_ROCOMPAT_BTREE_DIR 0x0004 +#define EXT4F_ROCOMPAT_HUGE_FILE 0x0008 +#define EXT4F_ROCOMPAT_GDT_CSUM 0x0010 +#define EXT4F_ROCOMPAT_DIR_NLINK 0x0020 +#define EXT4F_ROCOMPAT_EXTRA_ISIZE 0x0040 #define EXT2F_INCOMPAT_COMP 0x0001 #define EXT2F_INCOMPAT_FTYPE 0x0002 +#define EXT4F_INCOMPAT_RECOVER 0x0004 +#define EXT4F_INCOMPAT_JOURNAL_DEV 0x0008 +#define EXT4F_INCOMPAT_META_BG 0x0010 +#define EXT4F_INCOMPAT_EXTENTS 0x0040 +#define EXT4F_INCOMPAT_64BIT 0x0080 +#define EXT4F_INCOMPAT_MMP 0x0100 +#define EXT4F_INCOMPAT_FLEX_BG 0x0200 /* * Features supported in this implementation @@ -220,6 +267,20 @@ #define EXT2F_INCOMPAT_SUPP EXT2F_INCOMPAT_FTYPE /* + * Features supported in ext4 read-only mode + */ +#define EXT4F_INCOMPAT_SUPP (EXT2F_INCOMPAT_FTYPE \ + | EXT4F_INCOMPAT_EXTENTS \ + | EXT4F_INCOMPAT_FLEX_BG) +#define EXT4F_ROCOMPAT_SUPP (EXT2F_ROCOMPAT_SPARSESUPER \ + | EXT2F_ROCOMPAT_LARGEFILE \ + | EXT2F_ROCOMPAT_BTREE_DIR \ + | EXT4F_ROCOMPAT_GDT_CSUM \ + | EXT4F_ROCOMPAT_DIR_NLINK \ + | EXT4F_ROCOMPAT_EXTRA_ISIZE \ + | EXT4F_ROCOMPAT_HUGE_FILE) + +/* * Feature set definitions */ #define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ @@ -255,14 +316,26 @@ /* ext2 file system block group descriptor */ struct ext2_gd { - u_int32_t ext2bgd_b_bitmap; /* blocks bitmap block */ - u_int32_t ext2bgd_i_bitmap; /* inodes bitmap block */ - u_int32_t ext2bgd_i_tables; /* inodes table block */ - u_int16_t ext2bgd_nbfree; /* number of free blocks */ - u_int16_t ext2bgd_nifree; /* number of free inodes */ - u_int16_t ext2bgd_ndirs; /* number of directories */ - u_int16_t reserved; - u_int32_t reserved2[3]; 
+ u_int32_t ext2bgd_b_bitmap_lo; /* blocks bitmap block */ + u_int32_t ext2bgd_i_bitmap_lo; /* inodes bitmap block */ + u_int32_t ext2bgd_i_tables_lo; /* inodes table block */ + u_int16_t ext2bgd_nbfree_lo; /* number of free blocks */ + u_int16_t ext2bgd_nifree_lo; /* number of free inodes */ + u_int16_t ext2bgd_ndirs_lo; /* number of directories */ + u_int16_t ext2bgd_flags; /* EXT4_BG_flags */ +#if 0 + u_int32_t reserved[2]; + u_int16_t ext2bgd_i_tables_unused_lo; /* number of unused inodes */ + u_int16_t ext2bgd_chksum; /* crc16 checksum */ + u_int32_t ext2bgd_b_bitmap_hi; /* blocks bitmap block MSB */ + u_int32_t ext2bgd_i_bitmap_hi; /* inodes bitmap block MSB */ + u_int32_t ext2bgd_i_tables_hi; /* inodes table block MSB */ + u_int16_t ext2bgd_nbfree_hi; /* number of free blocks MSB */ + u_int16_t ext2bgd_nifree_hi; /* number of free inodes MSB */ + u_int16_t ext2bgd_ndirs_hi; /* number of directories MSB */ + u_int16_t ext2bgd_i_tables_unused_hi; /* number of unused inodes MSB */ +#endif + u_int32_t reserved2[3]; }; /* EXT2FS metadatas are stored in little-endian byte order. 
These macros @@ -275,7 +348,7 @@ * Macro-instructions used to manage several block sizes */ #define EXT2_MIN_BLOCK_SIZE 1024 -#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MAX_BLOCK_SIZE 65536 #define EXT2_MIN_BLOCK_LOG_SIZE 10 #if defined(_KERNEL) # define EXT2_BLOCK_SIZE(s) ((s)->e2fs_bsize) diff -urN /usr/src/sys/fs/ext2fs/fs.h src/fs.h --- /usr/src/sys/fs/ext2fs/fs.h 2010-01-14 22:30:54.000000000 +0800 +++ src/fs.h 2010-08-22 22:41:52.000000000 +0800 @@ -93,7 +93,7 @@ /* get block containing inode from its number x */ #define ino_to_fsba(fs, x) \ - ((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables + \ + ((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables_lo + \ (((x) - 1) % (fs)->e2fs->e2fs_ipg) / (fs)->e2fs_ipb) /* get offset for inode in block */ diff -urN /usr/src/sys/fs/ext2fs/inode.h src/inode.h --- /usr/src/sys/fs/ext2fs/inode.h 2010-01-14 22:30:54.000000000 +0800 +++ src/inode.h 2010-08-22 22:41:52.000000000 +0800 @@ -38,9 +38,13 @@ #ifndef _FS_EXT2FS_INODE_H_ #define _FS_EXT2FS_INODE_H_ +#include <sys/param.h> #include <sys/lock.h> +#include <sys/mutex.h> #include <sys/queue.h> +#include <fs/ext2fs/ext2_extents.h> + #define ROOTINO ((ino_t)2) #define NDADDR 12 /* Direct addresses in inode. */ @@ -85,7 +89,7 @@ /* Fields from struct dinode in UFS. */ u_int16_t i_mode; /* IFMT, permissions; see below. */ - int16_t i_nlink; /* File link count. */ + u_int16_t i_nlink; /* File link count. */ u_int64_t i_size; /* File byte count. */ int32_t i_atime; /* Last access time. */ int32_t i_atimensec; /* Last access time. */ @@ -96,10 +100,15 @@ int32_t i_db[NDADDR]; /* Direct disk blocks. */ int32_t i_ib[NIADDR]; /* Indirect disk blocks. */ u_int32_t i_flags; /* Status flags (chflags). */ - int32_t i_blocks; /* Blocks actually held. */ + int64_t i_blocks; /* Blocks actually held. */ int32_t i_gen; /* Generation number. */ u_int32_t i_uid; /* File owner. */ u_int32_t i_gid; /* File group. 
*/ + + /* ext4 extents support */ + struct mtx i_ext_lock; /* this lock is only required in read/write mode, + but we still use it in read-only mode. */ + struct ext4_extent_cache i_ext_cache; /* cache for ext4 extent */ }; /* --------------000709010802090406030102--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4C71075C.9010802>