From owner-svn-src-all@FreeBSD.ORG Fri Mar 27 15:57:53 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 6D1581065673; Fri, 27 Mar 2009 15:57:53 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 54B2F8FC22; Fri, 27 Mar 2009 15:57:53 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n2RFvrf6065251; Fri, 27 Mar 2009 15:57:53 GMT (envelope-from kib@svn.freebsd.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n2RFvruq065248; Fri, 27 Mar 2009 15:57:53 GMT (envelope-from kib@svn.freebsd.org) Message-Id: <200903271557.n2RFvruq065248@svn.freebsd.org> From: Konstantin Belousov Date: Fri, 27 Mar 2009 15:57:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r190470 - in stable/7/sys: . contrib/pf dev/ath/ath_hal dev/cxgb ufs/ffs ufs/ufs X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 27 Mar 2009 15:57:59 -0000 Author: kib Date: Fri Mar 27 15:57:52 2009 New Revision: 190470 URL: http://svn.freebsd.org/changeset/base/190470 Log: MFC r189737: The non-modifying EA VOPs are executed with only shared vnode lock taken. Provide a custom lock around initializing and tearing down EA area, to prevent both memory leaks and double-free of it. Count the number of EA area accessors. Put the new field i_ea_refs to the end of the struct inode to keep structure layout stable on releng branch for the benefit of lsof. MFC r190469: Correct typo. ABI issue pointed out by: kensmith Approved by: re (kensmith) Modified: stable/7/sys/ (props changed) stable/7/sys/contrib/pf/ (props changed) stable/7/sys/dev/ath/ath_hal/ (props changed) stable/7/sys/dev/cxgb/ (props changed) stable/7/sys/ufs/ffs/ffs_vfsops.c stable/7/sys/ufs/ffs/ffs_vnops.c stable/7/sys/ufs/ufs/inode.h Modified: stable/7/sys/ufs/ffs/ffs_vfsops.c ============================================================================== --- stable/7/sys/ufs/ffs/ffs_vfsops.c Fri Mar 27 15:46:02 2009 (r190469) +++ stable/7/sys/ufs/ffs/ffs_vfsops.c Fri Mar 27 15:57:52 2009 (r190470) @@ -1451,6 +1451,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags ip->i_fs = fs; ip->i_dev = dev; ip->i_number = ino; + ip->i_ea_refs = 0; #ifdef QUOTA { int i; Modified: stable/7/sys/ufs/ffs/ffs_vnops.c ============================================================================== --- stable/7/sys/ufs/ffs/ffs_vnops.c Fri Mar 27 15:46:02 2009 (r190469) +++ stable/7/sys/ufs/ffs/ffs_vnops.c Fri Mar 27 15:57:52 2009 (r190470) @@ -1225,6 +1225,35 @@ ffs_rdextattr(u_char **p, struct vnode * return (0); } +static void +ffs_lock_ea(struct vnode *vp) +{ + struct inode *ip; + + ip = VTOI(vp); + VI_LOCK(vp); + while (ip->i_flag & IN_EA_LOCKED) { + ip->i_flag |= IN_EA_LOCKWAIT; + msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea", + 0); + } + ip->i_flag |= IN_EA_LOCKED; + VI_UNLOCK(vp); +} + +static void +ffs_unlock_ea(struct vnode *vp) +{ + struct inode *ip; + + ip = VTOI(vp); + VI_LOCK(vp); + if (ip->i_flag & IN_EA_LOCKWAIT) + wakeup(&ip->i_ea_refs); + ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT); + VI_UNLOCK(vp); +} + static int ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td) { @@ -1234,14 +1263,22 @@ ffs_open_ea(struct vnode *vp, struct ucr ip = VTOI(vp); - if (ip->i_ea_area != NULL) - return (EBUSY); + ffs_lock_ea(vp); + if (ip->i_ea_area != NULL) { + ip->i_ea_refs++; + ffs_unlock_ea(vp); + return (0); + } dp = ip->i_din2; error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0); - if (error) + if (error) { + ffs_unlock_ea(vp); return (error); + } ip->i_ea_len = dp->di_extsize; ip->i_ea_error = 0; + ip->i_ea_refs++; + ffs_unlock_ea(vp); return (0); } @@ -1258,11 +1295,16 @@ ffs_close_ea(struct vnode *vp, int commi struct ufs2_dinode *dp; ip = VTOI(vp); - if (ip->i_ea_area == NULL) + + ffs_lock_ea(vp); + if (ip->i_ea_area == NULL) { + ffs_unlock_ea(vp); return (EINVAL); + } dp = ip->i_din2; error = ip->i_ea_error; if (commit && error == 0) { + ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit"); if (cred == NOCRED) cred = vp->v_mount->mnt_cred; liovec.iov_base = ip->i_ea_area; @@ -1279,10 +1321,13 @@ ffs_close_ea(struct vnode *vp, int commi error = ffs_truncate(vp, 0, IO_EXT, cred, td); error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred); } - free(ip->i_ea_area, M_TEMP); - ip->i_ea_area = NULL; - ip->i_ea_len = 0; - ip->i_ea_error = 0; + if (--ip->i_ea_refs == 0) { + free(ip->i_ea_area, M_TEMP); + ip->i_ea_area = NULL; + ip->i_ea_len = 0; + ip->i_ea_error = 0; + } + ffs_unlock_ea(vp); return (error); } @@ -1392,7 +1437,6 @@ vop_deleteextattr { uint32_t ealength, ul; int ealen, olen, eapad1, eapad2, error, i, easize; u_char *eae, *p; - int stand_alone; ip = VTOI(ap->a_vp); fs = ip->i_fs; @@ -1409,19 +1453,19 @@ vop_deleteextattr { error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, IWRITE); if (error) { + + /* + * ffs_lock_ea is not needed there, because the vnode + * must be exclusively locked. + */ if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = error; return (error); } - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); ealength = eapad1 = ealen = eapad2 = 0; @@ -1434,8 +1478,7 @@ vop_deleteextattr { if (olen == -1) { /* delete but nonexistent */ free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); return(ENOATTR); } bcopy(p, &ul, sizeof ul); @@ -1446,9 +1489,8 @@ vop_deleteextattr { } if (easize > NXADDR * fs->fs_bsize) { free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = ENOSPC; return(ENOSPC); } @@ -1456,8 +1498,7 @@ vop_deleteextattr { ip->i_ea_area = eae; ip->i_ea_len = easize; free(p, M_TEMP); - if (stand_alone) - error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); + error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); return(error); } @@ -1482,7 +1523,7 @@ vop_getextattr { struct fs *fs; u_char *eae, *p; unsigned easize; - int error, ealen, stand_alone; + int error, ealen; ip = VTOI(ap->a_vp); fs = ip->i_fs; @@ -1495,14 +1536,10 @@ vop_getextattr { if (error) return (error); - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); + eae = ip->i_ea_area; easize = ip->i_ea_len; @@ -1516,8 +1553,8 @@ vop_getextattr { error = uiomove(p, ealen, ap->a_uio); } else error = ENOATTR; - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); return(error); } @@ -1542,7 +1579,7 @@ vop_listextattr { u_char *eae, *p, *pe, *pn; unsigned easize; uint32_t ul; - int error, ealen, stand_alone; + int error, ealen; ip = VTOI(ap->a_vp); fs = ip->i_fs; @@ -1555,14 +1592,9 @@ vop_listextattr { if (error) return (error); - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); eae = ip->i_ea_area; easize = ip->i_ea_len; @@ -1586,8 +1618,7 @@ vop_listextattr { error = uiomove(p, ealen + 1, ap->a_uio); } } - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); return(error); } @@ -1612,7 +1643,6 @@ vop_setextattr { uint32_t ealength, ul; int ealen, olen, eapad1, eapad2, error, i, easize; u_char *eae, *p; - int stand_alone; ip = VTOI(ap->a_vp); fs = ip->i_fs; @@ -1633,19 +1663,19 @@ vop_setextattr { error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace, ap->a_cred, ap->a_td, IWRITE); if (error) { + + /* + * ffs_lock_ea is not needed there, because the vnode + * must be exclusively locked. + */ if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = error; return (error); } - if (ip->i_ea_area == NULL) { - error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); - if (error) - return (error); - stand_alone = 1; - } else { - stand_alone = 0; - } + error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td); + if (error) + return (error); ealen = ap->a_uio->uio_resid; ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name); @@ -1677,9 +1707,8 @@ vop_setextattr { } if (easize > NXADDR * fs->fs_bsize) { free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = ENOSPC; return(ENOSPC); } @@ -1695,9 +1724,8 @@ vop_setextattr { error = uiomove(p, ealen, ap->a_uio); if (error) { free(eae, M_TEMP); - if (stand_alone) - ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); - else if (ip->i_ea_error == 0) + ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td); + if (ip->i_ea_area != NULL && ip->i_ea_error == 0) ip->i_ea_error = error; return(error); } @@ -1708,8 +1736,7 @@ vop_setextattr { ip->i_ea_area = eae; ip->i_ea_len = easize; free(p, M_TEMP); - if (stand_alone) - error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); + error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td); return(error); } Modified: stable/7/sys/ufs/ufs/inode.h ============================================================================== --- stable/7/sys/ufs/ufs/inode.h Fri Mar 27 15:46:02 2009 (r190469) +++ stable/7/sys/ufs/ufs/inode.h Fri Mar 27 15:57:52 2009 (r190470) @@ -112,6 +112,12 @@ struct inode { struct ufs1_dinode *din1; /* UFS1 on-disk dinode. */ struct ufs2_dinode *din2; /* UFS2 on-disk dinode. */ } dinode_u; + + /* + * i_ea_refs is added at the end of the structure to + * keep struct inode layout on releng branch. + */ + int i_ea_refs; /* Number of users of EA area */ }; /* * These flags are kept in i_flag. @@ -125,6 +131,8 @@ struct inode { #define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ #define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the suspension finished */ +#define IN_EA_LOCKED 0x0200 +#define IN_EA_LOCKWAIT 0x0400 #define i_devvp i_ump->um_devvp #define i_umbufobj i_ump->um_bo