From owner-freebsd-current@FreeBSD.ORG Mon Dec 13 12:49:07 2004 Return-Path: Delivered-To: freebsd-current@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 3E7C016A4CE for ; Mon, 13 Dec 2004 12:49:07 +0000 (GMT) Received: from critter.freebsd.dk (critter.freebsd.dk [212.242.86.163]) by mx1.FreeBSD.org (Postfix) with ESMTP id 50D6943D39 for ; Mon, 13 Dec 2004 12:49:06 +0000 (GMT) (envelope-from phk@critter.freebsd.dk) Received: from critter.freebsd.dk (localhost [127.0.0.1]) by critter.freebsd.dk (8.13.1/8.13.1) with ESMTP id iBDCn2hW036795 for ; Mon, 13 Dec 2004 13:49:02 +0100 (CET) (envelope-from phk@critter.freebsd.dk) To: current@freebsd.org From: Poul-Henning Kamp Date: Mon, 13 Dec 2004 13:49:02 +0100 Message-ID: <36794.1102942142@critter.freebsd.dk> Sender: phk@critter.freebsd.dk Subject: [TEST/REVIEW] struct filedesc refcounting X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 13 Dec 2004 12:49:07 -0000 Add fdunshare() function and use it two places outside kern_descrip.c. (Now all the refcounting happens in kern_descrip.c). Move vfs_mount.c::checkdirs() to kern_descrip.c::mountcheckdirs(). Add a fd_holdcnt which prevents the struct filedesc from being deallocated. Add fdhold(struct proc *) which grabs a fd_holdcnt on the filedesc of the process passed. Add fddrop() which drops a fd_holdcnt and if zero kills the mutex and frees the filedesc memory. Use fdhold() and fddrop() in the sysctl and in mountcheckdirs(). diff -ur -x compile -x _* freebsd/src/sys/kern/kern_descrip.c phk/phk_bufwork/sys/kern/kern_descrip.c --- freebsd/src/sys/kern/kern_descrip.c Fri Dec 3 22:32:41 2004 +++ phk/phk_bufwork/sys/kern/kern_descrip.c Mon Dec 13 13:43:41 2004 @@ -1394,6 +1394,7 @@ /* Create the file descriptor table. */ newfdp->fd_fd.fd_refcnt = 1; + newfdp->fd_fd.fd_holdcnt = 1; newfdp->fd_fd.fd_cmask = CMASK; newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles; newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; @@ -1402,6 +1403,34 @@ return (&newfdp->fd_fd); } +static struct filedesc * +fdhold(struct proc *p) +{ + struct filedesc *fdp; + + mtx_lock(&fdesc_mtx); + fdp = p->p_fd; + if (fdp != NULL) + fdp->fd_holdcnt++; + mtx_unlock(&fdesc_mtx); + return (fdp); +} + +static void +fddrop(struct filedesc *fdp) +{ + int i; + + mtx_lock(&fdesc_mtx); + i = --fdp->fd_holdcnt; + mtx_unlock(&fdesc_mtx); + if (i > 0) + return; + + mtx_destroy(&fdp->fd_mtx); + FREE(fdp, M_FILEDESC); +} + /* * Share a filedesc structure. */ @@ -1415,6 +1444,25 @@ } /* + * Unshare a filedesc structure, if necessary by making a copy + */ +void +fdunshare(struct proc *p, struct thread *td) +{ + + FILEDESC_LOCK_FAST(p->p_fd); + if (p->p_fd->fd_refcnt > 1) { + struct filedesc *tmp; + + FILEDESC_UNLOCK_FAST(p->p_fd); + tmp = fdcopy(p->p_fd); + fdfree(td); + p->p_fd = tmp; + } else + FILEDESC_UNLOCK_FAST(p->p_fd); +} + +/* * Copy a filedesc structure. * A NULL pointer in returns a NULL reference, this is to ease callers, * not catch errors. @@ -1569,7 +1617,6 @@ * We are the last reference to the structure, so we can * safely assume it will not change out from under us. */ - FILEDESC_UNLOCK(fdp); fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp) @@ -1585,14 +1632,22 @@ FREE(fdp->fd_ofiles, M_FILEDESC); if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE)) FREE(fdp->fd_map, M_FILEDESC); + + fdp->fd_nfiles = 0; + if (fdp->fd_cdir) vrele(fdp->fd_cdir); + fdp->fd_cdir = NULL; if (fdp->fd_rdir) vrele(fdp->fd_rdir); + fdp->fd_rdir = NULL; if (fdp->fd_jdir) vrele(fdp->fd_jdir); - mtx_destroy(&fdp->fd_mtx); - FREE(fdp, M_FILEDESC); + fdp->fd_jdir = NULL; + + FILEDESC_UNLOCK(fdp); + + fddrop(fdp); } /* @@ -2237,6 +2292,50 @@ /* NOTREACHED */ } +/* + * Scan all active processes to see if any of them have a current + * or root directory of `olddp'. If so, replace them with the new + * mount point. + */ +void +mountcheckdirs(struct vnode *olddp, struct vnode *newdp) +{ + struct filedesc *fdp; + struct proc *p; + int nrele; + + if (vrefcnt(olddp) == 1) + return; + sx_slock(&allproc_lock); + LIST_FOREACH(p, &allproc, p_list) { + fdp = fdhold(p); + if (fdp == NULL) + continue; + nrele = 0; + FILEDESC_LOCK_FAST(fdp); + if (fdp->fd_cdir == olddp) { + vref(newdp); + fdp->fd_cdir = newdp; + nrele++; + } + if (fdp->fd_rdir == olddp) { + vref(newdp); + fdp->fd_rdir = newdp; + nrele++; + } + FILEDESC_UNLOCK_FAST(fdp); + while (nrele--) + vrele(olddp); + } + sx_sunlock(&allproc_lock); + if (rootvnode == olddp) { + vrele(rootvnode); + vref(newdp); + rootvnode = newdp; + } +} + + struct filedesc_to_leader * filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader) { @@ -2316,11 +2415,9 @@ xf.xf_pid = p->p_pid; xf.xf_uid = p->p_ucred->cr_uid; PROC_UNLOCK(p); - mtx_lock(&fdesc_mtx); - if ((fdp = p->p_fd) == NULL) { - mtx_unlock(&fdesc_mtx); + fdp = fdhold(p); + if (fdp == NULL) continue; - } FILEDESC_LOCK_FAST(fdp); for (n = 0; n < fdp->fd_nfiles; ++n) { if ((fp = fdp->fd_ofiles[n]) == NULL) @@ -2339,7 +2436,7 @@ break; } FILEDESC_UNLOCK_FAST(fdp); - mtx_unlock(&fdesc_mtx); + fddrop(fdp); if (error) break; } diff -ur -x compile -x _* freebsd/src/sys/kern/kern_exec.c phk/phk_bufwork/sys/kern/kern_exec.c --- freebsd/src/sys/kern/kern_exec.c Sat Nov 27 09:20:24 2004 +++ phk/phk_bufwork/sys/kern/kern_exec.c Mon Dec 13 13:01:24 2004 @@ -465,16 +465,7 @@ * For security and other reasons, the file descriptor table cannot * be shared after an exec. */ - FILEDESC_LOCK_FAST(p->p_fd); - if (p->p_fd->fd_refcnt > 1) { - struct filedesc *tmp; - - FILEDESC_UNLOCK_FAST(p->p_fd); - tmp = fdcopy(p->p_fd); - fdfree(td); - p->p_fd = tmp; - } else - FILEDESC_UNLOCK_FAST(p->p_fd); + fdunshare(p, td); /* * Malloc things before we need locks. diff -ur -x compile -x _* freebsd/src/sys/kern/kern_fork.c phk/phk_bufwork/sys/kern/kern_fork.c --- freebsd/src/sys/kern/kern_fork.c Sat Nov 27 09:20:24 2004 +++ phk/phk_bufwork/sys/kern/kern_fork.c Mon Dec 13 13:01:24 2004 @@ -233,18 +233,9 @@ /* * Unshare file descriptors (from parent). */ - if (flags & RFFDG) { - FILEDESC_LOCK_FAST(p1->p_fd); - if (p1->p_fd->fd_refcnt > 1) { - struct filedesc *newfd; - - FILEDESC_UNLOCK_FAST(p1->p_fd); - newfd = fdcopy(p1->p_fd); - fdfree(td); - p1->p_fd = newfd; - } else - FILEDESC_UNLOCK_FAST(p1->p_fd); - } + if (flags & RFFDG) + fdunshare(p1, td); + *procp = NULL; return (0); } diff -ur -x compile -x _* freebsd/src/sys/kern/vfs_mount.c phk/phk_bufwork/sys/kern/vfs_mount.c --- freebsd/src/sys/kern/vfs_mount.c Sun Dec 12 00:09:32 2004 +++ phk/phk_bufwork/sys/kern/vfs_mount.c Mon Dec 13 13:43:41 2004 @@ -73,7 +73,6 @@ #define ROOTNAME "root_device" #define VFS_MOUNTARG_SIZE_MAX (1024 * 64) -static void checkdirs(struct vnode *olddp, struct vnode *newdp); static void gets(char *cp); static int vfs_domount(struct thread *td, const char *fstype, char *fspath, int fsflags, void *fsdata); @@ -784,7 +783,7 @@ vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, &newdp, td)) panic("mount: lost mount"); - checkdirs(vp, newdp); + mountcheckdirs(vp, newdp); vput(newdp); VOP_UNLOCK(vp, 0, td); if ((mp->mnt_flag & MNT_RDONLY) == 0) @@ -801,55 +800,6 @@ } return (error); } - -/* - * Scan all active processes to see if any of them have a current - * or root directory of `olddp'. If so, replace them with the new - * mount point. - */ -static void -checkdirs(olddp, newdp) - struct vnode *olddp, *newdp; -{ - struct filedesc *fdp; - struct proc *p; - int nrele; - - if (vrefcnt(olddp) == 1) - return; - sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { - mtx_lock(&fdesc_mtx); - fdp = p->p_fd; - if (fdp == NULL) { - mtx_unlock(&fdesc_mtx); - continue; - } - nrele = 0; - FILEDESC_LOCK_FAST(fdp); - if (fdp->fd_cdir == olddp) { - vref(newdp); - fdp->fd_cdir = newdp; - nrele++; - } - if (fdp->fd_rdir == olddp) { - vref(newdp); - fdp->fd_rdir = newdp; - nrele++; - } - FILEDESC_UNLOCK_FAST(fdp); - mtx_unlock(&fdesc_mtx); - while (nrele--) - vrele(olddp); - } - sx_sunlock(&allproc_lock); - if (rootvnode == olddp) { - vrele(rootvnode); - vref(newdp); - rootvnode = newdp; - } -} - /* * --------------------------------------------------------------------- * Unmount a filesystem. @@ -991,7 +941,7 @@ */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) { if (mp->mnt_vnodecovered != NULL) - checkdirs(fsrootvp, mp->mnt_vnodecovered); + mountcheckdirs(fsrootvp, mp->mnt_vnodecovered); if (fsrootvp == rootvnode) { vrele(rootvnode); rootvnode = NULL; @@ -1008,7 +958,7 @@ /* Undo cdir/rdir and rootvnode changes made above. */ if ((flags & MNT_FORCE) && VFS_ROOT(mp, &fsrootvp, td) == 0) { if (mp->mnt_vnodecovered != NULL) - checkdirs(mp->mnt_vnodecovered, fsrootvp); + mountcheckdirs(mp->mnt_vnodecovered, fsrootvp); if (rootvnode == NULL) { rootvnode = fsrootvp; vref(rootvnode); diff -ur -x compile -x _* freebsd/src/sys/sys/filedesc.h phk/phk_bufwork/sys/sys/filedesc.h --- freebsd/src/sys/sys/filedesc.h Thu Dec 2 13:22:35 2004 +++ phk/phk_bufwork/sys/sys/filedesc.h Mon Dec 13 13:43:41 2004 @@ -58,7 +58,8 @@ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_freefile; /* approx. next free file */ u_short fd_cmask; /* mask for file creation */ - u_short fd_refcnt; /* reference count */ + u_short fd_refcnt; /* thread reference count */ + u_short fd_holdcnt; /* hold count on structure + mutex */ struct mtx fd_mtx; /* protects members of this struct */ int fd_locked; /* long lock flag */ @@ -163,6 +164,7 @@ void fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td); void fdcloseexec(struct thread *td); struct filedesc *fdcopy(struct filedesc *fdp); +void fdunshare(struct proc *p, struct thread *td); void fdfree(struct thread *td); struct filedesc *fdinit(struct filedesc *fdp); struct filedesc *fdshare(struct filedesc *fdp); @@ -171,6 +173,7 @@ filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader); int getvnode(struct filedesc *fdp, int fd, struct file **fpp); +void mountcheckdirs(struct vnode *olddp, struct vnode *newdp); void setugidsafety(struct thread *td); static __inline struct file * -- Poul-Henning Kamp | UNIX since Zilog Zeus 3.20 phk@FreeBSD.ORG | TCP/IP since RFC 956 FreeBSD committer | BSD since 4.3-tahoe Never attribute to malice what can adequately be explained by incompetence.