Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 28 Jun 2013 12:10:19 +0300
From:      Mikolaj Golub <trociny@FreeBSD.org>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, Mateusz Guzik <mjguzik@gmail.com>, src-committers@freebsd.org
Subject:   Re: svn commit: r252313 - head/sys/kern
Message-ID:  <20130628091017.GA3549@gmail.com>
In-Reply-To: <20130628064430.GK91021@kib.kiev.ua>
References:  <201306271914.r5RJE4on047806@svn.freebsd.org> <20130628010345.GA25051@dft-labs.eu> <20130628064430.GK91021@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help

--jI8keyz6grp/JLjh
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Fri, Jun 28, 2013 at 09:44:30AM +0300, Konstantin Belousov wrote:
> On Fri, Jun 28, 2013 at 03:03:46AM +0200, Mateusz Guzik wrote:
> > On Thu, Jun 27, 2013 at 07:14:04PM +0000, Mikolaj Golub wrote:
> > > Author: trociny
> > > Date: Thu Jun 27 19:14:03 2013
> > > New Revision: 252313
> > > URL: http://svnweb.freebsd.org/changeset/base/252313
> > > 
> > > Log:
> > >   To avoid LOR, always drop the filedesc lock before exporting fd to sbuf.
> > >   
> > >   Reviewed by:	kib
> > >   MFC after:	3 days
> > > 
> > > Modified:
> > >   head/sys/kern/kern_descrip.c
> > > 
> > > Modified: head/sys/kern/kern_descrip.c
> > > ==============================================================================
> > > --- head/sys/kern/kern_descrip.c	Thu Jun 27 18:59:07 2013	(r252312)
> > > +++ head/sys/kern/kern_descrip.c	Thu Jun 27 19:14:03 2013	(r252313)
> > > @@ -3427,12 +3427,10 @@ kern_proc_filedesc_out(struct proc *p,  
> > >  		 * re-validate and re-evaluate its properties when
> > >  		 * the loop continues.
> > >  		 */
> > > -		if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
> > > -			FILEDESC_SUNLOCK(fdp);
> > > +		FILEDESC_SUNLOCK(fdp);
> > >  		error = export_fd_to_sb(data, type, i, fflags, refcnt,
> > >  		    offset, fd_cap_rights, kif, sb, &remainder);
> > > -		if (type == KF_TYPE_VNODE || type == KF_TYPE_FIFO)
> > > -			FILEDESC_SLOCK(fdp);
> > > +		FILEDESC_SLOCK(fdp);
> > >  		if (error)
> > >  			break;
> > >  	}
> > 
> > Is this really ok? What prevents given fd from going away during
> > export_fd_to_sb execution? Both DTYPE_VNODE and DTYPE_FIFO pass down
> > a vrefed vnode so these are safe. But for example DTYPE_SOCKET goes with
> > fp->f_data, which can go away in the meantime (or I'm misreading the code).
> > 

Thanks!

> > I suggest obtainng ref to fp and passing it down in all cases.
> 
> Oops, I am sorry for missed this. But, I do not actually like the
> idea of referencing the fd.  It de-facto means that the process calling
> the sysctl duped the descriptor, potentially causing the close to be
> performed in the sysctl context on dereference.
> 
> Ideal solution would be to drop the filedesc lock between processing
> of the filedescriptors and draining sbuf while lock is dropped.

You mean something like below? (not well tested yet)

-- 
Mikolaj Golub

--jI8keyz6grp/JLjh
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: inline; filename="kern_proc_filedesc_out.lor.2.patch"

Index: sys/kern/kern_descrip.c
===================================================================
--- sys/kern/kern_descrip.c	(revision 252313)
+++ sys/kern/kern_descrip.c	(working copy)
@@ -3176,10 +3176,16 @@
 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
 #endif
 
+struct export_fd_buf {
+	struct filedesc		*fdp;
+	struct sbuf 		*sb;
+	ssize_t			remainder;
+	struct kinfo_file	kif;
+};
+
 static int
 export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
-    int64_t offset, cap_rights_t fd_cap_rights, struct kinfo_file *kif,
-    struct sbuf *sb, ssize_t *remainder)
+    int64_t offset, cap_rights_t fd_cap_rights, struct export_fd_buf *efbuf)
 {
 	struct {
 		int	fflag;
@@ -3202,16 +3208,20 @@
 		{ O_TRUNC, KF_FLAG_TRUNC }
 	};
 #define	NFFLAGS	(sizeof(fflags_table) / sizeof(*fflags_table))
+	struct kinfo_file *kif;
 	struct vnode *vp;
 	int error;
 	unsigned int i;
 
-	if (*remainder == 0)
+	if (efbuf->remainder == 0)
 		return (0);
+	kif = &efbuf->kif;
 	bzero(kif, sizeof(*kif));
 	switch (type) {
 	case KF_TYPE_FIFO:
 	case KF_TYPE_VNODE:
+		if (efbuf->fdp != NULL)
+			FILEDESC_SUNLOCK(efbuf->fdp);
 		vp = (struct vnode *)data;
 		error = fill_vnode_info(vp, kif);
 		vrele(vp);
@@ -3255,15 +3265,19 @@
 	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
 	    strlen(kif->kf_path) + 1;
 	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
-	if (*remainder != -1) {
-		if (*remainder < kif->kf_structsize) {
+	if (efbuf->remainder != -1) {
+		if (efbuf->remainder < kif->kf_structsize) {
 			/* Terminate export. */
-			*remainder = 0;
+			efbuf->remainder = 0;
 			return (0);
 		}
-		*remainder -= kif->kf_structsize;
+		efbuf->remainder -= kif->kf_structsize;
 	}
-	error = sbuf_bcat(sb, kif, kif->kf_structsize);
+	if (efbuf->fdp != NULL && type != KF_TYPE_FIFO && type != KF_TYPE_VNODE)
+		FILEDESC_SUNLOCK(efbuf->fdp);
+	error = sbuf_bcat(efbuf->sb, kif, kif->kf_structsize);
+	if (efbuf->fdp != NULL)
+		FILEDESC_SLOCK(efbuf->fdp);
 	return (error);
 }
 
@@ -3277,18 +3291,16 @@
 {
 	struct file *fp;
 	struct filedesc *fdp;
-	struct kinfo_file *kif;
+	struct export_fd_buf *efbuf;
 	struct vnode *cttyvp, *textvp, *tracevp;
 	int64_t offset;
 	void *data;
-	ssize_t remainder;
 	int error, i;
 	int type, refcnt, fflags;
 	cap_rights_t fd_cap_rights;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	remainder = maxlen;
 	/* ktrace vnode */
 	tracevp = p->p_tracevp;
 	if (tracevp != NULL)
@@ -3306,46 +3318,44 @@
 	}
 	fdp = fdhold(p);
 	PROC_UNLOCK(p);
-	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
+	efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK);
+	efbuf->fdp = NULL;
+	efbuf->sb = sb;
+	efbuf->remainder = maxlen;
 	if (tracevp != NULL)
 		export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
-		    FREAD | FWRITE, -1, -1, 0, kif, sb, &remainder);
+		    FREAD | FWRITE, -1, -1, 0, efbuf);
 	if (textvp != NULL)
 		export_fd_to_sb(textvp, KF_TYPE_VNODE, KF_FD_TYPE_TEXT,
-		    FREAD, -1, -1, 0, kif, sb, &remainder);
+		    FREAD, -1, -1, 0, efbuf);
 	if (cttyvp != NULL)
 		export_fd_to_sb(cttyvp, KF_TYPE_VNODE, KF_FD_TYPE_CTTY,
-		    FREAD | FWRITE, -1, -1, 0, kif, sb, &remainder);
+		    FREAD | FWRITE, -1, -1, 0, efbuf);
 	error = 0;
 	if (fdp == NULL)
 		goto fail;
+	efbuf->fdp = fdp;
 	FILEDESC_SLOCK(fdp);
 	/* working directory */
 	if (fdp->fd_cdir != NULL) {
 		vref(fdp->fd_cdir);
 		data = fdp->fd_cdir;
-		FILEDESC_SUNLOCK(fdp);
 		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_CWD,
-		    FREAD, -1, -1, 0, kif, sb, &remainder);
-		FILEDESC_SLOCK(fdp);
+		    FREAD, -1, -1, 0, efbuf);
 	}
 	/* root directory */
 	if (fdp->fd_rdir != NULL) {
 		vref(fdp->fd_rdir);
 		data = fdp->fd_rdir;
-		FILEDESC_SUNLOCK(fdp);
 		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_ROOT,
-		    FREAD, -1, -1, 0, kif, sb, &remainder);
-		FILEDESC_SLOCK(fdp);
+		    FREAD, -1, -1, 0, efbuf);
 	}
 	/* jail directory */
 	if (fdp->fd_jdir != NULL) {
 		vref(fdp->fd_jdir);
 		data = fdp->fd_jdir;
-		FILEDESC_SUNLOCK(fdp);
 		export_fd_to_sb(data, KF_TYPE_VNODE, KF_FD_TYPE_JAIL,
-		    FREAD, -1, -1, 0, kif, sb, &remainder);
-		FILEDESC_SLOCK(fdp);
+		    FREAD, -1, -1, 0, efbuf);
 	}
 	for (i = 0; i < fdp->fd_nfiles; i++) {
 		if ((fp = fdp->fd_ofiles[i].fde_file) == NULL)
@@ -3427,10 +3437,8 @@
 		 * re-validate and re-evaluate its properties when
 		 * the loop continues.
 		 */
-		FILEDESC_SUNLOCK(fdp);
 		error = export_fd_to_sb(data, type, i, fflags, refcnt,
-		    offset, fd_cap_rights, kif, sb, &remainder);
-		FILEDESC_SLOCK(fdp);
+		    offset, fd_cap_rights, efbuf);
 		if (error)
 			break;
 	}
@@ -3438,7 +3446,7 @@
 fail:
 	if (fdp != NULL)
 		fddrop(fdp);
-	free(kif, M_TEMP);
+	free(efbuf, M_TEMP);
 	return (error);
 }
 

--jI8keyz6grp/JLjh--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130628091017.GA3549>