Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Oct 2009 18:09:57 +0000 (UTC)
From:      "Simon L. Nielsen" <simon@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-6@freebsd.org
Subject:   svn commit: r197715 - releng/6.3 releng/6.3/sys/conf releng/6.3/sys/fs/devfs releng/6.3/sys/kern releng/6.3/sys/sys releng/6.4 releng/6.4/sys/conf releng/6.4/sys/fs/devfs releng/6.4/sys/kern releng...
Message-ID:  <200910021809.n92I9v0c009121@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: simon
Date: Fri Oct  2 18:09:56 2009
New Revision: 197715
URL: http://svn.freebsd.org/changeset/base/197715

Log:
  MFC r197711 (partial) to 6.x and 7.x:
  
  - Add no zero mapping feature, disabled by default. [EN-09:05]
  
  MFC 178913,178914,179242,179243,180336,180340 to 6.x:
  
  - Fix kqueue pipe race conditions. [SA-09:13]
  
  MFC r192301 to 7.x; 6.x has slightly different fix:
  
  - Fix devfs / VFS NULL pointer race condition. [SA-09:14]
  
  Security:	FreeBSD-SA-09:13.pipe
  Security:	FreeBSD-SA-09:14.devfs
  Errata:		FreeBSD-EN-09:05.null
  Submitted by:	kib [SA-09:13] [SA-09:14]
  Submitted by:	bz [EN-09:05]
  In collaboration with:	jhb, kib, alc [EN-09:05]
  Approved by:	so (simon)

Modified:
  stable/6/sys/fs/devfs/devfs_vnops.c
  stable/6/sys/kern/kern_event.c
  stable/6/sys/kern/kern_exec.c
  stable/6/sys/kern/kern_fork.c
  stable/6/sys/kern/sys_pipe.c
  stable/6/sys/sys/event.h
  stable/6/sys/sys/pipe.h

Changes in other areas also in this revision:
Modified:
  releng/6.3/UPDATING
  releng/6.3/sys/conf/newvers.sh
  releng/6.3/sys/fs/devfs/devfs_vnops.c
  releng/6.3/sys/kern/kern_event.c
  releng/6.3/sys/kern/kern_exec.c
  releng/6.3/sys/kern/kern_fork.c
  releng/6.3/sys/kern/sys_pipe.c
  releng/6.3/sys/sys/event.h
  releng/6.3/sys/sys/pipe.h
  releng/6.4/UPDATING
  releng/6.4/sys/conf/newvers.sh
  releng/6.4/sys/fs/devfs/devfs_vnops.c
  releng/6.4/sys/kern/kern_event.c
  releng/6.4/sys/kern/kern_exec.c
  releng/6.4/sys/kern/kern_fork.c
  releng/6.4/sys/kern/sys_pipe.c
  releng/6.4/sys/sys/event.h
  releng/6.4/sys/sys/pipe.h
  releng/7.1/UPDATING
  releng/7.1/sys/conf/newvers.sh
  releng/7.1/sys/fs/devfs/devfs_vnops.c
  releng/7.1/sys/kern/kern_exec.c
  releng/7.2/UPDATING
  releng/7.2/sys/conf/newvers.sh
  releng/7.2/sys/fs/devfs/devfs_vnops.c
  releng/7.2/sys/kern/kern_exec.c
  stable/7/sys/kern/kern_exec.c

Modified: stable/6/sys/fs/devfs/devfs_vnops.c
==============================================================================
--- stable/6/sys/fs/devfs/devfs_vnops.c	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/fs/devfs/devfs_vnops.c	Fri Oct  2 18:09:56 2009	(r197715)
@@ -759,6 +759,9 @@ devfs_open(struct vop_open_args *ap)
 
 	VOP_UNLOCK(vp, 0, td);
 
+	if (ap->a_fdidx >= 0)
+		ap->a_td->td_proc->p_fd->fd_ofiles[ap->a_fdidx]->f_vnode = vp;
+
 	if(!(dsw->d_flags & D_NEEDGIANT)) {
 		DROP_GIANT();
 		if (dsw->d_fdopen != NULL)

Modified: stable/6/sys/kern/kern_event.c
==============================================================================
--- stable/6/sys/kern/kern_event.c	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/kern/kern_event.c	Fri Oct  2 18:09:56 2009	(r197715)
@@ -392,30 +392,82 @@ filt_proc(struct knote *kn, long hint)
 		return (1);
 	}
 
-	/*
-	 * process forked, and user wants to track the new process,
-	 * so attach a new knote to it, and immediately report an
-	 * event with the parent's pid.
-	 */
-	if ((event == NOTE_FORK) && (kn->kn_sfflags & NOTE_TRACK)) {
-		struct kevent kev;
-		int error;
+	return (kn->kn_fflags != 0);
+}
+
+/*
+ * Called when the process forked. It mostly does the same as the
+ * knote(), activating all knotes registered to be activated when the
+ * process forked. Additionally, for each knote attached to the
+ * parent, check whether user wants to track the new process. If so
+ * attach a new knote to it, and immediately report an event with the
+ * child's pid.
+ */
+void
+knote_fork(struct knlist *list, int pid)
+{
+	struct kqueue *kq;
+	struct knote *kn;
+	struct kevent kev;
+	int error;
+
+	if (list == NULL)
+		return;
+	list->kl_lock(list->kl_lockarg);
+
+	SLIST_FOREACH(kn, &list->kl_list, kn_selnext) {
+		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX)
+			continue;
+		kq = kn->kn_kq;
+		KQ_LOCK(kq);
+		if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			KQ_UNLOCK(kq);
+			continue;
+		}
 
 		/*
-		 * register knote with new process.
+		 * The same as knote(), activate the event.
 		 */
-		kev.ident = hint & NOTE_PDATAMASK;	/* pid */
+		if ((kn->kn_sfflags & NOTE_TRACK) == 0) {
+			kn->kn_status |= KN_HASKQLOCK;
+			if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
+				KNOTE_ACTIVATE(kn, 1);
+			kn->kn_status &= ~KN_HASKQLOCK;
+			KQ_UNLOCK(kq);
+			continue;
+		}
+
+		/*
+		 * The NOTE_TRACK case. In addition to the activation
+		 * of the event, we need to register new event to
+		 * track the child. Drop the locks in preparation for
+		 * the call to kqueue_register().
+		 */
+		kn->kn_status |= KN_INFLUX;
+		KQ_UNLOCK(kq);
+		list->kl_unlock(list->kl_lockarg);
+
+		/*
+		 * Activate existing knote and register a knote with
+		 * new process.
+		 */
+		kev.ident = pid;
 		kev.filter = kn->kn_filter;
 		kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_FLAG1;
 		kev.fflags = kn->kn_sfflags;
-		kev.data = kn->kn_id;			/* parent */
-		kev.udata = kn->kn_kevent.udata;	/* preserve udata */
-		error = kqueue_register(kn->kn_kq, &kev, NULL, 0);
+		kev.data = kn->kn_id;		/* parent */
+		kev.udata = kn->kn_kevent.udata;/* preserve udata */
+		error = kqueue_register(kq, &kev, NULL, 0);
+		if (kn->kn_fop->f_event(kn, NOTE_FORK | pid))
+			KNOTE_ACTIVATE(kn, 0);
 		if (error)
 			kn->kn_fflags |= NOTE_TRACKERR;
+		KQ_LOCK(kq);
+		kn->kn_status &= ~KN_INFLUX;
+		KQ_UNLOCK_FLUX(kq);
+		list->kl_lock(list->kl_lockarg);
 	}
-
-	return (kn->kn_fflags != 0);
+	list->kl_unlock(list->kl_lockarg);
 }
 
 static int
@@ -1123,7 +1175,7 @@ kqueue_scan(struct kqueue *kq, int maxev
 	struct kevent *kevp;
 	struct timeval atv, rtv, ttv;
 	struct knote *kn, *marker;
-	int count, timeout, nkev, error;
+	int count, timeout, nkev, error, influx;
 	int haskqglobal;
 
 	count = maxevents;
@@ -1193,12 +1245,17 @@ start:
 	}
 
 	TAILQ_INSERT_TAIL(&kq->kq_head, marker, kn_tqe);
+	influx = 0;
 	while (count) {
 		KQ_OWNED(kq);
 		kn = TAILQ_FIRST(&kq->kq_head);
 
 		if ((kn->kn_status == KN_MARKER && kn != marker) ||
 		    (kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+			if (influx) {
+				influx = 0;
+				KQ_FLUX_WAKEUP(kq);
+			}
 			kq->kq_state |= KQ_FLUXWAIT;
 			error = msleep(kq, &kq->kq_lock, PSOCK,
 			    "kqflxwt", 0);
@@ -1248,6 +1305,7 @@ start:
 				    ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX);
 				kq->kq_count--;
 				KN_LIST_UNLOCK(kn);
+				influx = 1;
 				continue;
 			}
 			*kevp = kn->kn_kevent;
@@ -1263,6 +1321,7 @@ start:
 			
 			kn->kn_status &= ~(KN_INFLUX);
 			KN_LIST_UNLOCK(kn);
+			influx = 1;
 		}
 
 		/* we are returning a copy to the user */
@@ -1271,6 +1330,7 @@ start:
 		count--;
 
 		if (nkev == KQ_NEVENTS) {
+			influx = 0;
 			KQ_UNLOCK_FLUX(kq);
 			error = k_ops->k_copyout(k_ops->arg, keva, nkev);
 			nkev = 0;
@@ -1434,8 +1494,11 @@ kqueue_close(struct file *fp, struct thr
 
 	for (i = 0; i < kq->kq_knlistsize; i++) {
 		while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) {
-			KASSERT((kn->kn_status & KN_INFLUX) == 0,
-			    ("KN_INFLUX set when not suppose to be"));
+			if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+				kq->kq_state |= KQ_FLUXWAIT;
+				msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0);
+				continue;
+			}
 			kn->kn_status |= KN_INFLUX;
 			KQ_UNLOCK(kq);
 			if (!(kn->kn_status & KN_DETACHED))
@@ -1447,8 +1510,12 @@ kqueue_close(struct file *fp, struct thr
 	if (kq->kq_knhashmask != 0) {
 		for (i = 0; i <= kq->kq_knhashmask; i++) {
 			while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) {
-				KASSERT((kn->kn_status & KN_INFLUX) == 0,
-				    ("KN_INFLUX set when not suppose to be"));
+				if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) {
+					kq->kq_state |= KQ_FLUXWAIT;
+					msleep(kq, &kq->kq_lock, PSOCK,
+					       "kqclo2", 0);
+					continue;
+				}
 				kn->kn_status |= KN_INFLUX;
 				KQ_UNLOCK(kq);
 				if (!(kn->kn_status & KN_DETACHED))

Modified: stable/6/sys/kern/kern_exec.c
==============================================================================
--- stable/6/sys/kern/kern_exec.c	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/kern/kern_exec.c	Fri Oct  2 18:09:56 2009	(r197715)
@@ -104,6 +104,13 @@ u_long ps_arg_cache_limit = PAGE_SIZE / 
 SYSCTL_ULONG(_kern, OID_AUTO, ps_arg_cache_limit, CTLFLAG_RW, 
     &ps_arg_cache_limit, 0, "");
 
+SYSCTL_DECL(_security_bsd);
+
+static int map_at_zero = 1;
+TUNABLE_INT("security.bsd.map_at_zero", &map_at_zero);
+SYSCTL_INT(_security_bsd, OID_AUTO, map_at_zero, CTLFLAG_RW, &map_at_zero, 0,
+    "Permit processes to map an object at virtual address 0.");
+
 static int
 sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS)
 {
@@ -914,7 +921,7 @@ exec_new_vmspace(imgp, sv)
 	int error;
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
-	vm_offset_t stack_addr;
+	vm_offset_t sv_minuser, stack_addr;
 	vm_map_t map;
 
 	imgp->vmspace_destroyed = 1;
@@ -928,14 +935,18 @@ exec_new_vmspace(imgp, sv)
 	 * not disrupted
 	 */
 	map = &vmspace->vm_map;
-	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv->sv_minuser &&
+	if (map_at_zero)
+		sv_minuser = sv->sv_minuser;
+	else
+		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
+	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
 	    vm_map_max(map) == sv->sv_maxuser) {
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace), vm_map_min(map),
 		    vm_map_max(map));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
 	} else {
-		vmspace_exec(p, sv->sv_minuser, sv->sv_maxuser);
+		vmspace_exec(p, sv_minuser, sv->sv_maxuser);
 		vmspace = p->p_vmspace;
 		map = &vmspace->vm_map;
 	}

Modified: stable/6/sys/kern/kern_fork.c
==============================================================================
--- stable/6/sys/kern/kern_fork.c	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/kern/kern_fork.c	Fri Oct  2 18:09:56 2009	(r197715)
@@ -699,14 +699,12 @@ again:
 	 */
 	PROC_LOCK(p1);
 	_PRELE(p1);
+	PROC_UNLOCK(p1);
 
 	/*
 	 * Tell any interested parties about the new process.
 	 */
-	KNOTE_LOCKED(&p1->p_klist, NOTE_FORK | p2->p_pid);
-
-	PROC_UNLOCK(p1);
-
+	knote_fork(&p1->p_klist, p2->p_pid);
 	/*
 	 * Preserve synchronization semantics of vfork.  If waiting for
 	 * child to exec or exit, set P_PPWAIT on child, and sleep on our

Modified: stable/6/sys/kern/sys_pipe.c
==============================================================================
--- stable/6/sys/kern/sys_pipe.c	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/kern/sys_pipe.c	Fri Oct  2 18:09:56 2009	(r197715)
@@ -268,8 +268,8 @@ pipe_zone_ctor(void *mem, int size, void
 	 * one at a time.  When both are free'd, then the whole pair
 	 * is released.
 	 */
-	rpipe->pipe_present = 1;
-	wpipe->pipe_present = 1;
+	rpipe->pipe_present = PIPE_ACTIVE;
+	wpipe->pipe_present = PIPE_ACTIVE;
 
 	/*
 	 * Eventually, the MAC Framework may initialize the label
@@ -1003,7 +1003,8 @@ pipe_write(fp, uio, active_cred, flags, 
 	/*
 	 * detect loss of pipe read side, issue SIGPIPE if lost.
 	 */
-	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+	if (wpipe->pipe_present != PIPE_ACTIVE ||
+	    (wpipe->pipe_state & PIPE_EOF)) {
 		pipeunlock(wpipe);
 		PIPE_UNLOCK(rpipe);
 		return (EPIPE);
@@ -1361,13 +1362,14 @@ pipe_poll(fp, events, active_cred, td)
 			revents |= events & (POLLIN | POLLRDNORM);
 
 	if (events & (POLLOUT | POLLWRNORM))
-		if (!wpipe->pipe_present || (wpipe->pipe_state & PIPE_EOF) ||
+		if (wpipe->pipe_present != PIPE_ACTIVE ||
+		    (wpipe->pipe_state & PIPE_EOF) ||
 		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
 		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
 			revents |= events & (POLLOUT | POLLWRNORM);
 
 	if ((rpipe->pipe_state & PIPE_EOF) ||
-	    (!wpipe->pipe_present) ||
+	    wpipe->pipe_present != PIPE_ACTIVE ||
 	    (wpipe->pipe_state & PIPE_EOF))
 		revents |= POLLHUP;
 
@@ -1506,7 +1508,7 @@ pipeclose(cpipe)
 	 * Disconnect from peer, if any.
 	 */
 	ppipe = cpipe->pipe_peer;
-	if (ppipe->pipe_present != 0) {
+	if (ppipe->pipe_present == PIPE_ACTIVE) {
 		pipeselwakeup(ppipe);
 
 		ppipe->pipe_state |= PIPE_EOF;
@@ -1523,16 +1525,23 @@ pipeclose(cpipe)
 	PIPE_UNLOCK(cpipe);
 	pipe_free_kmem(cpipe);
 	PIPE_LOCK(cpipe);
-	cpipe->pipe_present = 0;
+	cpipe->pipe_present = PIPE_CLOSING;
 	pipeunlock(cpipe);
+
+	/*
+	 * knlist_clear() may sleep dropping the PIPE_MTX. Set the
+	 * PIPE_FINALIZED, that allows other end to free the
+	 * pipe_pair, only after the knotes are completely dismantled.
+	 */
 	knlist_clear(&cpipe->pipe_sel.si_note, 1);
+	cpipe->pipe_present = PIPE_FINALIZED;
 	knlist_destroy(&cpipe->pipe_sel.si_note);
 
 	/*
 	 * If both endpoints are now closed, release the memory for the
 	 * pipe pair.  If not, unlock.
 	 */
-	if (ppipe->pipe_present == 0) {
+	if (ppipe->pipe_present == PIPE_FINALIZED) {
 		PIPE_UNLOCK(cpipe);
 #ifdef MAC
 		mac_destroy_pipe(pp);
@@ -1556,7 +1565,7 @@ pipe_kqfilter(struct file *fp, struct kn
 		break;
 	case EVFILT_WRITE:
 		kn->kn_fop = &pipe_wfiltops;
-		if (!cpipe->pipe_peer->pipe_present) {
+		if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
 			/* other end of pipe has been closed */
 			PIPE_UNLOCK(cpipe);
 			return (EPIPE);
@@ -1579,13 +1588,8 @@ filt_pipedetach(struct knote *kn)
 	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;
 
 	PIPE_LOCK(cpipe);
-	if (kn->kn_filter == EVFILT_WRITE) {
-		if (!cpipe->pipe_peer->pipe_present) {
-			PIPE_UNLOCK(cpipe);
-			return;
-		}
+	if (kn->kn_filter == EVFILT_WRITE)
 		cpipe = cpipe->pipe_peer;
-	}
 	knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
 	PIPE_UNLOCK(cpipe);
 }
@@ -1604,7 +1608,8 @@ filt_piperead(struct knote *kn, long hin
 		kn->kn_data = rpipe->pipe_map.cnt;
 
 	if ((rpipe->pipe_state & PIPE_EOF) ||
-	    (!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+	    wpipe->pipe_present != PIPE_ACTIVE ||
+	    (wpipe->pipe_state & PIPE_EOF)) {
 		kn->kn_flags |= EV_EOF;
 		PIPE_UNLOCK(rpipe);
 		return (1);
@@ -1622,7 +1627,8 @@ filt_pipewrite(struct knote *kn, long hi
 	struct pipe *wpipe = rpipe->pipe_peer;
 
 	PIPE_LOCK(rpipe);
-	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
+	if (wpipe->pipe_present != PIPE_ACTIVE ||
+	    (wpipe->pipe_state & PIPE_EOF)) {
 		kn->kn_data = 0;
 		kn->kn_flags |= EV_EOF;
 		PIPE_UNLOCK(rpipe);

Modified: stable/6/sys/sys/event.h
==============================================================================
--- stable/6/sys/sys/event.h	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/sys/event.h	Fri Oct  2 18:09:56 2009	(r197715)
@@ -208,6 +208,7 @@ struct proc;
 struct knlist;
 
 extern void	knote(struct knlist *list, long hint, int islocked);
+extern void	knote_fork(struct knlist *list, int pid);
 extern void	knlist_add(struct knlist *knl, struct knote *kn, int islocked);
 extern void	knlist_remove(struct knlist *knl, struct knote *kn, int islocked);
 extern void	knlist_remove_inevent(struct knlist *knl, struct knote *kn);

Modified: stable/6/sys/sys/pipe.h
==============================================================================
--- stable/6/sys/sys/pipe.h	Fri Oct  2 17:58:47 2009	(r197714)
+++ stable/6/sys/sys/pipe.h	Fri Oct  2 18:09:56 2009	(r197715)
@@ -115,6 +115,13 @@ struct pipe {
 };
 
 /*
+ * Values for the pipe_present.
+ */
+#define PIPE_ACTIVE		1
+#define	PIPE_CLOSING		2
+#define	PIPE_FINALIZED		3
+
+/*
  * Container structure to hold the two pipe endpoints, mutex, and label
  * pointer.
  */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200910021809.n92I9v0c009121>