Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 30 Jan 2004 12:43:08 +1100
From:      Tim Robbins <tjr@freebsd.org>
To:        Xin LI <delphij@frontfree.net>, current@FreeBSD.ORG, hackers@FreeBSD.ORG, junsu@delphij.net
Subject:   Re: Call for testers: New PID allocator patch for -CURRENT
Message-ID:  <20040130014308.GA21425@cat.robbins.dropbear.id.au>
In-Reply-To: <20040129200442.GA52780@VARK.homeunix.com>
References:  <20040129134121.GB53644@frontfree.net> <20040129200442.GA52780@VARK.homeunix.com>

next in thread | previous in thread | raw e-mail | index | archive | help
On Thu, Jan 29, 2004 at 12:04:42PM -0800, David Schultz wrote:

> On Thu, Jan 29, 2004, Xin LI wrote:
> > Greetings, everyone
> > 
> > A friend of mine has ported NetBSD's new PID allocation[1] code to FreeBSD,
> > you can obtain the patch here:
> > 
> > 	http://www.arbornet.org/~junsu/pid.diff
> > 
> > Some of you may be already aware of this for he has posted[2] this to both
> > current@ and hackers@ last September.
> > 
> > Currently the patchset has been updated to make it applicable to -HEAD.
> > 
> > A number of problems were found and fixed after the first post with many
> > from the FreeBSD community, and we think it's time to post it again and,
> > if you are interested, please give it a chance to run on your own (test)
> > box.
> 
> Thanks for the reminder.  Your patch uses a very clever idea.  I
> messed around with the original patch in your PR a few months ago.
> It would take more work to prove that your proposed patch improves
> performance[1] and is a good approach, and to review and clean up
> the implementation.
[...]
> [1] That shouldn't be hard, given that the present algorithm takes
>     O(N) amortized time in the worst case, and can examine as many
>     as PID_MAX^2 pids if the number of processes in the system is
>     close to PID_MAX.
[...]

In my testing, the current pid allocator turned out to be more efficient
than I had expected. Even after reducing PID_MAX to 10,000 and
fragmenting the pid-space by having sleeping processes at every N'th
pid, it didn't run much slower than the hash-based alternative I was
testing it against.

This is the hash-based pid allocator, if anyone's interested:

Change 43361 by tjr@tjr_dev2 on 2003/12/03 01:30:24

	Improve scalability of process ID allocation:
	- Use hashtables to check whether a pid is in used to avoid traversing
	  the entire allproc and zombproc lists and acquiring each item's proc
	  lock in fork1().
	- Add a hashtable for session IDs, sesshashtbl, similar to pidhashtbl
	  and pgrphashtbl.
	- Keep zombies on pidhash chains. Weed them out in pfind(). Rewrite
	  zpfind() to use pidhash.
	
	PID allocation now scales as a function of the number of used pids
	between lastpid+1 and the first free one, instead of the number of
	processes in the system. This new allocator is expected to be slightly
	slower than the existing one's best-case performance, but should
	easily outperform it when the PID space becomes fragmented.

Affected files ...

... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_exit.c#11 edit
... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_fork.c#14 edit
... //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_proc.c#21 edit
... //depot/user/tjr/freebsd-tjr/src/sys/sys/proc.h#27 edit

Differences ...

==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_exit.c#11 (text+ko) ====

@@ -391,13 +391,12 @@
 	crfree(td->td_ucred);
 
 	/*
-	 * Remove proc from allproc queue and pidhash chain.
+	 * Remove proc from allproc queue.
 	 * Place onto zombproc.  Unlink from parent's child list.
 	 */
 	sx_xlock(&allproc_lock);
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
-	LIST_REMOVE(p, p_hash);
 	sx_xunlock(&allproc_lock);
 
 	sx_xlock(&proctree_lock);
@@ -653,6 +652,7 @@
 			 */
 			sx_xlock(&allproc_lock);
 			LIST_REMOVE(p, p_list);	/* off zombproc */
+			LIST_REMOVE(p, p_hash);	/* off pidhash chain */
 			sx_xunlock(&allproc_lock);
 			LIST_REMOVE(p, p_sibling);
 			leavepgrp(p);

==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_fork.c#14 (text+ko) ====

@@ -158,9 +158,7 @@
  * Random component to lastpid generation.  We mix in a random factor to make
  * it a little harder to predict.  We sanity check the modulus value to avoid
  * doing it in critical paths.  Don't let it be too small or we pointlessly
- * waste randomness entropy, and don't let it be impossibly large.  Using a
- * modulus that is too big causes a LOT more process table scans and slows
- * down fork processing as the pidchecked caching is defeated.
+ * waste randomness entropy, and don't let it be impossibly large.
  */
 static int randompid = 0;
 
@@ -199,8 +197,8 @@
 	struct proc *p1, *p2, *pptr;
 	uid_t uid;
 	struct proc *newproc;
-	int ok, trypid;
-	static int curfail, pidchecked = 0;
+	int ok, trypid, wrapped;
+	static int curfail;
 	static struct timeval lastfail;
 	struct filedesc *fd;
 	struct filedesc_to_leader *fdtol;
@@ -208,6 +206,9 @@
 	struct kse *ke2;
 	struct ksegrp *kg2;
 	struct sigacts *newsigacts;
+	struct proc *pi;
+	struct pgrp *pgi;
+	struct session *si;
 	int error;
 
 	/* Can't copy and clear. */
@@ -323,8 +324,7 @@
 	nprocs++;
 
 	/*
-	 * Find an unused process ID.  We remember a range of unused IDs
-	 * ready to use (from lastpid+1 through pidchecked-1).
+	 * Find an unused process ID.
 	 *
 	 * If RFHIGHPID is set (used during system boot), do not allocate
 	 * low-numbered pids.
@@ -337,63 +337,44 @@
 		if (randompid)
 			trypid += arc4random() % randompid;
 	}
-retry:
-	/*
-	 * If the process ID prototype has wrapped around,
-	 * restart somewhat above 0, as the low-numbered procs
-	 * tend to include daemons that don't exit.
-	 */
-	if (trypid >= PID_MAX) {
-		trypid = trypid % PID_MAX;
-		if (trypid < 100)
-			trypid += 100;
-		pidchecked = 0;
-	}
-	if (trypid >= pidchecked) {
-		int doingzomb = 0;
-
-		pidchecked = PID_MAX;
+	wrapped = 0;
+	while (!wrapped || trypid < lastpid + 1) {
+		/*
+		 * If the process ID prototype has wrapped around,
+		 * restart somewhat above 0, as the low-numbered procs
+		 * tend to include daemons that don't exit.
+		 */
+		if (trypid >= PID_MAX) {
+			trypid = trypid % PID_MAX;
+			if (trypid < 100)
+				trypid += 100;
+			wrapped = 1;
+		}
 		/*
-		 * Scan the active and zombie procs to check whether this pid
-		 * is in use.  Remember the lowest pid that's greater
-		 * than trypid, so we can avoid checking for a while.
+		 * Check that the prospective ID hasn't already been used.
+		 * Use the hash tables to avoid scanning allproc.
 		 */
-		p2 = LIST_FIRST(&allproc);
-again:
-		for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
-			PROC_LOCK(p2);
-			while (p2->p_pid == trypid ||
-			    p2->p_pgrp->pg_id == trypid ||
-			    p2->p_session->s_sid == trypid) {
-				trypid++;
-				if (trypid >= pidchecked) {
-					PROC_UNLOCK(p2);
-					goto retry;
-				}
-			}
-			if (p2->p_pid > trypid && pidchecked > p2->p_pid)
-				pidchecked = p2->p_pid;
-			if (p2->p_pgrp->pg_id > trypid &&
-			    pidchecked > p2->p_pgrp->pg_id)
-				pidchecked = p2->p_pgrp->pg_id;
-			if (p2->p_session->s_sid > trypid &&
-			    pidchecked > p2->p_session->s_sid)
-				pidchecked = p2->p_session->s_sid;
-			PROC_UNLOCK(p2);
+		LIST_FOREACH(pi, PIDHASH(trypid), p_hash) {
+			if (pi->p_pid == trypid)
+				goto trynext;
+		}
+		LIST_FOREACH(pgi, PGRPHASH(trypid), pg_hash) {
+			if (pgi->pg_id == trypid)
+				goto trynext;
 		}
-		if (!doingzomb) {
-			doingzomb = 1;
-			p2 = LIST_FIRST(&zombproc);
-			goto again;
+		LIST_FOREACH(si, SESSHASH(trypid), s_hash) {
+			if (si->s_sid == trypid)
+				goto trynext;
 		}
+		break;
+trynext:
+		trypid++;
 	}
 
 	/*
 	 * RFHIGHPID does not mess with the lastpid counter during boot.
 	 */
-	if (flags & RFHIGHPID)
-		pidchecked = 0;
-	else
+	if (!(flags & RFHIGHPID))
 		lastpid = trypid;
 
 	p2 = newproc;

==== //depot/user/tjr/freebsd-tjr/src/sys/kern/kern_proc.c#21 (text+ko) ====

@@ -90,6 +90,8 @@
 u_long pidhash;
 struct pgrphashhead *pgrphashtbl;
 u_long pgrphash;
+struct sesshashhead *sesshashtbl;
+u_long sesshash;
 struct proclist allproc;
 struct proclist zombproc;
 struct sx allproc_lock;
@@ -123,6 +125,7 @@
 	LIST_INIT(&zombproc);
 	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
 	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
+	sesshashtbl = hashinit(maxproc / 4, M_PROC, &sesshash);
 	proc_zone = uma_zcreate("PROC", sched_sizeof_proc(),
 	    proc_ctor, proc_dtor, proc_init, proc_fini,
 	    UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
@@ -254,7 +257,7 @@
 
 	sx_slock(&allproc_lock);
 	LIST_FOREACH(p, PIDHASH(pid), p_hash)
-		if (p->p_pid == pid) {
+		if (p->p_pid == pid && p->p_state != PRS_ZOMBIE) {
 			PROC_LOCK(p);
 			break;
 		}
@@ -328,6 +331,7 @@
 		sess->s_ttyp = NULL;
 		bcopy(p->p_session->s_login, sess->s_login,
 			    sizeof(sess->s_login));
+		LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash);
 		pgrp->pg_session = sess;
 		KASSERT(p == curproc,
 		    ("enterpgrp: mksession and p != curproc"));
@@ -473,8 +477,9 @@
 	SESS_UNLOCK(savesess);
 	PGRP_UNLOCK(pgrp);
 	if (savesess->s_count == 0) {
+		LIST_REMOVE(savesess, s_hash);
 		mtx_destroy(&savesess->s_mtx);
-		FREE(pgrp->pg_session, M_SESSION);
+		FREE(savesess, M_SESSION);
 	}
 	mtx_destroy(&pgrp->pg_mtx);
 	FREE(pgrp, M_PGRP);
@@ -829,8 +834,8 @@
 	struct proc *p;
 
 	sx_slock(&allproc_lock);
-	LIST_FOREACH(p, &zombproc, p_list)
-		if (p->p_pid == pid) {
+	LIST_FOREACH(p, PIDHASH(pid), p_hash)
+		if (p->p_pid == pid && p->p_state == PRS_ZOMBIE) {
 			PROC_LOCK(p);
 			break;
 		}

==== //depot/user/tjr/freebsd-tjr/src/sys/sys/proc.h#27 (text+ko) ====

@@ -73,6 +73,7 @@
  * (c)		const until freeing
  */
 struct session {
+	LIST_ENTRY(session) s_hash;	/* (e) Hash chain. */
 	int		s_count;	/* (m) Ref cnt; pgrps in session. */
 	struct proc	*s_leader;	/* (m + e) Session leader. */
 	struct vnode	*s_ttyvp;	/* (m) Vnode of controlling tty. */
@@ -799,6 +800,10 @@
 extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl;
 extern u_long pgrphash;
 
+#define	SESSHASH(sid)	(&sesshashtbl[(sid) & sesshash])
+extern LIST_HEAD(sesshashhead, session) *sesshashtbl;
+extern u_long sesshash;
+
 extern struct sx allproc_lock;
 extern struct sx proctree_lock;
 extern struct mtx pargs_ref_lock;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040130014308.GA21425>