From owner-cvs-all@FreeBSD.ORG Mon Aug 16 00:51:55 2004 Return-Path: Delivered-To: cvs-all@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id C8E2816A4CE; Mon, 16 Aug 2004 00:51:55 +0000 (GMT) Received: from gw.catspoiler.org (217-ip-163.nccn.net [209.79.217.163]) by mx1.FreeBSD.org (Postfix) with ESMTP id 1513943D2D; Mon, 16 Aug 2004 00:51:55 +0000 (GMT) (envelope-from truckman@FreeBSD.org) Received: from FreeBSD.org (mousie.catspoiler.org [192.168.101.2]) by gw.catspoiler.org (8.12.11/8.12.11) with ESMTP id i7G0pamD001343; Sun, 15 Aug 2004 17:51:40 -0700 (PDT) (envelope-from truckman@FreeBSD.org) Message-Id: <200408160051.i7G0pamD001343@gw.catspoiler.org> Date: Sun, 15 Aug 2004 17:51:36 -0700 (PDT) From: Don Lewis To: julian@elischer.org In-Reply-To: <411E7CD4.7010504@elischer.org> MIME-Version: 1.0 Content-Type: TEXT/plain; charset=us-ascii cc: cvs-src@FreeBSD.org cc: src-committers@FreeBSD.org cc: rwatson@FreeBSD.org cc: cvs-all@FreeBSD.org Subject: Re: cvs commit: src/sys/kern kern_proc.c X-BeenThere: cvs-all@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: CVS commit messages for the entire tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 16 Aug 2004 00:51:56 -0000 On 14 Aug, Julian Elischer wrote: > Robert Watson wrote: >> rwatson 2004-08-14 17:15:16 UTC >> >> FreeBSD src repository >> >> Modified files: >> sys/kern kern_proc.c >> Log: >> Cause pfind() not to return processes in the PRS_NEW state. As a result, >> threads consuming the result of pfind() will not need to check for a NULL >> credential pointer or other signs of an incompletely created process. >> However, this also means that pfind() cannot be used to test for the >> existence or find such a process. Annotate pfind() to indicate that this >> is the case. A review of curent consumers seems to indicate that this is >> not a problem for any of them. This closes a number of race conditions >> that could result in NULL pointer dereferences and related failure modes. >> Other related races continue to exist, especially during iteration of the >> allproc list without due caution. > > possibly part of the answer would be to not put the proc on any queues until it > is more set up.. That's been my opinion for quite a while. I spent some time this morning rototilling fork1() to move pid selection and adding the child process to the allproc and other lists to one location in the code. I also gathered all the scheduler stuff in one spot. I think there are more things that should be done, such as moving the block of code projected by Giant that beings with vm_forkproc() to a spot above where we grab proctree_lock the final time. I think the _PHOLD() and _PRELE() calls could be moved with it. Maybe also move the EVENTHANDLER_INVOKE() call a bit later, near the KNOTE_LOCKED() call. This patch has survived a "make -j10 buildworld" and building some ports on a UP machine. Unfortunately this is not a good time for making extensive changes to the fork code. Index: sys/kern/kern_fork.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_fork.c,v retrieving revision 1.234 diff -u -r1.234 kern_fork.c --- sys/kern/kern_fork.c 15 Aug 2004 06:24:41 -0000 1.234 +++ sys/kern/kern_fork.c 15 Aug 2004 21:17:15 -0000 @@ -194,9 +194,8 @@ int pages; struct proc **procp; { - struct proc *p1, *p2, *pptr; + struct proc *p1, *p2, *p3, *pptr; uid_t uid; - struct proc *newproc; int ok, trypid; static int curfail, pidchecked = 0; static struct timeval lastfail; @@ -283,14 +282,11 @@ } /* Allocate new proc. */ - newproc = uma_zalloc(proc_zone, M_WAITOK); + p2 = uma_zalloc(proc_zone, M_WAITOK); #ifdef MAC - mac_init_proc(newproc); + mac_init_proc(p2); #endif - knlist_init(&newproc->p_klist, &newproc->p_mtx); - - /* We have to lock the process tree while we look for a pid. */ - sx_slock(&proctree_lock); + knlist_init(&p2->p_klist, &p2->p_mtx); /* * Although process entries are dynamically created, we still keep @@ -326,92 +322,6 @@ * are hard-limits as to the number of processes that can run. */ nprocs++; - - /* - * Find an unused process ID. We remember a range of unused IDs - * ready to use (from lastpid+1 through pidchecked-1). - * - * If RFHIGHPID is set (used during system boot), do not allocate - * low-numbered pids. - */ - trypid = lastpid + 1; - if (flags & RFHIGHPID) { - if (trypid < 10) - trypid = 10; - } else { - if (randompid) - trypid += arc4random() % randompid; - } -retry: - /* - * If the process ID prototype has wrapped around, - * restart somewhat above 0, as the low-numbered procs - * tend to include daemons that don't exit. - */ - if (trypid >= PID_MAX) { - trypid = trypid % PID_MAX; - if (trypid < 100) - trypid += 100; - pidchecked = 0; - } - if (trypid >= pidchecked) { - int doingzomb = 0; - - pidchecked = PID_MAX; - /* - * Scan the active and zombie procs to check whether this pid - * is in use. Remember the lowest pid that's greater - * than trypid, so we can avoid checking for a while. - */ - p2 = LIST_FIRST(&allproc); -again: - for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) { - PROC_LOCK(p2); - while (p2->p_pid == trypid || - (p2->p_pgrp != NULL && - (p2->p_pgrp->pg_id == trypid || - (p2->p_session != NULL && - p2->p_session->s_sid == trypid)))) { - trypid++; - if (trypid >= pidchecked) { - PROC_UNLOCK(p2); - goto retry; - } - } - if (p2->p_pid > trypid && pidchecked > p2->p_pid) - pidchecked = p2->p_pid; - if (p2->p_pgrp != NULL) { - if (p2->p_pgrp->pg_id > trypid && - pidchecked > p2->p_pgrp->pg_id) - pidchecked = p2->p_pgrp->pg_id; - if (p2->p_session != NULL && - p2->p_session->s_sid > trypid && - pidchecked > p2->p_session->s_sid) - pidchecked = p2->p_session->s_sid; - } - PROC_UNLOCK(p2); - } - if (!doingzomb) { - doingzomb = 1; - p2 = LIST_FIRST(&zombproc); - goto again; - } - } - sx_sunlock(&proctree_lock); - - /* - * RFHIGHPID does not mess with the lastpid counter during boot. - */ - if (flags & RFHIGHPID) - pidchecked = 0; - else - lastpid = trypid; - - p2 = newproc; - p2->p_state = PRS_NEW; /* protect against others */ - p2->p_pid = trypid; - LIST_INSERT_HEAD(&allproc, p2, p_list); - LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); sx_xunlock(&allproc_lock); /* @@ -511,15 +421,7 @@ p2->p_flag = 0; if (p1->p_flag & P_PROFIL) startprofclock(p2); - mtx_lock_spin(&sched_lock); - p2->p_sflag = PS_INMEM; - /* - * Allow the scheduler to adjust the priority of the child and - * parent while we hold the sched_lock. - */ - sched_fork(td, p2); - mtx_unlock_spin(&sched_lock); p2->p_ucred = crhold(td->td_ucred); td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */ @@ -551,42 +453,109 @@ if (p2->p_textvp) vref(p2->p_textvp); + sx_xlock(&proctree_lock); + /* We have to lock the allproc list while we look for a pid. */ + sx_xlock(&allproc_lock); + /* - * Set up linkage for kernel based threading. + * Find an unused process ID. We remember a range of unused IDs + * ready to use (from lastpid+1 through pidchecked-1). + * + * If RFHIGHPID is set (used during system boot), do not allocate + * low-numbered pids. */ - if ((flags & RFTHREAD) != 0) { - mtx_lock(&ppeers_lock); - p2->p_peers = p1->p_peers; - p1->p_peers = p2; - p2->p_leader = p1->p_leader; - mtx_unlock(&ppeers_lock); - PROC_LOCK(p1->p_leader); - if ((p1->p_leader->p_flag & P_WEXIT) != 0) { - PROC_UNLOCK(p1->p_leader); - /* - * The task leader is exiting, so process p1 is - * going to be killed shortly. Since p1 obviously - * isn't dead yet, we know that the leader is either - * sending SIGKILL's to all the processes in this - * task or is sleeping waiting for all the peers to - * exit. We let p1 complete the fork, but we need - * to go ahead and kill the new process p2 since - * the task leader may not get a chance to send - * SIGKILL to it. We leave it on the list so that - * the task leader will wait for this new process - * to commit suicide. - */ - PROC_LOCK(p2); - psignal(p2, SIGKILL); - PROC_UNLOCK(p2); - } else - PROC_UNLOCK(p1->p_leader); + trypid = lastpid + 1; + if (flags & RFHIGHPID) { + if (trypid < 10) + trypid = 10; } else { - p2->p_peers = NULL; - p2->p_leader = p2; + if (randompid) + trypid += arc4random() % randompid; + } +retry: + /* + * If the process ID prototype has wrapped around, + * restart somewhat above 0, as the low-numbered procs + * tend to include daemons that don't exit. + */ + if (trypid >= PID_MAX) { + trypid = trypid % PID_MAX; + if (trypid < 100) + trypid += 100; + pidchecked = 0; } + if (trypid >= pidchecked) { + int doingzomb = 0; + + pidchecked = PID_MAX; + /* + * Scan the active and zombie procs to check whether this pid + * is in use. Remember the lowest pid that's greater + * than trypid, so we can avoid checking for a while. + */ + p3 = LIST_FIRST(&allproc); +again: + for (; p3 != NULL; p3 = LIST_NEXT(p3, p_list)) { + PROC_LOCK(p3); + while (p3->p_pid == trypid || + (p3->p_pgrp != NULL && + (p3->p_pgrp->pg_id == trypid || + (p3->p_session != NULL && + p3->p_session->s_sid == trypid)))) { + trypid++; + if (trypid >= pidchecked) { + PROC_UNLOCK(p3); + goto retry; + } + } + if (p3->p_pid > trypid && pidchecked > p3->p_pid) + pidchecked = p3->p_pid; + if (p3->p_pgrp != NULL) { + if (p3->p_pgrp->pg_id > trypid && + pidchecked > p3->p_pgrp->pg_id) + pidchecked = p3->p_pgrp->pg_id; + if (p3->p_session != NULL && + p3->p_session->s_sid > trypid && + pidchecked > p3->p_session->s_sid) + pidchecked = p3->p_session->s_sid; + } + PROC_UNLOCK(p3); + } + if (!doingzomb) { + doingzomb = 1; + p3 = LIST_FIRST(&zombproc); + goto again; + } + } + + /* + * RFHIGHPID does not mess with the lastpid counter during boot. + */ + if (flags & RFHIGHPID) + pidchecked = 0; + else + lastpid = trypid; + + p2->p_state = PRS_NEW; /* protect against others */ + p2->p_pid = trypid; + LIST_INSERT_HEAD(&allproc, p2, p_list); + LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); + sx_xunlock(&allproc_lock); + + /* + * Attach the new process to its parent. + * + * If RFNOWAIT is set, the newly created process becomes a child + * of init. This effectively disassociates the child from the + * parent. + */ + if (flags & RFNOWAIT) + pptr = initproc; + else + pptr = p1; + p2->p_pptr = pptr; + LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); - sx_xlock(&proctree_lock); PGRP_LOCK(p1->p_pgrp); PROC_LOCK(p2); PROC_LOCK(p1); @@ -645,20 +614,42 @@ _PHOLD(p1); PROC_UNLOCK(p1); + sx_xunlock(&proctree_lock); + /* - * Attach the new process to its parent. - * - * If RFNOWAIT is set, the newly created process becomes a child - * of init. This effectively disassociates the child from the - * parent. + * Set up linkage for kernel based threading. */ - if (flags & RFNOWAIT) - pptr = initproc; - else - pptr = p1; - p2->p_pptr = pptr; - LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling); - sx_xunlock(&proctree_lock); + if ((flags & RFTHREAD) != 0) { + mtx_lock(&ppeers_lock); + p2->p_peers = p1->p_peers; + p1->p_peers = p2; + p2->p_leader = p1->p_leader; + mtx_unlock(&ppeers_lock); + PROC_LOCK(p1->p_leader); + if ((p1->p_leader->p_flag & P_WEXIT) != 0) { + PROC_UNLOCK(p1->p_leader); + /* + * The task leader is exiting, so process p1 is + * going to be killed shortly. Since p1 obviously + * isn't dead yet, we know that the leader is either + * sending SIGKILL's to all the processes in this + * task or is sleeping waiting for all the peers to + * exit. We let p1 complete the fork, but we need + * to go ahead and kill the new process p2 since + * the task leader may not get a chance to send + * SIGKILL to it. We leave it on the list so that + * the task leader will wait for this new process + * to commit suicide. + */ + PROC_LOCK(p2); + psignal(p2, SIGKILL); + PROC_UNLOCK(p2); + } else + PROC_UNLOCK(p1->p_leader); + } else { + p2->p_peers = NULL; + p2->p_leader = p2; + } /* Inform accounting that we have forked. */ p2->p_acflag = AFORK; @@ -700,10 +691,18 @@ /* * Set the child start time and mark the process as being complete. */ + PROC_LOCK(p2); + PROC_LOCK(p1); microuptime(&p2->p_stats->p_start); mtx_lock_spin(&sched_lock); + p2->p_sflag = PS_INMEM; p2->p_state = PRS_NORMAL; - + /* + * Allow the scheduler to adjust the priority of the child and + * parent while we hold the sched_lock. + */ + sched_fork(td, p2); + PROC_UNLOCK(p2); /* * If RFSTOPPED not requested, make child runnable and add to * run queue. @@ -717,7 +716,6 @@ /* * Now can be swapped. */ - PROC_LOCK(p1); _PRELE(p1); /* @@ -752,15 +750,14 @@ *procp = p2; return (0); fail: - sx_sunlock(&proctree_lock); if (ppsratecheck(&lastfail, &curfail, 1)) printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n", uid); sx_xunlock(&allproc_lock); #ifdef MAC - mac_destroy_proc(newproc); + mac_destroy_proc(p2); #endif - uma_zfree(proc_zone, newproc); + uma_zfree(proc_zone, p2); if (p1->p_flag & P_SA) { PROC_LOCK(p1); thread_single_end();