From owner-freebsd-bugs Wed Jun 28 16:16:50 2000 Delivered-To: freebsd-bugs@freebsd.org Received: from ns.tar.com (ns.tar.com [204.95.187.2]) by hub.freebsd.org (Postfix) with ESMTP id D0A5437BA4A for ; Wed, 28 Jun 2000 16:16:26 -0700 (PDT) (envelope-from dick@tar.com) Received: from test.tar.com (test [204.95.187.4]) by ns.tar.com (8.9.3/8.9.3) with ESMTP id SAA88717; Wed, 28 Jun 2000 18:15:57 -0500 (CDT) (envelope-from dick@tar.com) Received: by test.tar.com (Postfix, from userid 1000) id 3F90D81D4A; Wed, 28 Jun 2000 18:15:56 -0500 (CDT) Date: Wed, 28 Jun 2000 18:15:56 -0500 From: "Richard Seaman, Jr." To: Andy Farkas Cc: freebsd-bugs@FreeBSD.ORG Subject: Re: kern/12758: idprio-related panic in -stable and -current Message-ID: <20000628181556.A402@tar.com> References: <200006281600.JAA75299@freefall.freebsd.org> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="45Z9DzgjV8m4Oswq" X-Mailer: Mutt 1.0.1i In-Reply-To: <200006281600.JAA75299@freefall.freebsd.org>; from andyf@speednet.com.au on Wed, Jun 28, 2000 at 09:00:02AM -0700 Sender: owner-freebsd-bugs@FreeBSD.ORG Precedence: bulk X-Loop: FreeBSD.org --45Z9DzgjV8m4Oswq Content-Type: text/plain; charset=us-ascii On Wed, Jun 28, 2000 at 09:00:02AM -0700, Andy Farkas wrote: > This PR is comming up to its first year anniversary! > > I just tried it on a 4.0-STABLE built Jun 28 2000 and got a "kernel trap > 12 with interrupts disabled" followed by two(!) "Fatal trap 12: page fault > while in kernel mode" dumps. Reproducable. > > It triggers when you kill the idprio'd process, which is a bit different > #include #include +#include /* For need_resched */ +#include /* For need_resched */ static int donice __P((struct proc *curp, struct proc *chgp, int n)); /* dosetrlimit non-static: Needed by SysVR4 emulator */ @@ -233,37 +235,54 @@ struct proc *curp; register struct rtprio_args *uap; { - register struct proc *p; - register struct pcred *pcred = curp->p_cred; struct rtprio rtp; int error; - error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); - if (error) - return (error); + switch (uap->function) { + case RTP_LOOKUP: + error = dortprio(curp, uap->pid, &rtp, uap->function); + if (error) + return (error); + return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); + case RTP_SET: + error = copyin(uap->rtp, &rtp, sizeof(struct rtprio)); + if (error) + return (error); + return (dortprio(curp, uap->pid, &rtp, uap->function)); + } + return (EINVAL); +} - if (uap->pid == 0) +int dortprio(struct proc *curp, pid_t pid, struct rtprio *rtp, int function) +{ + register struct pcred *pcred = curp->p_cred; + register struct proc *p; + int c_class, p_class; + + if (pid == 0) p = curp; else - p = pfind(uap->pid); + p = pfind(pid); if (p == 0) return (ESRCH); - switch (uap->function) { - case RTP_LOOKUP: - return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio))); - case RTP_SET: - if (pcred->pc_ucred->cr_uid && pcred->p_ruid && - pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid && - pcred->p_ruid != p->p_ucred->cr_uid) - return (EPERM); - /* disallow setting rtprio in most cases if not superuser */ - if (suser(curp)) { - /* can't set someone else's */ - if (uap->pid) - return (EPERM); - /* can't set realtime priority */ + if (function == RTP_LOOKUP) { + *rtp = p->p_rtprio; + return (0); + } else if (function != RTP_SET) + return (EINVAL); + + if (pcred->pc_ucred->cr_uid && pcred->p_ruid && + pcred->pc_ucred->cr_uid != p->p_ucred->cr_uid && + pcred->p_ruid != p->p_ucred->cr_uid) + return (EPERM); + /* disallow setting rtprio in most cases if not superuser */ + if (suser(curp)) { + /* can't set someone else's */ + if (pid) + return (EPERM); + /* can't set realtime priority */ /* * Realtime priority has to be restricted for reasons which should be * obvious. However, for idle priority, there is a potential for @@ -272,29 +291,76 @@ * due to a CPU-bound normal process). Fix me! XXX */ #if 0 - if (RTP_PRIO_IS_REALTIME(rtp.type)) + if (RTP_PRIO_IS_REALTIME(rtp->type)) #endif - if (rtp.type != RTP_PRIO_NORMAL) + if (rtp->type != RTP_PRIO_NORMAL) return (EPERM); - } - switch (rtp.type) { + } + + /* + * We are implicitly assuming that RTP_PRIO_MIN and + * RTP_NORMAL_PRIO_MIN are zero. This is currently + * true. If this changes, we should explicitly test + * against the MIN values. + */ + switch (rtp->type) { #ifdef RTP_PRIO_FIFO - case RTP_PRIO_FIFO: + case RTP_PRIO_FIFO: #endif - case RTP_PRIO_REALTIME: - case RTP_PRIO_NORMAL: - case RTP_PRIO_IDLE: - if (rtp.prio > RTP_PRIO_MAX) - return (EINVAL); - p->p_rtprio = rtp; - return (0); - default: + case RTP_PRIO_REALTIME: + case RTP_PRIO_IDLE: + if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); - } - + break; + case RTP_PRIO_NORMAL: + if (rtp->prio > RTP_NORMAL_PRIO_MAX) + return (EINVAL); + break; default: return (EINVAL); } + + if (p != curproc) { + maybe_resched(p); + } else { + /* + * XXX maybe_resched is broken for the REALTIME case if + * p == curproc. For now we work around it. Possibly + * we are the only potential caller of maybe_resched() + * involving priority changes to types other than + * RTP_PRIO_NORMAL. If so, perhaps there is no need to + * fix maybe_resched(). + */ + c_class = RTP_PRIO_BASE(p->p_rtprio.type); + p_class = RTP_PRIO_BASE(rtp->type); + if (p_class != c_class) { + if (p_class > c_class) + need_resched(); + } else if (p_class == RTP_PRIO_NORMAL) { + maybe_resched(p); + } else if (rtp->prio > p->p_rtprio.prio) { + need_resched(); + } + } + /* + * If p is on a runqueue and the priority has changed, we + * need to put it on a new runqueue. + */ + if ((p != curproc) && +#ifdef SMP + p->p_oncpu == 0xff && /* idle */ +#endif + p->p_stat == SRUN && + (p->p_flag & P_INMEM) && + (p->p_rtprio.type != rtp->type || + p->p_rtprio.prio != rtp->prio)) { + remrunqueue(p); + p->p_rtprio = *rtp; + setrunqueue(p); + } else + p->p_rtprio = *rtp; + + return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) Index: kern/kern_synch.c =================================================================== RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v retrieving revision 1.93 diff -u -r1.93 kern_synch.c --- kern/kern_synch.c 2000/05/26 02:04:35 1.93 +++ kern/kern_synch.c 2000/06/28 18:51:24 @@ -70,7 +70,6 @@ static int curpriority_cmp __P((struct proc *p)); static void endtsleep __P((void *)); -static void maybe_resched __P((struct proc *chk)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); @@ -103,6 +102,18 @@ * if p is on the same scheduler as curproc. Otherwise the process on the * more realtimeish scheduler has lowest priority. As usual, a higher * priority really means a lower priority. + * + * XXXX It is possible that p and curproc are the same when this is called. + * In this case a change in class will not be detected, and the comparison of + * the prio values when the classes are the same and they are not RTP_PRIO_NORMAL + * is meaningless since the prio will always be identical. This comparison + * should involve p and the curproc's priority before any changes (eg. its + * priority when it entered the kernel). + * + * XXXX In the case p_class == RTP_PRIO_NORMAL, this comparison may not be + * correct since we can get called after p->p_usrpri has been changed, but + * before p->p_priority has been adjusted. Possibly the correct comparison + * in this case is (p->p_usrpri - curpriority)? */ static int curpriority_cmp(p) @@ -123,7 +134,7 @@ * Arrange to reschedule if necessary, taking the priorities and * schedulers into account. */ -static void +void maybe_resched(chk) struct proc *chk; { Index: posix4/p1003_1b.c =================================================================== RCS file: /home/ncvs/src/sys/posix4/p1003_1b.c,v retrieving revision 1.6 diff -u -r1.6 p1003_1b.c --- posix4/p1003_1b.c 2000/05/01 20:26:16 1.6 +++ posix4/p1003_1b.c 2000/06/27 01:22:57 @@ -44,213 +44,213 @@ #include #include #include +#include /* For need_resched */ +#include /* For need_resched */ #include MALLOC_DEFINE(M_P31B, "p1003.1b", "Posix 1003.1B"); -/* p31b_proc: Return a proc struct corresponding to a pid to operate on. - * - * Enforce permission policy. - * - * The policy is the same as for sending signals except there - * is no notion of process groups. - * - * pid == 0 means my process. - * - * This is disabled until I've got a permission gate in again: - * only root can do this. - */ - -#if 0 -/* - * This is stolen from CANSIGNAL in kern_sig: - * - * Can process p, with pcred pc, do "write flavor" operations to process q? - */ -#define CAN_AFFECT(p, pc, q) \ - ((pc)->pc_ucred->cr_uid == 0 || \ - (pc)->p_ruid == (q)->p_cred->p_ruid || \ - (pc)->pc_ucred->cr_uid == (q)->p_cred->p_ruid || \ - (pc)->p_ruid == (q)->p_ucred->cr_uid || \ - (pc)->pc_ucred->cr_uid == (q)->p_ucred->cr_uid) -#else -#define CAN_AFFECT(p, pc, q) ((pc)->pc_ucred->cr_uid == 0) -#endif - -/* - * p31b_proc: Look up a proc from a PID. If proc is 0 it is - * my own proc. - */ -int p31b_proc(struct proc *p, pid_t pid, struct proc **pp) -{ - int ret = 0; - struct proc *other_proc = 0; - - if (pid == 0) - other_proc = p; - else - other_proc = pfind(pid); - - if (other_proc) - { - /* Enforce permission policy. - */ - if (CAN_AFFECT(p, p->p_cred, other_proc)) - *pp = other_proc; - else - ret = EPERM; - } - else - ret = ESRCH; +struct ksched { + struct timespec rr_interval; +}; - return ret; -} - -/* The system calls return ENOSYS if an entry is called that is - * not run-time supported. I am also logging since some programs - * start to use this when they shouldn't. That will be removed if annoying. - */ -int -syscall_not_present(struct proc *p, const char *s, struct nosys_args *uap) -{ - log(LOG_ERR, "cmd %s pid %d tried to use non-present %s\n", - p->p_comm, p->p_pid, s); - - /* a " return nosys(p, uap); " here causes a core dump. - */ - - return ENOSYS; -} -#if !defined(_KPOSIX_PRIORITY_SCHEDULING) +static struct ksched *ksched; -/* Not configured but loadable via a module: - */ +#define p4prio_to_rtpprio(P) (RTP_PRIO_MAX - (P)) +#define rtpprio_to_p4prio(P) (RTP_PRIO_MAX - (P)) +#define p4prio_to_normalprio(P) (RTP_NORMAL_PRIO_MAX - (P)) +#define normalprio_to_p4prio(P) (RTP_NORMAL_PRIO_MAX - (P)) + +/* These improve readability a bit for me: + */ +#define P1B_PRIO_MIN rtpprio_to_p4prio(RTP_PRIO_MAX) +#define P1B_PRIO_MAX rtpprio_to_p4prio(RTP_PRIO_MIN) +#define P1B_NORMAL_PRIO_MIN normalprio_to_p4prio(RTP_NORMAL_PRIO_MAX) +#define P1B_NORMAL_PRIO_MAX normalprio_to_p4prio(RTP_NORMAL_PRIO_MIN) static int sched_attach(void) { - return 0; -} + struct ksched *ksched = p31b_malloc(sizeof(*ksched)); -SYSCALL_NOT_PRESENT_GEN(sched_setparam) -SYSCALL_NOT_PRESENT_GEN(sched_getparam) -SYSCALL_NOT_PRESENT_GEN(sched_setscheduler) -SYSCALL_NOT_PRESENT_GEN(sched_getscheduler) -SYSCALL_NOT_PRESENT_GEN(sched_yield) -SYSCALL_NOT_PRESENT_GEN(sched_get_priority_max) -SYSCALL_NOT_PRESENT_GEN(sched_get_priority_min) -SYSCALL_NOT_PRESENT_GEN(sched_rr_get_interval) + ksched->rr_interval.tv_sec = 0; + ksched->rr_interval.tv_nsec = 1000000000L / roundrobin_interval(); -#else + p31b_setcfg(CTL_P1003_1B_PRIORITY_SCHEDULING, 1); -/* Configured in kernel version: - */ -static struct ksched *ksched; - -static int sched_attach(void) -{ - int ret = ksched_attach(&ksched); - - if (ret == 0) - p31b_setcfg(CTL_P1003_1B_PRIORITY_SCHEDULING, 1); - - return ret; + return (0); } int sched_setparam(struct proc *p, struct sched_setparam_args *uap) { + struct sched_param sched_param; + struct rtprio rtp; int e; - struct sched_param sched_param; - copyin(uap->param, &sched_param, sizeof(sched_param)); + e = copyin(uap->param, &sched_param, sizeof(sched_param)); + if (e != 0) + return (e); - (void) (0 - || (e = p31b_proc(p, uap->pid, &p)) - || (e = ksched_setparam(&p->p_retval[0], ksched, p, - (const struct sched_param *)&sched_param)) - ); + rtp = p->p_rtprio; + rtp.prio = sched_param.sched_priority; - return e; + return (dortprio (p, p->p_pid, &rtp, RTP_SET)); } int sched_getparam(struct proc *p, struct sched_getparam_args *uap) { - int e; + struct rtprio rtp; struct sched_param sched_param; + int e; - (void) (0 - || (e = p31b_proc(p, uap->pid, &p)) - || (e = ksched_getparam(&p->p_retval[0], ksched, p, &sched_param)) - ); + /* + * We could skip dortprio and just access p->p_rtprio directly here. + * But we go thru dortprio in case someone ever wants to add any + * credential checking to our ability to access this information. + * This would logically be placed in dortprio(). + */ + e = dortprio(p, 0, &rtp, RTP_LOOKUP); + if (e) + return (e); - if (!e) - copyout(&sched_param, uap->param, sizeof(sched_param)); + switch (rtp.type) + { + case RTP_PRIO_FIFO: + case RTP_PRIO_REALTIME: + case RTP_PRIO_IDLE: + sched_param.sched_priority = rtpprio_to_p4prio(rtp.prio); + break; + + case RTP_PRIO_NORMAL: + sched_param.sched_priority = normalprio_to_p4prio(rtp.prio); + break; + } - return e; + return (copyout(&sched_param, uap->param, sizeof(sched_param))); } + int sched_setscheduler(struct proc *p, struct sched_setscheduler_args *uap) { - int e; + int e = 0; + struct sched_param param; + struct rtprio rtp; + + e = copyin(uap->param, ¶m, sizeof(param)); + if (e != 0) + return (EINVAL); - struct sched_param sched_param; - copyin(uap->param, &sched_param, sizeof(sched_param)); - - (void) (0 - || (e = p31b_proc(p, uap->pid, &p)) - || (e = ksched_setscheduler(&p->p_retval[0], - ksched, p, uap->policy, - (const struct sched_param *)&sched_param)) - ); + switch(uap->policy) + { + case SCHED_RR: + rtp.prio = p4prio_to_rtpprio(param.sched_priority); + rtp.type = RTP_PRIO_REALTIME; + break; + case SCHED_FIFO: + rtp.prio = p4prio_to_rtpprio(param.sched_priority); + rtp.type = RTP_PRIO_FIFO; + break; + case SCHED_IDLE: + rtp.prio = p4prio_to_rtpprio(param.sched_priority); + rtp.type = RTP_PRIO_IDLE; + break; + case SCHED_OTHER: + rtp.prio = p4prio_to_normalprio(param.sched_priority); + rtp.type = RTP_PRIO_NORMAL; + break; + default: + return (EPERM); + } - return e; + return dortprio(p, uap->pid, &rtp, RTP_SET); } + int sched_getscheduler(struct proc *p, struct sched_getscheduler_args *uap) { + struct rtprio rtp; int e; - (void) (0 - || (e = p31b_proc(p, uap->pid, &p)) - || (e = ksched_getscheduler(&p->p_retval[0], ksched, p)) - ); - return e; + e = dortprio(p, uap->pid, &rtp, RTP_LOOKUP); + if (e) + return (e); + + p->p_retval[0] = rtp.type; + return (0); } + int sched_yield(struct proc *p, struct sched_yield_args *uap) { - return ksched_yield(&p->p_retval[0], ksched); + need_resched(); + return (0); } + +/* No credential checking here. Can't imagine we'd ever want it */ int sched_get_priority_max(struct proc *p, struct sched_get_priority_max_args *uap) { - return ksched_get_priority_max(&p->p_retval[0], - ksched, uap->policy); + int e = 0; + + switch (uap->policy) + { + case SCHED_FIFO: + case SCHED_RR: + case SCHED_IDLE: + p->p_retval[0] = P1B_PRIO_MAX; + break; + + case SCHED_OTHER: + p->p_retval[0] = P1B_NORMAL_PRIO_MAX; + break; + + default: + e = EINVAL; + } + + return e; + } + +/* No credential checking here. Can't imagine we'd ever want it */ int sched_get_priority_min(struct proc *p, struct sched_get_priority_min_args *uap) { - return ksched_get_priority_min(&p->p_retval[0], - ksched, uap->policy); + int e = 0; + + switch (uap->policy) + { + case SCHED_FIFO: + case SCHED_RR: + case SCHED_IDLE: + p->p_retval[0] = P1B_PRIO_MIN; + break; + + case SCHED_OTHER: + p->p_retval[0] = P1B_NORMAL_PRIO_MIN; + break; + + default: + e = EINVAL; + } + + return e; } + +/* No credential checking here. Can't imagine we'd ever want it */ int sched_rr_get_interval(struct proc *p, struct sched_rr_get_interval_args *uap) { int e; - (void) (0 - || (e = p31b_proc(p, uap->pid, &p)) - || (e = ksched_rr_get_interval(&p->p_retval[0], ksched, - p, uap->interval)) - ); - - return e; + e = copyin(uap->interval, &ksched->rr_interval, sizeof(uap->interval)); + if (e != 0) + return (EINVAL); + return 0; } - -#endif static void p31binit(void *notused) { Index: posix4/posix4.h =================================================================== RCS file: /home/ncvs/src/sys/posix4/posix4.h,v retrieving revision 1.6 diff -u -r1.6 posix4.h --- posix4/posix4.h 1999/12/27 10:22:09 1.6 +++ posix4/posix4.h 2000/06/27 11:38:31 @@ -41,21 +41,6 @@ #include #include -/* Generate syscall stubs for when something is optionally - * loadable as a module. References "syscall_not_present". - * XXX Good candidate for sys/syscall.h - */ -struct proc; -struct nosys_args; -extern int syscall_not_present(struct proc *, const char *, struct nosys_args *); - -#define SYSCALL_NOT_PRESENT_GEN(SC) \ -int SC (struct proc *p, struct SC##_args *uap) \ -{ \ - return syscall_not_present(p, #SC , (struct nosys_args *)uap); \ -} - - MALLOC_DECLARE(M_P31B); #define p31b_malloc(SIZE) malloc((SIZE), M_P31B, M_WAITOK) @@ -64,53 +49,5 @@ int p31b_proc __P((struct proc *, pid_t, struct proc **)); void p31b_setcfg __P((int, int)); - -#ifdef _KPOSIX_PRIORITY_SCHEDULING - -/* - * KSCHED_OP_RW is a vector of read/write flags for each entry indexed - * by the enum ksched_op. - * - * 1 means you need write access, 0 means read is sufficient. - */ - -enum ksched_op { - -#define KSCHED_OP_RW { 1, 0, 1, 0, 0, 0, 0, 0 } - - SCHED_SETPARAM, - SCHED_GETPARAM, - SCHED_SETSCHEDULER, - SCHED_GETSCHEDULER, - SCHED_YIELD, - SCHED_GET_PRIORITY_MAX, - SCHED_GET_PRIORITY_MIN, - SCHED_RR_GET_INTERVAL, - SCHED_OP_MAX -}; - -struct ksched; - -int ksched_attach(struct ksched **); -int ksched_detach(struct ksched *); - -int ksched_setparam(register_t *, struct ksched *, - struct proc *, const struct sched_param *); -int ksched_getparam(register_t *, struct ksched *, - struct proc *, struct sched_param *); - -int ksched_setscheduler(register_t *, struct ksched *, - struct proc *, int, const struct sched_param *); -int ksched_getscheduler(register_t *, struct ksched *, struct proc *); - -int ksched_yield(register_t *, struct ksched *); - -int ksched_get_priority_max(register_t *, struct ksched *, int); -int ksched_get_priority_min(register_t *, struct ksched *, int); - -int ksched_rr_get_interval(register_t *, struct ksched *, - struct proc *, struct timespec *); - -#endif /* _KPOSIX_PRIORITY_SCHEDULING */ #endif /* _P1003_1B_P1003_1B_H_ */ Index: posix4/sched.h =================================================================== RCS file: /home/ncvs/src/sys/posix4/sched.h,v retrieving revision 1.4 diff -u -r1.4 sched.h --- posix4/sched.h 1999/12/29 04:55:02 1.4 +++ posix4/sched.h 2000/06/26 20:09:45 @@ -49,6 +49,7 @@ #define SCHED_FIFO 1 #define SCHED_OTHER 2 #define SCHED_RR 3 +#define SCHED_IDLE 4 struct sched_param { Index: sys/proc.h =================================================================== RCS file: /home/ncvs/src/sys/sys/proc.h,v retrieving revision 1.106 diff -u -r1.106 proc.h --- sys/proc.h 2000/06/22 22:27:16 1.106 +++ sys/proc.h 2000/06/28 18:51:16 @@ -430,6 +430,7 @@ void procinit __P((void)); int p_trespass __P((struct proc *p1, struct proc *p2)); void resetpriority __P((struct proc *)); +void maybe_resched __P((struct proc *chk)); int roundrobin_interval __P((void)); void schedclock __P((struct proc *)); void setrunnable __P((struct proc *)); Index: sys/rtprio.h =================================================================== RCS file: /home/ncvs/src/sys/sys/rtprio.h,v retrieving revision 1.9 diff -u -r1.9 rtprio.h --- sys/rtprio.h 1999/12/29 04:24:46 1.9 +++ sys/rtprio.h 2000/06/27 01:28:31 @@ -56,6 +56,8 @@ /* priority range */ #define RTP_PRIO_MIN 0 /* Highest priority */ #define RTP_PRIO_MAX 31 /* Lowest priority */ +#define RTP_NORMAL_PRIO_MIN 0 /* Highest priority */ +#define RTP_NORMAL_PRIO_MAX 31 /* Lowest priority */ /* * rtprio() syscall functions @@ -68,6 +70,7 @@ u_short type; u_short prio; }; +int dortprio __P((struct proc *curp, pid_t pid, struct rtprio *rtp, int function)); #endif #ifndef _KERNEL @@ -75,6 +78,9 @@ __BEGIN_DECLS int rtprio __P((int, pid_t, struct rtprio *)); + __END_DECLS +#else + #endif /* !_KERNEL */ #endif /* !_SYS_RTPRIO_H_ */ --45Z9DzgjV8m4Oswq-- To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-bugs" in the body of the message