Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 4 Oct 2002 15:41:41 +0200
From:      Stefan Farfeleder <e0026813@stud3.tuwien.ac.at>
To:        Poul-Henning Kamp <phk@critter.freebsd.dk>
Cc:        current@FreeBSD.ORG
Subject:   Re: Junior Kernel Hacker page updated...
Message-ID:  <20021004134141.GA263@frog.fafoe>
In-Reply-To: <4860.1033656106@critter.freebsd.dk>
References:  <20021002214028.GA94673@frog.fafoe> <4860.1033656106@critter.freebsd.dk>

next in thread | previous in thread | raw e-mail | index | archive | help

--NMuMz9nt05w80d4+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Thu, Oct 03, 2002 at 04:41:46PM +0200, Poul-Henning Kamp wrote:
> 
> Hi Stefan,
> 
> I tried this patch and it panicked my (almost-) current machine with
> a pagefault in the kqueue code:  Bravo!
> 
> I can see that there is some amount of #ifdef stuff in your patch,

The #ifdefs are already in the code, namely REMOTE and RMT_WILL_WATCH.
Is anybody using them? A build with -DREMOTE fails to compile, and with
-DRMT_WILL_WATCH the linker complains that the functions Rmt_Ignore(),
Rmt_Watch() and Rmt_Wait() are missing. Can't we get rid of those
defines? I understand Juli Mallett wants to rewrite make, so maybe
this effort would be wasted.

> in light of that, would it be possible to make an #ifdef'ed version
> of your patch which we could commit ?

Ok, the new patch is attached. Compile with -DUSE_KQUEUE to use the new
code.

> That way we give the kqueue hackers a good testcase, and we can
> easily enable when they have solved the problem.

After Don Lewis fixed the 'could sleep with' problem (thanks!), I'm
still encountering freezes and panics. Here's one I caught:

[warning: parts are typed in]
%%%
Fatal trap 12: page fault while in kernel mode
cpuid = 0; lapic.id = 00000000
fault virtual address   = 0x8
fault code              = supervisor read, page not present
instruction pointer     = 0x8:0xc01a1212
stack pointer           = 0x10:0xe5226c14
frame pointer           = 0x10:0xe5226ca0
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, def32 1, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 56525 (make)

kernel: type 12 trap, code = 0

Stopped at    kqueue_scan+0x242:  cmpl $0,0x8(%ebx)
db> trace
kqueue_scan(c6472bf4,4,bfbfebc0,0,c70ecea0) at kqueue_scan+0x242
kevent(c70ecea0,e5226d10,c0351d80,418,6) at kevent+0x1e1
syscall(2f,2f,2f,818d780,818d960) at syscall+0x2be
%%%

The core file doesn't seem to be particularly helpful, as the
kqueue_scan frame seems to be missing:

%%%
(kgdb) bt
#0  doadump () at /freebsd/current/src/sys/kern/kern_shutdown.c:223
#1  0xc01ba92a in boot (howto=260)
    at /freebsd/current/src/sys/kern/kern_shutdown.c:355
#2  0xc01babe7 in panic () at /freebsd/current/src/sys/kern/kern_shutdown.c:508
#3  0xc013b1d2 in db_panic () at /freebsd/current/src/sys/ddb/db_command.c:450
#4  0xc013b152 in db_command (last_cmdp=0xc035db20, cmd_table=0x0, 
    aux_cmd_tablep=0xc03577dc, aux_cmd_tablep_end=0xc03577e0)
    at /freebsd/current/src/sys/ddb/db_command.c:346
#5  0xc013b266 in db_command_loop ()
    at /freebsd/current/src/sys/ddb/db_command.c:472
#6  0xc013deca in db_trap (type=12, code=0)
    at /freebsd/current/src/sys/ddb/db_trap.c:72
#7  0xc02e9f60 in kdb_trap (type=12, code=0, regs=0xe5226bd4)
    at /freebsd/current/src/sys/i386/i386/db_interface.c:166
#8  0xc0302602 in trap_fatal (frame=0xe5226bd4, eva=0)
    at /freebsd/current/src/sys/i386/i386/trap.c:841
#9  0xc03022e2 in trap_pfault (frame=0xe5226bd4, usermode=0, eva=8)
    at /freebsd/current/src/sys/i386/i386/trap.c:760
#10 0xc0301e0d in trap (frame=
      {tf_fs = 24, tf_es = 16, tf_ds = 16, tf_edi = -943606528, tf_esi = -943606488, tf_ebp = -450728800, tf_isp = -450728960, tf_ebx = 0, tf_edx = 4, tf_ecx = 1, tf_eax = 0, tf_trapno = 12, tf_err = 0, tf_eip = -1072033262, tf_cs = 8, tf_eflags = 66050, tf_esp = -966356416, tf_ss = 0})
---Type <return> to continue, or q <return> to quit---
    at /freebsd/current/src/sys/i386/i386/trap.c:446
#11 0xc02eb768 in calltrap () at {standard input}:99
#12 0xc01a0ad1 in kevent (td=0xc70ecea0, uap=0xe5226d10)
    at /freebsd/current/src/sys/kern/kern_event.c:470
#13 0xc030299e in syscall (frame=
      {tf_fs = 47, tf_es = 47, tf_ds = 47, tf_edi = 135845760, tf_esi = 135846240, tf_ebp = -1077941224, tf_isp = -450728588, tf_ebx = 134831872, tf_edx = 2184, tf_ecx = 0, tf_eax = 363, tf_trapno = 12, tf_err = 2, tf_eip = 134626551, tf_cs = 31, tf_eflags = 514, tf_esp = -1077941348, tf_ss = 47})
    at /freebsd/current/src/sys/i386/i386/trap.c:1050
#14 0xc02eb7bd in Xint0x80_syscall () at {standard input}:141
---Can't read userspace from dump, or kernel process---
%%%

objdump -dS kern_event.o says the following about kqueue_scan+0x242
[==0xfd2]:

%%%
     fce:       90                      nop    
     fcf:       90                      nop    
                kn = TAILQ_FIRST(&kq->kq_head);
     fd0:       8b 1f                   mov    (%edi),%ebx
                TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe); 
     fd2:       83 7b 08 00             cmpl   $0x0,0x8(%ebx)
     fd6:       74 0b                   je     fe3 <kqueue_scan+0x253>
     fd8:       8b 53 08                mov    0x8(%ebx),%edx
     fdb:       8b 43 0c                mov    0xc(%ebx),%eax
     fde:       89 42 0c                mov    %eax,0xc(%edx)
     fe1:       eb 06                   jmp    fe9 <kqueue_scan+0x259>
     fe3:       8b 43 0c                mov    0xc(%ebx),%eax
     fe6:       89 47 04                mov    %eax,0x4(%edi)
     fe9:       8b 43 0c                mov    0xc(%ebx),%eax
     fec:       8b 53 08                mov    0x8(%ebx),%edx
     fef:       89 10                   mov    %edx,(%eax)
                if (kn == &marker) {
     ff1:       8d 45 b4                lea    0xffffffb4(%ebp),%eax
     ff4:       39 c3                   cmp    %eax,%ebx
     ff6:       75 18                   jne    1010 <kqueue_scan+0x280>
                        splx(s);
[...]
%%%

This seems to indicate that TAILQ_FIRST returns a null pointer and
TAILQ_REMOVE tries to dereference it, doesn't it? Maybe 'count' is not 0
though it should be?

Regards,
Stefan Farfeleder

--NMuMz9nt05w80d4+
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="make-cond.diff"

Index: job.c
===================================================================
RCS file: /home/ncvs/src/usr.bin/make/job.c,v
retrieving revision 1.43
diff -u -r1.43 job.c
--- job.c	29 Sep 2002 00:20:28 -0000	1.43
+++ job.c	4 Oct 2002 10:58:52 -0000
@@ -106,6 +106,7 @@
 #include <sys/stat.h>
 #include <sys/file.h>
 #include <sys/time.h>
+#include <sys/event.h>
 #include <sys/wait.h>
 #include <err.h>
 #include <errno.h>
@@ -237,9 +238,13 @@
 				 * (2) a job can only be run locally, but
 				 * nLocal equals maxLocal */
 #ifndef RMT_WILL_WATCH
+#ifdef USE_KQUEUE
+static int	kqfd;		/* File descriptor obtained by kqueue() */
+#else
 static fd_set  	outputs;    	/* Set of descriptors of pipes connected to
 				 * the output channels of children */
 #endif
+#endif
 
 STATIC GNode   	*lastNode;	/* The node for which output was most recently
 				 * produced. */
@@ -692,7 +697,7 @@
     if (usePipes) {
 #ifdef RMT_WILL_WATCH
 	Rmt_Ignore(job->inPipe);
-#else
+#elif !defined(USE_KQUEUE)
 	FD_CLR(job->inPipe, &outputs);
 #endif
 	if (job->outPipe != job->inPipe) {
@@ -1267,10 +1272,22 @@
 	     * position in the buffer to the beginning and mark another
 	     * stream to watch in the outputs mask
 	     */
+#ifdef USE_KQUEUE
+	    struct kevent	kev[2];
+#endif
 	    job->curPos = 0;
 
 #ifdef RMT_WILL_WATCH
 	    Rmt_Watch(job->inPipe, JobLocalInput, job);
+#elif defined(USE_KQUEUE)
+	    EV_SET(&kev[0], job->inPipe, EVFILT_READ, EV_ADD, 0, 0, job);
+	    EV_SET(&kev[1], job->pid, EVFILT_PROC, EV_ADD | EV_ONESHOT,
+		NOTE_EXIT, 0, NULL);
+	    if (kevent(kqfd, kev, 2, NULL, 0, NULL) != 0) {
+		/* kevent() will fail if the job is already finished */
+		if (errno != EBADF && errno != ESRCH)
+		    Punt("kevent: %s", strerror(errno));
+	    }
 #else
 	    FD_SET(job->inPipe, &outputs);
 #endif /* RMT_WILL_WATCH */
@@ -2229,10 +2246,16 @@
 Job_CatchOutput()
 {
     int           	  nfds;
+#ifdef USE_KQUEUE
+#define KEV_SIZE	4
+    struct kevent	  kev[KEV_SIZE];
+    int			  i;
+#else
     struct timeval	  timeout;
     fd_set           	  readfds;
     LstNode		  ln;
     Job		   	  *job;
+#endif
 #ifdef RMT_WILL_WATCH
     int	    	  	  pnJobs;   	/* Previous nJobs */
 #endif
@@ -2262,6 +2285,27 @@
     }
 #else
     if (usePipes) {
+#ifdef USE_KQUEUE
+	if ((nfds = kevent(kqfd, NULL, 0, kev, KEV_SIZE, NULL)) == -1) {
+	    Punt("kevent: %s", strerror(errno));
+	} else {
+	    for (i = 0; i < nfds; i++) {
+		if (kev[i].flags & EV_ERROR) {
+		    warnc(kev[i].data, "kevent");
+		    continue;
+		}
+		switch (kev[i].filter) {
+		case EVFILT_READ:
+		    JobDoOutput(kev[i].udata, FALSE);
+		    break;
+		case EVFILT_PROC:
+		    /* Just wake up and let Job_CatchChildren() collect the
+		     * terminated job. */
+		    break;
+		}
+	    }
+	}
+#else
 	readfds = outputs;
 	timeout.tv_sec = SEL_SEC;
 	timeout.tv_usec = SEL_USEC;
@@ -2282,6 +2326,7 @@
 	    }
 	    Lst_Close(jobs);
 	}
+#endif /* !USE_KQUEUE */
     }
 #endif /* RMT_WILL_WATCH */
 }
@@ -2408,6 +2453,12 @@
     }
     if (signal(SIGWINCH, SIG_IGN) != SIG_IGN) {
 	(void) signal(SIGWINCH, JobPassSig);
+    }
+#endif
+
+#ifdef USE_KQUEUE
+    if ((kqfd = kqueue()) == -1) {
+	Punt("kqueue: %s", strerror(errno));
     }
 #endif
 
Index: job.h
===================================================================
RCS file: /home/ncvs/src/usr.bin/make/job.h,v
retrieving revision 1.18
diff -u -r1.18 job.h
--- job.h	26 Sep 2002 01:39:22 -0000	1.18
+++ job.h	4 Oct 2002 10:55:11 -0000
@@ -50,6 +50,7 @@
 
 #define	TMPPAT	"/tmp/makeXXXXXXXXXX"
 
+#ifndef USE_KQUEUE
 /*
  * The SEL_ constants determine the maximum amount of time spent in select
  * before coming out to see if a child has finished. SEL_SEC is the number of
@@ -57,6 +58,7 @@
  */
 #define	SEL_SEC		0
 #define	SEL_USEC	100000
+#endif /* !USE_KQUEUE */
 
 
 /*-

--NMuMz9nt05w80d4+--

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20021004134141.GA263>