Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 14 Aug 1998 02:45:46 +0200 (CEST)
From:      Stefan Bethke <stb@hanse.de>
To:        freebsd-stable@FreeBSD.ORG
Cc:        freebsd-current@FreeBSD.ORG
Subject:   Re: Huge Bug in FreeBSD not fixed?
Message-ID:  <Pine.BSF.3.96.980814023617.9883A-300000@transit.hanse.de>
In-Reply-To: <1682190.3111854089@d254.promo.de>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
[ please followup technical discussions to -current. ]

On Tue, 11 Aug 1998, Stefan Bethke wrote:

> On Die, 11. Aug 1998 13:33 Uhr +0200 Thomas Gellekum
> <tg@ihf.rwth-aachen.de> wrote:
> 
> > I have run this program five times and it finished once. The other
> > four occasions I got
> > 
> > Fatal trap 12: page fault while in kernel mode
> > fault virtual address   = 0x18
> > fault code              = supervisor write, page not present
> > instruction pointer     = 0x8:0xf0126d21
> > stack pointer           = 0x10:0xefbffe50
> > frame pointer           = 0x10:0xefbffe74
> > code segment            = base 0x0, limit 0xfffff, type 0x1b
> >                         = DPL 0, pres 1, def32 1, gran 1
> > processor eflags        = interrupt enabled, resume, IOPL = 0
> > current process         = 395 (crashbsd)
> > interrupt mask          = 
> > kernel: type 12 trap, code=0
> > Stopped at      _sosend+0x391:  movl   $0, 0x18(%ebx)
> > 
> > After saving the core dump and recompiling a few object files with -g:
> 
> > #9  0xf01c0a37 in trap (frame={tf_es = -2147483632, tf_ds = -272695280, 
> >       tf_edi = -272630136, tf_esi = -2147483648, tf_ebp = -272630156, 
> >       tf_isp = -272630212, tf_ebx = 0, tf_edx = 2147483647, 
> >       tf_ecx = -1073277766, tf_eax = 0, tf_trapno = 12, tf_err = 2, 
> >       tf_eip = -267227871, tf_cs = 8, tf_eflags = 66198, tf_esp = 0, 
> >       tf_ss = 1}) at ../../i386/i386/trap.c:324
> > #10 0xf0126d21 in sosend (so=0xf0937f00, addr=0x0, uio=0xefbffeb0,
> > top=0x0,  control=0xf06fff00, flags=0) at ../../kern/uipc_socket.c:432
> 
> Looking at kern/uipc_socket.c:sosend(), one can easily spot the problem

> Because sosend() expects a MGET(m, M_WAIT, MT_DATA) to always succeed, it
> pagefaults while trying to manipulate the non-allocated mbuf
> (m->m_pkthdr.len  at 0+0x18).

> The solution would be either to make MGET() and MGETHDR() always succeed
> (or sleep indefinitely), or to check the result of any of those calls (as
> many callers already do).

> This is in both -stable and -current.

I've made a simple patch to uipc_mbuf.c that makes sure MGET() and
MGETHDR() always succeed when called with M_WAIT.

The patch (attached) is against -current about 48 hrs ago.

I've done a little testing (slightly modified version of the test program,
also attached). It *seems* to work. I'd really appreciate comments from
those more knowledgeable...

Cheers,
Stefan

--
Stefan Bethke
Muehlendamm 12            Phone: +49-40-256848, +49-177-3504009
D-22087 Hamburg           <stefan.bethke@hanse.de>
Hamburg, Germany          <stb@freebsd.org>

[-- Attachment #2 --]
Index: uipc_mbuf.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.37
diff -u -r1.37 uipc_mbuf.c
--- uipc_mbuf.c	1998/07/27 03:59:48	1.37
+++ uipc_mbuf.c	1998/08/14 00:34:53
@@ -250,6 +250,7 @@
 	int i, t;
 {
 	register struct mbuf *m;
+	static int have_warned = 0;
 
 	/*
 	 * Must only do the reclaim if not in an interrupt context.
@@ -264,9 +265,30 @@
 	} else {
 		if (i == M_DONTWAIT)
 			mbstat.m_drops++;
-		else
-			panic("Out of mbuf clusters");
+		else {
+			if (!have_warned) {
+				printf ("Out of mbufs -- increase MAXUSERS\n");
+				have_warned = 1;
+			}
+			/*
+			 * Because the caller passed M_WAIT, we're allowed to
+			 * tsleep().
+			 */
+			while (m == NULL) {
+				(void)tsleep((caddr_t)&mmbfree, PSOCK, "mmbfree", 100);
+				/* XXX we should do something with the return value? */
+				m_reclaim();
+				MGET(m, i, t);
+			}
+		}
 	}
+
+	/* Should another caller come by and m_reclaim() actually
+	 * free'd some mbufs, wake up the others sleeping.
+	 */
+	if (mmbfree)
+		wakeup (&mmbfree);
+
 	return (m);
 }
 
@@ -278,6 +300,7 @@
 	int i, t;
 {
 	register struct mbuf *m;
+	static int have_warned = 0;
 
 	/*
 	 * Must only do the reclaim if not in an interrupt context.
@@ -292,9 +315,30 @@
 	} else {
 		if (i == M_DONTWAIT)
 			mbstat.m_drops++;
-		else
-			panic("Out of mbuf clusters");
+		else {
+			if (!have_warned) {
+				printf ("Out of mbufs -- increase MAXUSERS\n");
+				have_warned = 1;
+			}
+			/*
+			 * Because the caller passed M_WAIT, we're allowed to
+			 * tsleep().
+			 */
+			while (m == NULL) {
+				(void)tsleep((caddr_t)&mmbfree, PSOCK, "mmbfree", 100);
+				/* XXX we should do something with the return value? */
+				m_reclaim();
+				MGET(m, i, t);
+			}
+		}
 	}
+
+	/* Should another caller come by and m_reclaim() actually
+	 * free'd some mbufs, wake up the others sleeping.
+	 */
+	if (mmbfree)
+		wakeup (&mmbfree);
+
 	return (m);
 }
 

[-- Attachment #3 --]
/* crashbsd.c
**
** THIS PROGRAM CAUSES KERNEL PANIC ON SOME SYSTEMS
**
** Usage: crashbsd [--harder]
**
** --harder option causes this program to leave opened file descriptors hanging
** thus increasing the probability of the crash.
**
*/

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/wait.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Stress descriptor passing over UNIX-domain sockets.
 *
 * Forks up to 60 workers.  Each worker creates a socket pair and forks
 * again: one side sends a descriptor for /dev/null 2048 times as
 * SCM_RIGHTS ancillary data (with one byte of payload), the other side
 * receives it.  When "--harder" is given as the first argument the receiver
 * never closes the descriptors it is handed.
 *
 * Returns 0 after every worker that could be started has been reaped.
 */
int main(int argc,char **argv) {
	int harder=0, p, nproc, h, i, n, fd, socketfds[2];
	char a[10]={0};
	struct iovec iov1={a,1};
	struct cmsghdr *cm;
	struct msghdr msg;
	/*
	 * Ancillary-data buffer, sized for the 24 descriptors the receiver
	 * advertises room for.  The union guarantees cmsghdr alignment, which
	 * a bare char array does not.
	 */
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int)*24)];
	} control;

	if(argc>=2&&!strcmp(argv[1],"--harder")) harder=1;

	nproc=-1;
	for(i=0;i<60;i++) {
		if ((p=fork())==0) {
			/* Worker: never returns to the outer loop. */
			if (socketpair(AF_UNIX,SOCK_STREAM,0,socketfds)) {
				perror("socketpair");
			} else {
				memset(&msg,0,sizeof(msg));
				memset(&control,0,sizeof(control));
				cm=&control.hdr;
				cm->cmsg_level=SOL_SOCKET;
				cm->cmsg_type=SCM_RIGHTS;
				/* CMSG_LEN accounts for any padding after the header. */
				cm->cmsg_len=CMSG_LEN(sizeof(int));
				msg.msg_name=NULL;
				msg.msg_namelen=0;
				msg.msg_flags=0;
				msg.msg_iov=&iov1;
				msg.msg_iovlen=1;
				msg.msg_control=control.buf;
				msg.msg_controllen=cm->cmsg_len;
				if ((p=fork()) > 0) {
					/* Sender side. */
					close(socketfds[0]);
					fd=open("/dev/null",O_RDONLY);
					if (fd < 0) {
						/*
						 * Without a valid descriptor every sendmsg()
						 * would fail and the retry loop below would
						 * never terminate.
						 */
						perror("open");
						exit(1);
					}
					memcpy(CMSG_DATA(cm),&fd,sizeof(fd));
					for(n=0;n<2048;n++) {
						fprintf(stdout,"%d> ",n+1);
						/* Retry until the single payload byte goes out. */
						while(sendmsg(socketfds[1],&msg,0)!=1) {
							if(errno!=EAGAIN) {
								perror("\nsendmsg");
							}
						}
					}
				} else {
					if (p < 0) {
						perror("fork");
						exit (0);
					}
					/* Receiver side. */
					close(socketfds[1]);
					for(n=0;n<2048;n++) {
						/*
						 * recvmsg() rewrites the msghdr, so everything
						 * is set up afresh for each message.  The
						 * descriptor slot is preset to -1 so a message
						 * without ancillary data is visible as "(-1)".
						 */
						fd=-1;
						fprintf(stdout,">%d ",n+1);
						cm=&control.hdr;
						cm->cmsg_level=SOL_SOCKET;
						cm->cmsg_type=SCM_RIGHTS;
						cm->cmsg_len=CMSG_LEN(sizeof(int)*24);
						memcpy(CMSG_DATA(cm),&fd,sizeof(fd));
						msg.msg_name=NULL;
						msg.msg_namelen=0;
						iov1.iov_len=10;
						msg.msg_iov=&iov1;
						msg.msg_iovlen=1;
						msg.msg_control=control.buf;
						msg.msg_controllen=cm->cmsg_len;

						if(recvmsg(socketfds[0],&msg,0)!=1){
							perror("\nrecvmsg");
						} else {
							memcpy(&fd,CMSG_DATA(cm),sizeof(fd));
							fprintf(stdout,"(%d) ",fd);
							if(!harder) {
								close(fd);
							}
						}
					}
					exit(0);
				}
				/* Sender reaps its receiver before exiting. */
				wait(&h);
			}
			exit(0);
		} else {
			if(p<0) {
				/* Out of processes: only reap the workers started so far. */
				nproc=i;
				break;
			}
		}
	}
	if(nproc<0)
		nproc=60;
	for(i=0;i<nproc;i++)
		wait(&h);
	fprintf(stderr,"\n%d processes finished\n",nproc);
	return 0;
}

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.3.96.980814023617.9883A-300000>