From owner-freebsd-stable Mon May 7 11:21:36 2001 Delivered-To: freebsd-stable@freebsd.org Received: from fw.wintelcom.net (ns1.wintelcom.net [209.1.153.20]) by hub.freebsd.org (Postfix) with ESMTP id 9522C37B42C for ; Mon, 7 May 2001 11:21:22 -0700 (PDT) (envelope-from bright@fw.wintelcom.net) Received: (from bright@localhost) by fw.wintelcom.net (8.10.0/8.10.0) id f47ILK427403; Mon, 7 May 2001 11:21:20 -0700 (PDT) Date: Mon, 7 May 2001 11:21:20 -0700 From: Alfred Perlstein To: "Nick's Lists" Cc: freebsd-stable@FreeBSD.ORG Subject: please test. Message-ID: <20010507112120.T18676@fw.wintelcom.net> References: <026b01c0d1b9$161f0320$3ba2640a@int.netzero.net> <20010507054623.Q18676@fw.wintelcom.net> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.2.5i In-Reply-To: <20010507054623.Q18676@fw.wintelcom.net>; from alfred@FreeBSD.ORG on Mon, May 07, 2001 at 05:46:24AM -0700 X-all-your-base: are belong to us. Sender: owner-freebsd-stable@FreeBSD.ORG Precedence: bulk X-Loop: FreeBSD.ORG * Alfred Perlstein [010507 05:47] wrote: > * Nick's Lists [010430 14:03] wrote: > > The box is a dual P3 with 2g ram, running Qmail which is handling inbound > > email. I'm running 4.2 Release (though I've had similar problems with 3.4 > > Stable a few months back, and never solved them either). > > > > After 36 - 48 hrs, the box will panic with "panic: pipeinit: cannot > > allocate pipe -- out of kvm -- code = 3". It doesn't seem to happen during > > periods of peak load; the last crash occurred at 3:45AM and there have been > > days when the box has made it through our peak load times with no problems. > > Here's a patch that should make pipes safe; it's untested. If it crashes > you right away then let me know; I'll have a cleaned-up patch ready > later tonight. If it crashes you later then also let me know. 
:) > > For now give this a shot, it forces resource allocation to happen at > pipe creation and safely resizes pipes when the system wants them to > grow. k, sorry about that, I did say it was untested... :) here's one that is tested and seems to boot fine. just: cd /usr/src/sys/kern && patch < /path/to/this/file then remake, install and boot your new kernel. Index: sys_pipe.c =================================================================== RCS file: /home/ncvs/src/sys/kern/sys_pipe.c,v retrieving revision 1.60.2.6 diff -u -r1.60.2.6 sys_pipe.c --- sys_pipe.c 2001/02/26 04:23:16 1.60.2.6 +++ sys_pipe.c 2001/05/07 18:11:24 @@ -144,7 +144,8 @@ static int amountpipekva; static void pipeclose __P((struct pipe *cpipe)); -static void pipeinit __P((struct pipe *cpipe)); +static void pipe_free_kmem __P((struct pipe *cpipe)); +static int pipe_create __P((struct pipe **cpipep)); static __inline int pipelock __P((struct pipe *cpipe, int catch)); static __inline void pipeunlock __P((struct pipe *cpipe)); static __inline void pipeselwakeup __P((struct pipe *cpipe)); @@ -154,7 +155,7 @@ static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); static void pipe_clone_write_buffer __P((struct pipe *wpipe)); #endif -static void pipespace __P((struct pipe *cpipe)); +static int pipespace __P((struct pipe *cpipe, int size)); static vm_zone_t pipe_zone; @@ -170,7 +171,7 @@ int dummy; } */ *uap; { - register struct filedesc *fdp = p->p_fd; + struct filedesc *fdp = p->p_fd; struct file *rf, *wf; struct pipe *rpipe, *wpipe; int fd, error; @@ -178,11 +179,13 @@ if (pipe_zone == NULL) pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); - rpipe = zalloc( pipe_zone); - pipeinit(rpipe); + if (pipe_create(&rpipe) || pipe_create(&wpipe)) { + pipeclose(rpipe); + pipeclose(wpipe); + return (ENFILE); + } + rpipe->pipe_state |= PIPE_DIRECTOK; - wpipe = zalloc( pipe_zone); - pipeinit(wpipe); wpipe->pipe_state |= PIPE_DIRECTOK; error = falloc(p, &rf, &fd); @@ -230,61 +233,82 @@ 
/* * Allocate kva for pipe circular buffer, the space is pageable + * This routine will 'realloc' the size of a pipe safely, if it fails + * it will retain the old buffer. + * If it fails it will return ENOMEM. */ -static void -pipespace(cpipe) +static int +pipespace(cpipe, size) struct pipe *cpipe; + int size; { + struct vm_object *object; + caddr_t buffer; int npages, error; - npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; + npages = round_page(size)/PAGE_SIZE; /* * Create an object, I don't like the idea of paging to/from * kernel_object. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. */ - cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); - cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); + object = vm_object_allocate(OBJT_DEFAULT, npages); + buffer = (caddr_t) vm_map_min(kernel_map); /* * Insert the object into the kernel map, and allocate kva for it. * The map entry is, by default, pageable. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
*/ - error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, - (vm_offset_t *) &cpipe->pipe_buffer.buffer, - cpipe->pipe_buffer.size, 1, + error = vm_map_find(kernel_map, object, 0, + (vm_offset_t *) &buffer, size, 1, VM_PROT_ALL, VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) - panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); + if (error != KERN_SUCCESS) { + vm_object_deallocate(object); + return (ENOMEM); + } + + /* free old resources if we're resizing */ + pipe_free_kmem(cpipe); + cpipe->pipe_buffer.object = object; + cpipe->pipe_buffer.buffer = buffer; + cpipe->pipe_buffer.size = size; + cpipe->pipe_buffer.in = 0; + cpipe->pipe_buffer.out = 0; + cpipe->pipe_buffer.cnt = 0; amountpipekva += cpipe->pipe_buffer.size; + return (0); } /* * initialize and allocate VM and memory for pipe */ -static void -pipeinit(cpipe) - struct pipe *cpipe; +static int +pipe_create(cpipep) + struct pipe **cpipep; { - - cpipe->pipe_buffer.in = 0; - cpipe->pipe_buffer.out = 0; - cpipe->pipe_buffer.cnt = 0; - cpipe->pipe_buffer.size = PIPE_SIZE; + struct pipe *cpipe; + int error; - /* Buffer kva gets dynamically allocated */ - cpipe->pipe_buffer.buffer = NULL; - /* cpipe->pipe_buffer.object = invalid */ + *cpipep = zalloc(pipe_zone); + if (*cpipep == NULL) + return (ENOMEM); + cpipe = *cpipep; + + /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ + cpipe->pipe_buffer.object = NULL; +#ifndef PIPE_NODIRECT + cpipe->pipe_map.kva = NULL; +#endif + /* + * protect so pipeclose() doesn't follow a junk pointer + * if pipespace() fails. 
+ */ cpipe->pipe_state = 0; cpipe->pipe_peer = NULL; cpipe->pipe_busy = 0; - vfs_timestamp(&cpipe->pipe_ctime); - cpipe->pipe_atime = cpipe->pipe_ctime; - cpipe->pipe_mtime = cpipe->pipe_ctime; - bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); #ifndef PIPE_NODIRECT /* @@ -296,6 +320,18 @@ cpipe->pipe_map.npages = 0; /* cpipe->pipe_map.ms[] = invalid */ #endif + + error = pipespace(cpipe, PIPE_SIZE); + if (error) { + return (error); + } + + vfs_timestamp(&cpipe->pipe_ctime); + cpipe->pipe_atime = cpipe->pipe_ctime; + cpipe->pipe_mtime = cpipe->pipe_ctime; + bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); + + return (0); } @@ -308,6 +344,7 @@ int catch; { int error; + while (cpipe->pipe_state & PIPE_LOCK) { cpipe->pipe_state |= PIPE_LWANT; if ((error = tsleep( cpipe, @@ -326,6 +363,7 @@ pipeunlock(cpipe) struct pipe *cpipe; { + cpipe->pipe_state &= ~PIPE_LOCK; if (cpipe->pipe_state & PIPE_LWANT) { cpipe->pipe_state &= ~PIPE_LWANT; @@ -337,6 +375,7 @@ pipeselwakeup(cpipe) struct pipe *cpipe; { + if (cpipe->pipe_state & PIPE_SEL) { cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); @@ -355,7 +394,6 @@ struct proc *p; int flags; { - struct pipe *rpipe = (struct pipe *) fp->f_data; int error; int nread = 0; @@ -575,6 +613,7 @@ struct pipe *wpipe; { int i; + if (wpipe->pipe_map.kva) { pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); @@ -597,7 +636,7 @@ */ static void pipe_clone_write_buffer(wpipe) -struct pipe *wpipe; + struct pipe *wpipe; { int size; int pos; @@ -629,6 +668,7 @@ struct uio *uio; { int error; + retry: while (wpipe->pipe_state & PIPE_DIRECTW) { if ( wpipe->pipe_state & PIPE_WANTR) { @@ -719,7 +759,6 @@ { int error = 0; int orig_resid; - struct pipe *wpipe, *rpipe; rpipe = (struct pipe *) fp->f_data; @@ -742,47 +781,16 @@ (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { - if (wpipe->pipe_buffer.buffer) { - amountpipekva -= wpipe->pipe_buffer.size; - kmem_free(kernel_map, - 
(vm_offset_t)wpipe->pipe_buffer.buffer, - wpipe->pipe_buffer.size); - } - -#ifndef PIPE_NODIRECT - if (wpipe->pipe_map.kva) { - amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; - kmem_free(kernel_map, - wpipe->pipe_map.kva, - wpipe->pipe_buffer.size + PAGE_SIZE); - } -#endif - - wpipe->pipe_buffer.in = 0; - wpipe->pipe_buffer.out = 0; - wpipe->pipe_buffer.cnt = 0; - wpipe->pipe_buffer.size = BIG_PIPE_SIZE; - wpipe->pipe_buffer.buffer = NULL; - ++nbigpipe; - -#ifndef PIPE_NODIRECT - wpipe->pipe_map.cnt = 0; - wpipe->pipe_map.kva = 0; - wpipe->pipe_map.pos = 0; - wpipe->pipe_map.npages = 0; -#endif - - } - - - if( wpipe->pipe_buffer.buffer == NULL) { if ((error = pipelock(wpipe,1)) == 0) { - pipespace(wpipe); + if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) + nbigpipe++; pipeunlock(wpipe); } else { return error; } } + + KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); ++wpipe->pipe_busy; orig_resid = uio->uio_resid; @@ -1004,10 +1012,10 @@ pipe_ioctl(fp, cmd, data, p) struct file *fp; u_long cmd; - register caddr_t data; + caddr_t data; struct proc *p; { - register struct pipe *mpipe = (struct pipe *)fp->f_data; + struct pipe *mpipe = (struct pipe *)fp->f_data; switch (cmd) { @@ -1056,7 +1064,7 @@ struct ucred *cred; struct proc *p; { - register struct pipe *rpipe = (struct pipe *)fp->f_data; + struct pipe *rpipe = (struct pipe *)fp->f_data; struct pipe *wpipe; int revents = 0; @@ -1132,6 +1140,34 @@ return 0; } +static void +pipe_free_kmem(cpipe) + struct pipe *cpipe; +{ + + if (cpipe->pipe_buffer.buffer != NULL) { + if (cpipe->pipe_buffer.size > PIPE_SIZE) + --nbigpipe; + amountpipekva -= cpipe->pipe_buffer.size; + kmem_free(kernel_map, + (vm_offset_t)cpipe->pipe_buffer.buffer, + cpipe->pipe_buffer.size); + cpipe->pipe_buffer.buffer = NULL; + } +#ifndef PIPE_NODIRECT + if (cpipe->pipe_map.kva != NULL) { + amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; + kmem_free(kernel_map, + cpipe->pipe_map.kva, + cpipe->pipe_buffer.size + PAGE_SIZE); + 
cpipe->pipe_map.cnt = 0; + cpipe->pipe_map.kva = 0; + cpipe->pipe_map.pos = 0; + cpipe->pipe_map.npages = 0; + } +#endif +} + /* * shutdown the pipe */ @@ -1140,6 +1176,7 @@ struct pipe *cpipe; { struct pipe *ppipe; + if (cpipe) { pipeselwakeup(cpipe); @@ -1168,22 +1205,7 @@ /* * free resources */ - if (cpipe->pipe_buffer.buffer) { - if (cpipe->pipe_buffer.size > PIPE_SIZE) - --nbigpipe; - amountpipekva -= cpipe->pipe_buffer.size; - kmem_free(kernel_map, - (vm_offset_t)cpipe->pipe_buffer.buffer, - cpipe->pipe_buffer.size); - } -#ifndef PIPE_NODIRECT - if (cpipe->pipe_map.kva) { - amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; - kmem_free(kernel_map, - cpipe->pipe_map.kva, - cpipe->pipe_buffer.size + PAGE_SIZE); - } -#endif + pipe_free_kmem(cpipe); zfree(pipe_zone, cpipe); } } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-stable" in the body of the message