Date: Thu, 2 Nov 2006 19:38:18 GMT
From: Paolo Pisati <piso@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 109067 for review
Message-ID: <200611021938.kA2JcIVx038729@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=109067 Change 109067 by piso@piso_newluxor on 2006/11/02 19:37:58 IFC@109062 Affected files ... .. //depot/projects/soc2005/libalias/sbin/ipfw/ipfw2.c#11 edit .. //depot/projects/soc2005/libalias/sys/boot/common/load_elf.c#3 integrate .. //depot/projects/soc2005/libalias/sys/boot/i386/boot2/boot2.c#3 integrate .. //depot/projects/soc2005/libalias/sys/boot/i386/libi386/elf32_freebsd.c#3 integrate .. //depot/projects/soc2005/libalias/sys/boot/pc98/boot2/boot.c#3 integrate .. //depot/projects/soc2005/libalias/sys/ddb/db_command.c#4 integrate .. //depot/projects/soc2005/libalias/sys/geom/eli/g_eli.c#6 integrate .. //depot/projects/soc2005/libalias/sys/geom/journal/g_journal.c#3 integrate .. //depot/projects/soc2005/libalias/sys/kern/uipc_mbuf.c#4 integrate .. //depot/projects/soc2005/libalias/sys/kern/uipc_socket.c#8 integrate .. //depot/projects/soc2005/libalias/sys/kern/uipc_syscalls.c#6 integrate .. //depot/projects/soc2005/libalias/sys/net/bridgestp.c#7 integrate .. //depot/projects/soc2005/libalias/sys/net/if_tap.c#4 integrate .. //depot/projects/soc2005/libalias/sys/net/if_tun.c#5 integrate .. //depot/projects/soc2005/libalias/sys/net/ppp_tty.c#2 integrate .. //depot/projects/soc2005/libalias/sys/netgraph/ng_device.c#2 integrate .. //depot/projects/soc2005/libalias/sys/sys/libkern.h#4 integrate .. //depot/projects/soc2005/libalias/sys/sys/mbuf.h#6 integrate .. //depot/projects/soc2005/libalias/sys/sys/socket.h#3 integrate Differences ... 
==== //depot/projects/soc2005/libalias/sbin/ipfw/ipfw2.c#11 (text+ko) ==== @@ -526,8 +526,9 @@ if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET || optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST || - optname == IP_FW_TABLE_GETSIZE || optname == IP_FW_NAT_GET_CONFIG || - optname == IP_FW_NAT_GET_LOG) + optname == IP_FW_TABLE_GETSIZE || + optname == IP_FW_NAT_GET_CONFIG || + optname == IP_FW_NAT_GET_LOG) i = getsockopt(s, IPPROTO_IP, optname, optval, (socklen_t *)optlen); else ==== //depot/projects/soc2005/libalias/sys/boot/common/load_elf.c#3 (text+ko) ==== @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/boot/common/load_elf.c,v 1.35 2006/10/29 14:50:57 ru Exp $"); +__FBSDID("$FreeBSD: src/sys/boot/common/load_elf.c,v 1.36 2006/11/02 17:28:37 ru Exp $"); #include <sys/param.h> #include <sys/exec.h> @@ -263,7 +263,7 @@ #if __ELF_WORD_SIZE == 64 off = - (off & 0xffffffffff000000ull);/* x86_64 relocates after locore */ #else - off = - (off & 0xc0000000u); /* i386 relocates after locore */ + off = - (off & 0xff000000u); /* i386 relocates after locore */ #endif #else off = 0; /* other archs use direct mapped kernels */ ==== //depot/projects/soc2005/libalias/sys/boot/i386/boot2/boot2.c#3 (text+ko) ==== @@ -14,7 +14,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/boot/i386/boot2/boot2.c,v 1.82 2006/10/29 14:50:57 ru Exp $"); +__FBSDID("$FreeBSD: src/sys/boot/i386/boot2/boot2.c,v 1.83 2006/11/02 17:28:38 ru Exp $"); #include <sys/param.h> #include <sys/disklabel.h> @@ -334,7 +334,7 @@ return; } if (fmt == 0) { - addr = hdr.ex.a_entry & 0x3fffffff; + addr = hdr.ex.a_entry & 0xffffff; p = PTOV(addr); fs_off = PAGE_SIZE; if (xfsread(ino, p, hdr.ex.a_text)) @@ -368,7 +368,7 @@ j++; } for (i = 0; i < 2; i++) { - p = PTOV(ep[i].p_paddr & 0x3fffffff); + p = PTOV(ep[i].p_paddr & 0xffffff); fs_off = ep[i].p_offset; if (xfsread(ino, p, ep[i].p_filesz)) return; @@ -389,7 +389,7 @@ p += es[i].sh_size; } } - addr = hdr.eh.e_entry & 
0x3fffffff; + addr = hdr.eh.e_entry & 0xffffff; } bootinfo.bi_esymtab = VTOP(p); bootinfo.bi_kernelname = VTOP(kname); ==== //depot/projects/soc2005/libalias/sys/boot/i386/libi386/elf32_freebsd.c#3 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/boot/i386/libi386/elf32_freebsd.c,v 1.16 2006/10/29 14:50:58 ru Exp $"); +__FBSDID("$FreeBSD: src/sys/boot/i386/libi386/elf32_freebsd.c,v 1.17 2006/11/02 17:28:38 ru Exp $"); #include <sys/param.h> #include <sys/exec.h> @@ -65,7 +65,7 @@ err = bi_load32(fp->f_args, &boothowto, &bootdev, &bootinfop, &modulep, &kernend); if (err != 0) return(err); - entry = ehdr->e_entry & 0x3fffffff; + entry = ehdr->e_entry & 0xffffff; #ifdef DEBUG printf("Start @ 0x%lx ...\n", entry); ==== //depot/projects/soc2005/libalias/sys/boot/pc98/boot2/boot.c#3 (text+ko) ==== @@ -49,7 +49,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/boot/pc98/boot2/boot.c,v 1.15 2006/10/29 14:50:58 ru Exp $"); +__FBSDID("$FreeBSD: src/sys/boot/pc98/boot2/boot.c,v 1.16 2006/11/02 17:28:38 ru Exp $"); #include "boot.h" #include <a.out.h> @@ -199,9 +199,9 @@ /* * We assume that the entry address is the same as the lowest text * address and that the kernel startup code handles relocation by - * this address rounded down to a multiple of 1G. + * this address rounded down to a multiple of 16M. 
*/ - startaddr = head.a_entry & 0x3FFFFFFF; + startaddr = head.a_entry & 0x00FFFFFF; addr = startaddr; printf("Booting %d:%s(%d,%c)%s @ 0x%x\n" , dosdev & 0x0f ==== //depot/projects/soc2005/libalias/sys/ddb/db_command.c#4 (text+ko) ==== @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/ddb/db_command.c,v 1.71 2006/10/10 07:26:53 bde Exp $"); +__FBSDID("$FreeBSD: src/sys/ddb/db_command.c,v 1.72 2006/11/02 11:47:38 kib Exp $"); #include <sys/param.h> #include <sys/linker_set.h> @@ -690,14 +690,22 @@ { struct proc *p; struct thread *td; + jmp_buf jb; + void *prev_jb; LIST_FOREACH(p, &allproc, p_list) { - FOREACH_THREAD_IN_PROC(p, td) { - db_printf("\nTracing command %s pid %d tid %ld td %p\n", - p->p_comm, p->p_pid, (long)td->td_tid, td); - db_trace_thread(td, -1); - if (db_pager_quit) - return; + prev_jb = kdb_jmpbuf(jb); + if (setjmp(jb) == 0) { + FOREACH_THREAD_IN_PROC(p, td) { + db_printf("\nTracing command %s pid %d tid %ld td %p\n", + p->p_comm, p->p_pid, (long)td->td_tid, td); + db_trace_thread(td, -1); + if (db_pager_quit) { + kdb_jmpbuf(prev_jb); + return; + } + } } + kdb_jmpbuf(prev_jb); } } ==== //depot/projects/soc2005/libalias/sys/geom/eli/g_eli.c#6 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.32 2006/11/01 16:05:06 pjd Exp $"); +__FBSDID("$FreeBSD: src/sys/geom/eli/g_eli.c,v 1.33 2006/11/02 09:01:34 pjd Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -642,7 +642,7 @@ for (i = 0; i < threads; i++) { if (g_eli_cpu_is_disabled(i)) { G_ELI_DEBUG(1, "%s: CPU %u disabled, skipping.", - bpp->name, threads); + bpp->name, i); continue; } wr = malloc(sizeof(*wr), M_ELI, M_WAITOK | M_ZERO); ==== //depot/projects/soc2005/libalias/sys/geom/journal/g_journal.c#3 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/geom/journal/g_journal.c,v 1.6 2006/11/02 00:37:39 pjd Exp $"); +__FBSDID("$FreeBSD: src/sys/geom/journal/g_journal.c,v 1.8 
2006/11/02 16:24:18 pjd Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -600,11 +600,9 @@ &error); g_topology_lock(); g_access(cp, -1, 0, 0); - if (error != 0) { + if (buf == NULL) { GJ_DEBUG(1, "Cannot read metadata from %s (error=%d).", cp->provider->name, error); - if (buf != NULL) - g_free(buf); return (error); } @@ -1622,7 +1620,7 @@ } if (bp != NULL) { if (bp->bio_data == NULL) { - nbp = g_clone_bio(pbp); + nbp = g_duplicate_bio(pbp); nbp->bio_cflags = GJ_BIO_READ; nbp->bio_data = pbp->bio_data + cstart - pbp->bio_offset; @@ -1646,7 +1644,7 @@ * Its time for asking data provider. */ GJ_DEBUG(3, "READ(data): (%jd, %jd)", ostart, oend); - nbp = g_clone_bio(pbp); + nbp = g_duplicate_bio(pbp); nbp->bio_cflags = GJ_BIO_READ; nbp->bio_data = pbp->bio_data + ostart - pbp->bio_offset; nbp->bio_offset = ostart; @@ -2239,6 +2237,8 @@ struct g_consumer *cp; int error; + sc = NULL; /* gcc */ + g_topology_assert(); /* * There are two possibilities: ==== //depot/projects/soc2005/libalias/sys/kern/uipc_mbuf.c#4 (text+ko) ==== @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.168 2006/10/22 11:52:13 rwatson Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_mbuf.c,v 1.169 2006/11/02 17:37:21 andre Exp $"); #include "opt_mac.h" #include "opt_param.h" @@ -94,61 +94,61 @@ * chain. */ struct mbuf * -m_getm(struct mbuf *m, int len, int how, short type) +m_getm2(struct mbuf *m, int len, int how, short type, int flags) { - struct mbuf *mb, *top, *cur, *mtail; - int num, rem; - int i; + struct mbuf *mb, *nm = NULL, *mtail = NULL; + + KASSERT(len >= 0, ("%s: len is < 0", __func__)); + + /* Validate flags. */ + flags &= (M_PKTHDR | M_EOR); - KASSERT(len >= 0, ("m_getm(): len is < 0")); + /* Packet header mbuf must be first in chain. */ + if ((flags & M_PKTHDR) && m != NULL) + flags &= ~M_PKTHDR; - /* If m != NULL, we will append to the end of that chain. 
*/ - if (m != NULL) - for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next); - else - mtail = NULL; + /* Loop and append maximum sized mbufs to the chain tail. */ + while (len > 0) { + if (len > MCLBYTES) + mb = m_getjcl(how, type, (flags & M_PKTHDR), + MJUMPAGESIZE); + else if (len >= MINCLSIZE) + mb = m_getcl(how, type, (flags & M_PKTHDR)); + else if (flags & M_PKTHDR) + mb = m_gethdr(how, type); + else + mb = m_get(how, type); - /* - * Calculate how many mbufs+clusters ("packets") we need and how much - * leftover there is after that and allocate the first mbuf+cluster - * if required. - */ - num = len / MCLBYTES; - rem = len % MCLBYTES; - top = cur = NULL; - if (num > 0) { - if ((top = cur = m_getcl(how, type, 0)) == NULL) - goto failed; - top->m_len = 0; - } - num--; + /* Fail the whole operation if one mbuf can't be allocated. */ + if (mb == NULL) { + if (nm != NULL) + m_freem(nm); + return (NULL); + } - for (i = 0; i < num; i++) { - mb = m_getcl(how, type, 0); - if (mb == NULL) - goto failed; - mb->m_len = 0; - cur = (cur->m_next = mb); - } - if (rem > 0) { - mb = (rem >= MINCLSIZE) ? - m_getcl(how, type, 0) : m_get(how, type); - if (mb == NULL) - goto failed; - mb->m_len = 0; - if (cur == NULL) - top = mb; + /* Book keeping. */ + len -= (mb->m_flags & M_EXT) ? mb->m_ext.ext_size : + ((mb->m_flags & M_PKTHDR) ? MHLEN : MLEN); + if (mtail != NULL) + mtail->m_next = mb; else - cur->m_next = mb; + nm = mb; + mtail = mb; + flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ } + if (flags & M_EOR) + mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ + + /* If mbuf was supplied, append new chain to the end of it. 
*/ + if (m != NULL) { + for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) + ; + mtail->m_next = nm; + mtail->m_flags &= ~M_EOR; + } else + m = nm; - if (mtail != NULL) - mtail->m_next = top; - return top; -failed: - if (top != NULL) - m_freem(top); - return NULL; + return (m); } /* @@ -1610,55 +1610,58 @@ #endif +/* + * Copy the contents of uio into a properly sized mbuf chain. + */ struct mbuf * -m_uiotombuf(struct uio *uio, int how, int len, int align) +m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) { - struct mbuf *m_new = NULL, *m_final = NULL; - int progress = 0, error = 0, length, total; + struct mbuf *m, *mb; + int error, length, total; + int progress = 0; + /* + * len can be zero or an arbitrary large value bound by + * the total data supplied by the uio. + */ if (len > 0) total = min(uio->uio_resid, len); else total = uio->uio_resid; + + /* + * The smallest unit returned by m_getm2() is a single mbuf + * with pkthdr. We can't align past it. Align align itself. + */ + if (align) + align &= ~(sizeof(long) - 1); if (align >= MHLEN) - goto nospace; - if (total + align > MHLEN) - m_final = m_getcl(how, MT_DATA, M_PKTHDR); - else - m_final = m_gethdr(how, MT_DATA); - if (m_final == NULL) - goto nospace; - m_final->m_data += align; - m_new = m_final; - while (progress < total) { - length = total - progress; - if (length > MCLBYTES) - length = MCLBYTES; - if (m_new == NULL) { - if (length > MLEN) - m_new = m_getcl(how, MT_DATA, 0); - else - m_new = m_get(how, MT_DATA); - if (m_new == NULL) - goto nospace; + return (NULL); + + /* Give us all or nothing. */ + m = m_getm2(NULL, total + align, how, MT_DATA, flags); + if (m == NULL) + return (NULL); + m->m_data += align; + + /* Fill all mbufs with uio data and update header information. 
*/ + for (mb = m; mb != NULL; mb = mb->m_next) { + length = min(M_TRAILINGSPACE(mb), total - progress); + + error = uiomove(mtod(mb, void *), length, uio); + if (error) { + m_freem(m); + return (NULL); } - error = uiomove(mtod(m_new, void *), length, uio); - if (error) - goto nospace; + + mb->m_len = length; progress += length; - m_new->m_len = length; - if (m_new != m_final) - m_cat(m_final, m_new); - m_new = NULL; + if (flags & M_PKTHDR) + m->m_pkthdr.len += length; } - m_fixhdr(m_final); - return (m_final); -nospace: - if (m_new) - m_free(m_new); - if (m_final) - m_freem(m_final); - return (NULL); + KASSERT(progress == total, ("%s: progress != total", __func__)); + + return (m); } /* ==== //depot/projects/soc2005/libalias/sys/kern/uipc_socket.c#8 (text+ko) ==== @@ -94,7 +94,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.284 2006/10/22 11:52:14 rwatson Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_socket.c,v 1.285 2006/11/02 17:45:28 andre Exp $"); #include "opt_inet.h" #include "opt_mac.h" @@ -813,9 +813,11 @@ #include <vm/vm.h> #include <vm/vm_page.h> #include <vm/vm_object.h> -#endif /*ZERO_COPY_SOCKETS*/ /* + * sosend_copyin() is only used if zero copy sockets are enabled. Otherwise + * sosend_dgram() and sosend_generic() use m_uiotombuf(). + * * sosend_copyin() accepts a uio and prepares an mbuf chain holding part or * all of the data referenced by the uio. If desired, it uses zero-copy. * *space will be updated to reflect data copied in. @@ -939,6 +941,7 @@ *retmp = top; return (error); } +#endif /*ZERO_COPY_SOCKETS*/ #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 
M_NOWAIT : M_WAITOK) @@ -954,7 +957,9 @@ { long space, resid; int clen = 0, error, dontroute; +#ifdef ZERO_COPY_SOCKETS int atomic = sosendallatonce(so) || top; +#endif KASSERT(so->so_type == SOCK_DGRAM, ("sodgram_send: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, @@ -1040,9 +1045,19 @@ if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { +#ifdef ZERO_COPY_SOCKETS error = sosend_copyin(uio, &top, atomic, &space, flags); if (error) goto out; +#else + top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, + (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); + if (top == NULL) { + error = EFAULT; /* only possible error */ + goto out; + } + space -= resid - uio->uio_resid; +#endif resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); @@ -1202,12 +1217,25 @@ if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { +#ifdef ZERO_COPY_SOCKETS error = sosend_copyin(uio, &top, atomic, &space, flags); if (error != 0) { SOCKBUF_LOCK(&so->so_snd); goto release; } +#else + top = m_uiotombuf(uio, M_WAITOK, space, + (atomic ? max_hdr : 0), + (atomic ? M_PKTHDR : 0) | + ((flags & MSG_EOR) ? 
M_EOR : 0)); + if (top == NULL) { + SOCKBUF_LOCK(&so->so_snd); + error = EFAULT; /* only possible error */ + goto release; + } + space -= resid - uio->uio_resid; +#endif resid = uio->uio_resid; } if (dontroute) { ==== //depot/projects/soc2005/libalias/sys/kern/uipc_syscalls.c#6 (text+ko) ==== @@ -33,7 +33,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.239 2006/10/22 11:52:14 rwatson Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/uipc_syscalls.c,v 1.241 2006/11/02 17:37:21 andre Exp $"); #include "opt_compat.h" #include "opt_ktrace.h" @@ -1882,19 +1882,20 @@ struct vnode *vp; struct vm_object *obj = NULL; struct socket *so = NULL; - struct mbuf *m, *m_header = NULL; + struct mbuf *m = NULL; struct sf_buf *sf; struct vm_page *pg; - off_t off, xfsize, hdtr_size, sbytes = 0; - int error, headersize = 0, headersent = 0; + off_t off, xfsize, hdtr_size = 0, sbytes = 0, rem = 0; + int error, headersize = 0, headersent = 0, mnw = 0; int vfslocked; NET_LOCK_GIANT(); - hdtr_size = 0; - /* - * The descriptor must be a regular file and have a backing VM object. + * The file descriptor must be a regular file and have a + * backing VM object. + * File offset must be positive. If it goes beyond EOF + * we send only the header/trailer and no payload data. */ if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) goto done; @@ -1922,7 +1923,17 @@ error = EINVAL; goto done; } - if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0) + if (uap->offset < 0) { + error = EINVAL; + goto done; + } + + /* + * The socket must be a stream socket and connected. + * Remember if it a blocking or non-blocking socket. 
+ */ + if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, + NULL)) != 0) goto done; so = sock_fp->f_data; if (so->so_type != SOCK_STREAM) { @@ -1933,10 +1944,13 @@ error = ENOTCONN; goto done; } - if (uap->offset < 0) { - error = EINVAL; - goto done; - } + /* + * Do not wait on memory allocations but return ENOMEM for + * caller to retry later. + * XXX: Experimental. + */ + if (uap->flags & SF_MNOWAIT) + mnw = 1; #ifdef MAC SOCK_LOCK(so); @@ -1946,290 +1960,307 @@ goto done; #endif - /* - * If specified, get the pointer to the sf_hdtr struct for - * any headers/trailers. - */ + /* If headers are specified copy them into mbufs. */ if (hdr_uio != NULL) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; if (hdr_uio->uio_resid > 0) { - m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); - if (m_header == NULL) + m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), + 0, 0, 0); + if (m == NULL) { + error = mnw ? EAGAIN : ENOBUFS; goto done; - headersize = m_header->m_pkthdr.len; + } + headersize = hdr_uio->uio_resid; if (compat) sbytes += headersize; } } - /* - * Protect against multiple writers to the socket. - */ + /* Protect against multiple writers to the socket. */ SOCKBUF_LOCK(&so->so_snd); (void) sblock(&so->so_snd, M_WAITOK); SOCKBUF_UNLOCK(&so->so_snd); /* - * Loop through the pages in the file, starting with the requested + * Loop through the pages of the file, starting with the requested * offset. Get a file page (do I/O if necessary), map the file page * into an sf_buf, attach an mbuf header to the sf_buf, and queue * it on the socket. + * This is done in two loops. The inner loop turns as many pages + * as it can, up to available socket buffer space, without blocking + * into mbufs to have it bulk delivered into the socket send buffer. + * The outer loop checks the state and available space of the socket + * and takes care of the overall progress. 
*/ - for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { - vm_pindex_t pindex; - vm_offset_t pgoff; + for (off = uap->offset; ; ) { + int loopbytes = 0; + int space = 0; + int done = 0; - pindex = OFF_TO_IDX(off); - VM_OBJECT_LOCK(obj); -retry_lookup: /* - * Calculate the amount to transfer. Not to exceed a page, - * the EOF, or the passed in nbytes. - */ - xfsize = obj->un_pager.vnp.vnp_size - off; - VM_OBJECT_UNLOCK(obj); - if (xfsize > PAGE_SIZE) - xfsize = PAGE_SIZE; - pgoff = (vm_offset_t)(off & PAGE_MASK); - if (PAGE_SIZE - pgoff < xfsize) - xfsize = PAGE_SIZE - pgoff; - if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) - xfsize = uap->nbytes - sbytes; - if (xfsize <= 0) { - if (m_header != NULL) { - m = m_header; - m_header = NULL; - SOCKBUF_LOCK(&so->so_snd); - goto retry_space; - } else - break; - } - /* - * Optimize the non-blocking case by looking at the socket space - * before going to the extra work of constituting the sf_buf. + * Check the socket state for ongoing connection, + * no errors and space in socket buffer. + * If space is low allow for the remainder of the + * file to be processed if it fits the socket buffer. + * Otherwise block in waiting for sufficient space + * to proceed, or if the socket is nonblocking, return + * to userland with EAGAIN while reporting how far + * we've come. + * We wait until the socket buffer has significant free + * space to do bulk sends. This makes good use of file + * system read ahead and allows packet segmentation + * offloading hardware to take over lots of work. If + * we were not careful here we would send off only one + * sfbuf at a time. 
*/ SOCKBUF_LOCK(&so->so_snd); - if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { - if (so->so_snd.sb_state & SBS_CANTSENDMORE) - error = EPIPE; - else - error = EAGAIN; - sbunlock(&so->so_snd); + if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) + so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; +retry_space: + if (so->so_snd.sb_state & SBS_CANTSENDMORE) { + error = EPIPE; + SOCKBUF_UNLOCK(&so->so_snd); + goto done; + } else if (so->so_error) { + error = so->so_error; + so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto done; } - SOCKBUF_UNLOCK(&so->so_snd); - VM_OBJECT_LOCK(obj); - /* - * Attempt to look up the page. - * - * Allocate if not found - * - * Wait and loop if busy. - */ - pg = vm_page_lookup(obj, pindex); - - if (pg == NULL) { - pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | - VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - if (pg == NULL) { - VM_OBJECT_UNLOCK(obj); - VM_WAIT; - VM_OBJECT_LOCK(obj); - goto retry_lookup; + space = sbspace(&so->so_snd); + if (space < rem && + (space <= 0 || + space < so->so_snd.sb_lowat)) { + if (so->so_state & SS_NBIO) { + SOCKBUF_UNLOCK(&so->so_snd); + error = EAGAIN; + goto done; } - } else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) - goto retry_lookup; - else { + /* + * sbwait drops the lock while sleeping. + * When we loop back to retry_space the + * state may have changed and we retest + * for it. + */ + error = sbwait(&so->so_snd); /* - * Wire the page so it does not get ripped out from - * under us. + * An error from sbwait usually indicates that we've + * been interrupted by a signal. If we've sent anything + * then return bytes sent, otherwise return the error. */ - vm_page_lock_queues(); - vm_page_wire(pg); - vm_page_unlock_queues(); + if (error) { + SOCKBUF_UNLOCK(&so->so_snd); + goto done; + } + goto retry_space; } + SOCKBUF_UNLOCK(&so->so_snd); /* - * If page is not valid for what we need, initiate I/O + * Loop and construct maximum sized mbuf chain to be bulk + * dumped into socket buffer. 
*/ + while(space > loopbytes) { + vm_pindex_t pindex; + vm_offset_t pgoff; + struct mbuf *m0; - if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { - VM_OBJECT_UNLOCK(obj); - } else if (uap->flags & SF_NODISKIO) { - error = EBUSY; - } else { - int bsize, resid; - + VM_OBJECT_LOCK(obj); + /* + * Calculate the amount to transfer. + * Not to exceed a page, the EOF, + * or the passed in nbytes. + */ + pgoff = (vm_offset_t)(off & PAGE_MASK); + xfsize = omin(PAGE_SIZE - pgoff, + obj->un_pager.vnp.vnp_size - off - + sbytes - loopbytes); + if (uap->nbytes) + rem = (uap->nbytes - sbytes - loopbytes); + else + rem = obj->un_pager.vnp.vnp_size - off - + sbytes - loopbytes; + xfsize = omin(rem, xfsize); + if (xfsize <= 0) { + VM_OBJECT_UNLOCK(obj); + done = 1; /* all data sent */ + break; + } /* - * Ensure that our page is still around when the I/O - * completes. + * Don't overflow the send buffer. + * Stop here and send out what we've + * already got. */ - vm_page_io_start(pg); - VM_OBJECT_UNLOCK(obj); - + if (space < loopbytes + xfsize) { + VM_OBJECT_UNLOCK(obj); + break; + } +retry_lookup: /* - * Get the page from backing store. + * Attempt to look up the page. + * Allocate if not found or + * wait and loop if busy. */ - bsize = vp->v_mount->mnt_stat.f_iosize; - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - vn_lock(vp, LK_SHARED | LK_RETRY, td); + pindex = OFF_TO_IDX(off); + pg = vm_page_lookup(obj, pindex); + if (pg == NULL) { + pg = vm_page_alloc(obj, pindex, + VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | + VM_ALLOC_WIRED); + if (pg == NULL) { + VM_OBJECT_UNLOCK(obj); + VM_WAIT; + VM_OBJECT_LOCK(obj); + goto retry_lookup; + } + } else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) + goto retry_lookup; + else { + /* + * Wire the page so it does not get + * ripped out from under us. + */ + vm_page_lock_queues(); + vm_page_wire(pg); + vm_page_unlock_queues(); + } + /* - * XXXMAC: Because we don't have fp->f_cred here, - * we pass in NOCRED. 
This is probably wrong, but - * is consistent with our original implementation. + * Check if page is valid for what we need, + * otherwise initiate I/O. + * If we already turned some pages into mbufs, + * send them off before we come here again and + * block. */ - error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, - trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | - IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), - td->td_ucred, NOCRED, &resid, td); - VOP_UNLOCK(vp, 0, td); - VFS_UNLOCK_GIANT(vfslocked); - VM_OBJECT_LOCK(obj); - vm_page_io_finish(pg); - if (!error) + if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) + VM_OBJECT_UNLOCK(obj); + else if (m != NULL) + error = EAGAIN; /* send what we already got */ + else if (uap->flags & SF_NODISKIO) + error = EBUSY; + else { + int bsize, resid; + + /* + * Ensure that our page is still around + * when the I/O completes. + */ + vm_page_io_start(pg); + VM_OBJECT_UNLOCK(obj); + + /* + * Get the page from backing store. + */ + bsize = vp->v_mount->mnt_stat.f_iosize; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + vn_lock(vp, LK_SHARED | LK_RETRY, td); + + /* + * XXXMAC: Because we don't have fp->f_cred + * here, we pass in NOCRED. This is probably + * wrong, but is consistent with our original + * implementation. + */ + error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, + trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | + IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), + td->td_ucred, NOCRED, &resid, td); + VOP_UNLOCK(vp, 0, td); + VFS_UNLOCK_GIANT(vfslocked); + VM_OBJECT_LOCK(obj); + vm_page_io_finish(pg); + if (!error) + VM_OBJECT_UNLOCK(obj); + mbstat.sf_iocnt++; + } + if (error) { + vm_page_lock_queues(); + vm_page_unwire(pg, 0); + /* + * See if anyone else might know about + * this page. If not and it is not valid, + * then free it. 
+ */ + if (pg->wire_count == 0 && pg->valid == 0 && + pg->busy == 0 && !(pg->oflags & VPO_BUSY) && + pg->hold_count == 0) { + vm_page_free(pg); + } + vm_page_unlock_queues(); VM_OBJECT_UNLOCK(obj); - mbstat.sf_iocnt++; - } - - if (error) { - vm_page_lock_queues(); - vm_page_unwire(pg, 0); + if (error == EAGAIN) + error = 0; /* not a real error */ + break; + } + /* - * See if anyone else might know about this page. - * If not and it is not valid, then free it. + * Get a sendfile buf. We usually wait as long + * as necessary, but this wait can be interrupted. */ - if (pg->wire_count == 0 && pg->valid == 0 && - pg->busy == 0 && !(pg->oflags & VPO_BUSY) && - pg->hold_count == 0) { >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200611021938.kA2JcIVx038729>