Date: Tue, 11 Nov 2014 12:43:49 -0800 From: Adrian Chadd <adrian@freebsd.org> To: Gleb Smirnoff <glebius@freebsd.org> Cc: "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>, "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>, "src-committers@freebsd.org" <src-committers@freebsd.org> Subject: Re: svn commit: r274403 - in head/sys: compat/freebsd32 kern sys Message-ID: <CAJ-Vmom9sO2tEHGH2eMf=k028NzTFaQS952VMm2z%2B7PNkDYW0A@mail.gmail.com> In-Reply-To: <201411112032.sABKWlVl096129@svn.freebsd.org> References: <201411112032.sABKWlVl096129@svn.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
... wait a sec, you're removing this without even asking first? I've been using this locally to implement / benchmark zero-copy socket writes from userland, which is what it was initially intended for. It's the only sane way of implementing notifications for shared memory based sendfile. I was going to release it as part of the RSS development suite so people could write applications without us having to implement actual zero-copy versions of write/writev for sockets. Please put it back into -HEAD. Thanks, -adrian On 11 November 2014 12:32, Gleb Smirnoff <glebius@freebsd.org> wrote: > Author: glebius > Date: Tue Nov 11 20:32:46 2014 > New Revision: 274403 > URL: https://svnweb.freebsd.org/changeset/base/274403 > > Log: > Remove SF_KQUEUE code. This code was developed at Netflix, but was not > ever used. It didn't go into stable/10, neither was documented. > It might be useful, but we collectively decided to remove it, rather > leave it abandoned and unmaintained. It is removed in one single > commit, so restoring it should be easy, if anyone wants to reopen > this idea. > > Sponsored by: Netflix > > Deleted: > head/sys/sys/sf_base.h > head/sys/sys/sf_sync.h > Modified: > head/sys/compat/freebsd32/freebsd32_misc.c > head/sys/kern/kern_descrip.c > head/sys/kern/uipc_syscalls.c > head/sys/sys/file.h > head/sys/sys/socket.h > > Modified: head/sys/compat/freebsd32/freebsd32_misc.c > ============================================================================== > --- head/sys/compat/freebsd32/freebsd32_misc.c Tue Nov 11 20:05:50 2014 (r274402) > +++ head/sys/compat/freebsd32/freebsd32_misc.c Tue Nov 11 20:32:46 2014 (r274403) > @@ -83,10 +83,6 @@ __FBSDID("$FreeBSD$"); > #include <sys/msg.h> > #include <sys/sem.h> > #include <sys/shm.h> > -#include <sys/condvar.h> > -#include <sys/sf_buf.h> > -#include <sys/sf_sync.h> > -#include <sys/sf_base.h> > > #ifdef INET > #include <netinet/in.h> > @@ -1567,26 +1563,16 @@ struct sf_hdtr32 { > int trl_cnt; > }; > > -struct sf_hdtr_kq32 { > - int kq_fd; > - uint32_t kq_flags; > - uint32_t kq_udata; /* 32-bit void ptr */ > - uint32_t kq_ident; /* 32-bit uintptr_t */ > -}; > - > static int > freebsd32_do_sendfile(struct thread *td, > struct freebsd32_sendfile_args *uap, int compat) > { > struct sf_hdtr32 hdtr32; > struct sf_hdtr hdtr; > - struct sf_hdtr_kq32 hdtr_kq32; > - struct sf_hdtr_kq hdtr_kq; > struct uio *hdr_uio, *trl_uio; > struct iovec32 *iov32; > - off_t offset; > + off_t offset, sbytes; > int error; > - off_t sbytes; > > offset = PAIR32TO64(off_t, uap->offset); > if (offset < 0) > @@ -1617,31 +1603,17 @@ freebsd32_do_sendfile(struct thread *td, > if (error) > goto out; > } > - > - /* > - * If SF_KQUEUE is set, then we need to also copy in > - * the kqueue data after the normal hdtr set and set do_kqueue=1. > - */ > - if (uap->flags & SF_KQUEUE) { > - error = copyin(((char *) uap->hdtr) + sizeof(hdtr32), > - &hdtr_kq32, > - sizeof(hdtr_kq32)); > - if (error != 0) > - goto out; > - > - /* 32->64 bit fields */ > - CP(hdtr_kq32, hdtr_kq, kq_fd); > - CP(hdtr_kq32, hdtr_kq, kq_flags); > - PTRIN_CP(hdtr_kq32, hdtr_kq, kq_udata); > - CP(hdtr_kq32, hdtr_kq, kq_ident); > - } > } > > + AUDIT_ARG_FD(uap->fd); > > - /* Call sendfile */ > - /* XXX stack depth! */ > - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat, > - offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq); > + if ((error = fget_read(td, uap->fd, > + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) > + goto out; > + > + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, > + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); > + fdrop(fp, td); > > if (uap->sbytes != NULL) > copyout(&sbytes, uap->sbytes, sizeof(off_t)); > > Modified: head/sys/kern/kern_descrip.c > ============================================================================== > --- head/sys/kern/kern_descrip.c Tue Nov 11 20:05:50 2014 (r274402) > +++ head/sys/kern/kern_descrip.c Tue Nov 11 20:32:46 2014 (r274403) > @@ -3684,7 +3684,7 @@ badfo_chown(struct file *fp, uid_t uid, > static int > badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, > struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, > - int kflags, struct sendfile_sync *sfs, struct thread *td) > + int kflags, struct thread *td) > { > > return (EBADF); > @@ -3770,7 +3770,7 @@ invfo_chown(struct file *fp, uid_t uid, > int > invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, > struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, > - int kflags, struct sendfile_sync *sfs, struct thread *td) > + int kflags, struct thread *td) > { > > return (EINVAL); > > Modified: head/sys/kern/uipc_syscalls.c > ============================================================================== > --- head/sys/kern/uipc_syscalls.c Tue Nov 11 20:05:50 2014 (r274402) > +++ head/sys/kern/uipc_syscalls.c Tue Nov 11 20:32:46 2014 (r274403) > @@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$"); > #include <sys/protosw.h> > #include <sys/rwlock.h> > #include <sys/sf_buf.h> > -#include <sys/sf_sync.h> > -#include <sys/sf_base.h> > #include <sys/sysent.h> > #include <sys/socket.h> > #include <sys/socketvar.h> > @@ -115,10 +113,6 @@ static int getpeername1(struct thread *t > > counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; > > -static int filt_sfsync_attach(struct knote *kn); > -static void filt_sfsync_detach(struct knote *kn); > -static int filt_sfsync(struct knote *kn, long hint); > - > /* > * sendfile(2)-related variables and associated sysctls > */ > @@ -128,28 +122,6 @@ static int sfreadahead = 1; > SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, > &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); > > -#ifdef SFSYNC_DEBUG > -static int sf_sync_debug = 0; > -SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW, > - &sf_sync_debug, 0, "Output debugging during sf_sync lifecycle"); > -#define SFSYNC_DPRINTF(s, ...) \ > - do { \ > - if (sf_sync_debug) \ > - printf((s), ##__VA_ARGS__); \ > - } while (0) > -#else > -#define SFSYNC_DPRINTF(c, ...) > -#endif > - > -static uma_zone_t zone_sfsync; > - > -static struct filterops sendfile_filtops = { > - .f_isfd = 0, > - .f_attach = filt_sfsync_attach, > - .f_detach = filt_sfsync_detach, > - .f_event = filt_sfsync, > -}; > - > static void > sfstat_init(const void *unused) > { > @@ -159,19 +131,6 @@ sfstat_init(const void *unused) > } > SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); > > -static void > -sf_sync_init(const void *unused) > -{ > - > - zone_sfsync = uma_zcreate("sendfile_sync", sizeof(struct sendfile_sync), > - NULL, NULL, > - NULL, NULL, > - UMA_ALIGN_CACHE, > - 0); > - kqueue_add_filteropts(EVFILT_SENDFILE, &sendfile_filtops); > -} > -SYSINIT(sf_sync, SI_SUB_MBUF, SI_ORDER_FIRST, sf_sync_init, NULL); > - > static int > sfstat_sysctl(SYSCTL_HANDLER_ARGS) > { > @@ -1864,116 +1823,11 @@ getsockaddr(namp, uaddr, len) > return (error); > } > > -static int > -filt_sfsync_attach(struct knote *kn) > -{ > - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_sdata; > - struct knlist *knl = &sfs->klist; > - > - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); > - > - /* > - * Validate that we actually received this via the kernel API. > - */ > - if ((kn->kn_flags & EV_FLAG1) == 0) > - return (EPERM); > - > - kn->kn_ptr.p_v = sfs; > - kn->kn_flags &= ~EV_FLAG1; > - > - knl->kl_lock(knl->kl_lockarg); > - /* > - * If we're in the "freeing" state, > - * don't allow the add. That way we don't > - * end up racing with some other thread that > - * is trying to finish some setup. > - */ > - if (sfs->state == SF_STATE_FREEING) { > - knl->kl_unlock(knl->kl_lockarg); > - return (EINVAL); > - } > - knlist_add(&sfs->klist, kn, 1); > - knl->kl_unlock(knl->kl_lockarg); > - > - return (0); > -} > - > -/* > - * Called when a knote is being detached. > - */ > -static void > -filt_sfsync_detach(struct knote *kn) > -{ > - struct knlist *knl; > - struct sendfile_sync *sfs; > - int do_free = 0; > - > - sfs = kn->kn_ptr.p_v; > - knl = &sfs->klist; > - > - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); > - > - knl->kl_lock(knl->kl_lockarg); > - if (!knlist_empty(knl)) > - knlist_remove(knl, kn, 1); > - > - /* > - * If the list is empty _AND_ the refcount is 0 > - * _AND_ we've finished the setup phase and now > - * we're in the running phase, we can free the > - * underlying sendfile_sync. > - * > - * But we shouldn't do it before finishing the > - * underlying divorce from the knote. > - * > - * So, we have the sfsync lock held; transition > - * it to "freeing", then unlock, then free > - * normally. > - */ > - if (knlist_empty(knl)) { > - if (sfs->state == SF_STATE_COMPLETED && sfs->count == 0) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, " > - "count==0, empty list: time to free!\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); > - do_free = 1; > - } > - } > - knl->kl_unlock(knl->kl_lockarg); > - > - /* > - * Only call free if we're the one who has transitioned things > - * to free. Otherwise we could race with another thread that > - * is currently tearing things down. > - */ > - if (do_free == 1) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p, %s:%d\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs, > - __FILE__, > - __LINE__); > - sf_sync_free(sfs); > - } > -} > - > -static int > -filt_sfsync(struct knote *kn, long hint) > -{ > - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_ptr.p_v; > - int ret; > - > - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs); > - > - /* > - * XXX add a lock assertion here! > - */ > - ret = (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED); > - > - return (ret); > -} > +struct sendfile_sync { > + struct mtx mtx; > + struct cv cv; > + unsigned count; > +}; > > /* > * Add more references to a vm_page + sf_buf + sendfile_sync. > @@ -2022,344 +1876,13 @@ sf_ext_free(void *arg1, void *arg2) > vm_page_free(pg); > vm_page_unlock(pg); > > - if (sfs != NULL) > - sf_sync_deref(sfs); > -} > - > -/* > - * Called to remove a reference to a sf_sync object. > - * > - * This is generally done during the mbuf free path to signify > - * that one of the mbufs in the transaction has been completed. > - * > - * If we're doing SF_SYNC and the refcount is zero then we'll wake > - * up any waiters. > - * > - * IF we're doing SF_KQUEUE and the refcount is zero then we'll > - * fire off the knote. > - */ > -void > -sf_sync_deref(struct sendfile_sync *sfs) > -{ > - int do_free = 0; > - > - if (sfs == NULL) > - return; > - > - mtx_lock(&sfs->mtx); > - KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); > - sfs->count --; > - > - /* > - * Only fire off the wakeup / kqueue notification if > - * we are in the running state. > - */ > - if (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED) { > - if (sfs->flags & SF_SYNC) > + if (sfs != NULL) { > + mtx_lock(&sfs->mtx); > + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); > + if (--sfs->count == 0) > cv_signal(&sfs->cv); > - > - if (sfs->flags & SF_KQUEUE) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: knote!\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - KNOTE_LOCKED(&sfs->klist, 1); > - } > - > - /* > - * If we're not waiting around for a sync, > - * check if the knote list is empty. > - * If it is, we transition to free. > - * > - * XXX I think it's about time I added some state > - * or flag that says whether we're supposed to be > - * waiting around until we've done a signal. > - * > - * XXX Ie, the reason that I don't free it here > - * is because the caller will free the last reference, > - * not us. That should be codified in some flag > - * that indicates "self-free" rather than checking > - * for SF_SYNC all the time. > - */ > - if ((sfs->flags & SF_SYNC) == 0 && knlist_empty(&sfs->klist)) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, " > - "count==0, empty list: time to free!\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); > - do_free = 1; > - } > - > - } > - mtx_unlock(&sfs->mtx); > - > - /* > - * Attempt to do a free here. > - * > - * We do this outside of the lock because it may destroy the > - * lock in question as it frees things. We can optimise this > - * later. > - * > - * XXX yes, we should make it a requirement to hold the > - * lock across sf_sync_free(). > - */ > - if (do_free == 1) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - sf_sync_free(sfs); > - } > -} > - > -/* > - * Allocate a sendfile_sync state structure. > - * > - * For now this only knows about the "sleep" sync, but later it will > - * grow various other personalities. > - */ > -struct sendfile_sync * > -sf_sync_alloc(uint32_t flags) > -{ > - struct sendfile_sync *sfs; > - > - sfs = uma_zalloc(zone_sfsync, M_WAITOK | M_ZERO); > - mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); > - cv_init(&sfs->cv, "sendfile"); > - sfs->flags = flags; > - sfs->state = SF_STATE_SETUP; > - knlist_init_mtx(&sfs->klist, &sfs->mtx); > - > - SFSYNC_DPRINTF("%s: sfs=%p, flags=0x%08x\n", __func__, sfs, sfs->flags); > - > - return (sfs); > -} > - > -/* > - * Take a reference to a sfsync instance. > - * > - * This has to map 1:1 to free calls coming in via sf_ext_free(), > - * so typically this will be referenced once for each mbuf allocated. > - */ > -void > -sf_sync_ref(struct sendfile_sync *sfs) > -{ > - > - if (sfs == NULL) > - return; > - > - mtx_lock(&sfs->mtx); > - sfs->count++; > - mtx_unlock(&sfs->mtx); > -} > - > -void > -sf_sync_syscall_wait(struct sendfile_sync *sfs) > -{ > - > - if (sfs == NULL) > - return; > - > - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!", > - __func__, > - sfs)); > - > - /* > - * If we're not requested to wait during the syscall, > - * don't bother waiting. > - */ > - if ((sfs->flags & SF_SYNC) == 0) > - goto out; > - > - /* > - * This is a bit suboptimal and confusing, so bear with me. > - * > - * Ideally sf_sync_syscall_wait() will wait until > - * all pending mbuf transmit operations are done. > - * This means that when sendfile becomes async, it'll > - * run in the background and will transition from > - * RUNNING to COMPLETED when it's finished acquiring > - * new things to send. Then, when the mbufs finish > - * sending, COMPLETED + sfs->count == 0 is enough to > - * know that no further work is being done. > - * > - * So, we will sleep on both RUNNING and COMPLETED. > - * It's up to the (in progress) async sendfile loop > - * to transition the sf_sync from RUNNING to > - * COMPLETED so the wakeup above will actually > - * do the cv_signal() call. > - */ > - if (sfs->state != SF_STATE_COMPLETED && sfs->state != SF_STATE_RUNNING) > - goto out; > - > - if (sfs->count != 0) > - cv_wait(&sfs->cv, &sfs->mtx); > - KASSERT(sfs->count == 0, ("sendfile sync still busy")); > - > -out: > - return; > -} > - > -/* > - * Free an sf_sync if it's appropriate to. > - */ > -void > -sf_sync_free(struct sendfile_sync *sfs) > -{ > - > - if (sfs == NULL) > - return; > - > - SFSYNC_DPRINTF("%s: (%lld) sfs=%p; called; state=%d, flags=0x%08x " > - "count=%d\n", > - __func__, > - (long long) curthread->td_tid, > - sfs, > - sfs->state, > - sfs->flags, > - sfs->count); > - > - mtx_lock(&sfs->mtx); > - > - /* > - * We keep the sf_sync around if the state is active, > - * we are doing kqueue notification and we have active > - * knotes. > - * > - * If the caller wants to free us right this second it > - * should transition this to the freeing state. > - * > - * So, complain loudly if they break this rule. > - */ > - if (sfs->state != SF_STATE_FREEING) { > - printf("%s: (%llu) sfs=%p; not freeing; let's wait!\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > mtx_unlock(&sfs->mtx); > - return; > } > - > - KASSERT(sfs->count == 0, ("sendfile sync still busy")); > - cv_destroy(&sfs->cv); > - /* > - * This doesn't call knlist_detach() on each knote; it just frees > - * the entire list. > - */ > - knlist_delete(&sfs->klist, curthread, 1); > - mtx_destroy(&sfs->mtx); > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; freeing\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - uma_zfree(zone_sfsync, sfs); > -} > - > -/* > - * Setup a sf_sync to post a kqueue notification when things are complete. > - */ > -int > -sf_sync_kqueue_setup(struct sendfile_sync *sfs, struct sf_hdtr_kq *sfkq) > -{ > - struct kevent kev; > - int error; > - > - sfs->flags |= SF_KQUEUE; > - > - /* Check the flags are valid */ > - if ((sfkq->kq_flags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) > - return (EINVAL); > - > - SFSYNC_DPRINTF("%s: sfs=%p: kqfd=%d, flags=0x%08x, ident=%p, udata=%p\n", > - __func__, > - sfs, > - sfkq->kq_fd, > - sfkq->kq_flags, > - (void *) sfkq->kq_ident, > - (void *) sfkq->kq_udata); > - > - /* Setup and register a knote on the given kqfd. */ > - kev.ident = (uintptr_t) sfkq->kq_ident; > - kev.filter = EVFILT_SENDFILE; > - kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | sfkq->kq_flags; > - kev.data = (intptr_t) sfs; > - kev.udata = sfkq->kq_udata; > - > - error = kqfd_register(sfkq->kq_fd, &kev, curthread, 1); > - if (error != 0) { > - SFSYNC_DPRINTF("%s: returned %d\n", __func__, error); > - } > - return (error); > -} > - > -void > -sf_sync_set_state(struct sendfile_sync *sfs, sendfile_sync_state_t state, > - int islocked) > -{ > - sendfile_sync_state_t old_state; > - > - if (! islocked) > - mtx_lock(&sfs->mtx); > - > - /* > - * Update our current state. > - */ > - old_state = sfs->state; > - sfs->state = state; > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; going from %d to %d\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs, > - old_state, > - state); > - > - /* > - * If we're transitioning from RUNNING to COMPLETED and the count is > - * zero, then post the knote. The caller may have completed the > - * send before we updated the state to COMPLETED and we need to make > - * sure this is communicated. > - */ > - if (old_state == SF_STATE_RUNNING > - && state == SF_STATE_COMPLETED > - && sfs->count == 0 > - && sfs->flags & SF_KQUEUE) { > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: triggering knote!\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - KNOTE_LOCKED(&sfs->klist, 1); > - } > - > - if (! islocked) > - mtx_unlock(&sfs->mtx); > -} > - > -/* > - * Set the retval/errno for the given transaction. > - * > - * This will eventually/ideally be used when the KNOTE is fired off > - * to signify the completion of this transaction. > - * > - * The sfsync lock should be held before entering this function. > - */ > -void > -sf_sync_set_retval(struct sendfile_sync *sfs, off_t retval, int xerrno) > -{ > - > - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!", > - __func__, > - sfs)); > - > - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: errno=%d, retval=%jd\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs, > - xerrno, > - (intmax_t) retval); > - > - sfs->retval = retval; > - sfs->xerrno = xerrno; > } > > /* > @@ -2380,174 +1903,15 @@ sys_sendfile(struct thread *td, struct s > return (do_sendfile(td, uap, 0)); > } > > -int > -_do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags, > - int compat, off_t offset, size_t nbytes, off_t *sbytes, > - struct uio *hdr_uio, > - struct uio *trl_uio, struct sf_hdtr_kq *hdtr_kq) > -{ > - cap_rights_t rights; > - struct sendfile_sync *sfs = NULL; > - struct file *fp; > - int error; > - int do_kqueue = 0; > - int do_free = 0; > - > - AUDIT_ARG_FD(src_fd); > - > - if (hdtr_kq != NULL) > - do_kqueue = 1; > - > - /* > - * sendfile(2) can start at any offset within a file so we require > - * CAP_READ+CAP_SEEK = CAP_PREAD. > - */ > - if ((error = fget_read(td, src_fd, > - cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { > - goto out; > - } > - > - /* > - * IF SF_KQUEUE is set but we haven't copied in anything for > - * kqueue data, error out. > - */ > - if (flags & SF_KQUEUE && do_kqueue == 0) { > - SFSYNC_DPRINTF("%s: SF_KQUEUE but no KQUEUE data!\n", __func__); > - goto out; > - } > - > - /* > - * If we need to wait for completion, initialise the sfsync > - * state here. > - */ > - if (flags & (SF_SYNC | SF_KQUEUE)) > - sfs = sf_sync_alloc(flags & (SF_SYNC | SF_KQUEUE)); > - > - if (flags & SF_KQUEUE) { > - error = sf_sync_kqueue_setup(sfs, hdtr_kq); > - if (error) { > - SFSYNC_DPRINTF("%s: (%llu) error; sfs=%p\n", > - __func__, > - (unsigned long long) curthread->td_tid, > - sfs); > - sf_sync_set_state(sfs, SF_STATE_FREEING, 0); > - sf_sync_free(sfs); > - goto out; > - } > - } > - > - /* > - * Do the sendfile call. > - * > - * If this fails, it'll free the mbuf chain which will free up the > - * sendfile_sync references. > - */ > - error = fo_sendfile(fp, sock_fd, hdr_uio, trl_uio, offset, > - nbytes, sbytes, flags, compat ? SFK_COMPAT : 0, sfs, td); > - > - /* > - * If the sendfile call succeeded, transition the sf_sync state > - * to RUNNING, then COMPLETED. > - * > - * If the sendfile call failed, then the sendfile call may have > - * actually sent some data first - so we check to see whether > - * any data was sent. If some data was queued (ie, count > 0) > - * then we can't call free; we have to wait until the partial > - * transaction completes before we continue along. > - * > - * This has the side effect of firing off the knote > - * if the refcount has hit zero by the time we get here. > - */ > - if (sfs != NULL) { > - mtx_lock(&sfs->mtx); > - if (error == 0 || sfs->count > 0) { > - /* > - * When it's time to do async sendfile, the transition > - * to RUNNING signifies that we're actually actively > - * adding and completing mbufs. When the last disk > - * buffer is read (ie, when we're not doing any > - * further read IO and all subsequent stuff is mbuf > - * transmissions) we'll transition to COMPLETED > - * and when the final mbuf is freed, the completion > - * will be signaled. > - */ > - sf_sync_set_state(sfs, SF_STATE_RUNNING, 1); > - > - /* > - * Set the retval before we signal completed. > - * If we do it the other way around then transitioning to > - * COMPLETED may post the knote before you set the return > - * status! > - * > - * XXX for now, errno is always 0, as we don't post > - * knotes if sendfile failed. Maybe that'll change later. > - */ > - sf_sync_set_retval(sfs, *sbytes, error); > - > - /* > - * And now transition to completed, which will kick off > - * the knote if required. > - */ > - sf_sync_set_state(sfs, SF_STATE_COMPLETED, 1); > - } else { > - /* > - * Error isn't zero, sfs_count is zero, so we > - * won't have some other thing to wake things up. > - * Thus free. > - */ > - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); > - do_free = 1; > - } > - > - /* > - * Next - wait if appropriate. > - */ > - sf_sync_syscall_wait(sfs); > - > - /* > - * If we're not doing kqueue notifications, we can > - * transition this immediately to the freeing state. > - */ > - if ((sfs->flags & SF_KQUEUE) == 0) { > - sf_sync_set_state(sfs, SF_STATE_FREEING, 1); > - do_free = 1; > - } > - > - mtx_unlock(&sfs->mtx); > - } > - > - /* > - * If do_free is set, free here. > - * > - * If we're doing no-kqueue notification and it's just sleep notification, > - * we also do free; it's the only chance we have. > - */ > - if (sfs != NULL && do_free == 1) { > - sf_sync_free(sfs); > - } > - > - /* > - * XXX Should we wait until the send has completed before freeing the source > - * file handle? It's the previous behaviour, sure, but is it required? > - * We've wired down the page references after all. > - */ > - fdrop(fp, td); > - > -out: > - /* Return error */ > - return (error); > -} > - > - > static int > do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) > { > struct sf_hdtr hdtr; > - struct sf_hdtr_kq hdtr_kq; > struct uio *hdr_uio, *trl_uio; > - int error; > + struct file *fp; > + cap_rights_t rights; > off_t sbytes; > - int do_kqueue = 0; > + int error; > > /* > * File offset must be positive. If it goes beyond EOF > @@ -2563,38 +1927,37 @@ do_sendfile(struct thread *td, struct se > if (error != 0) > goto out; > if (hdtr.headers != NULL) { > - error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); > + error = copyinuio(hdtr.headers, hdtr.hdr_cnt, > + &hdr_uio); > if (error != 0) > goto out; > } > if (hdtr.trailers != NULL) { > - error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); > + error = copyinuio(hdtr.trailers, hdtr.trl_cnt, > + &trl_uio); > if (error != 0) > goto out; > } > + } > > - /* > - * If SF_KQUEUE is set, then we need to also copy in > - * the kqueue data after the normal hdtr set and set > - * do_kqueue=1. > - */ > - if (uap->flags & SF_KQUEUE) { > - error = copyin(((char *) uap->hdtr) + sizeof(hdtr), > - &hdtr_kq, > - sizeof(hdtr_kq)); > - if (error != 0) > - goto out; > - do_kqueue = 1; > - } > + AUDIT_ARG_FD(src_fd); > + > + /* > + * sendfile(2) can start at any offset within a file so we require > + * CAP_READ+CAP_SEEK = CAP_PREAD. > + */ > + if ((error = fget_read(td, uap->fd, > + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { > + goto out; > } > > - /* Call sendfile */ > - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat, > - uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq); > + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, > + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); > + fdrop(fp, td); > > - if (uap->sbytes != NULL) { > + if (uap->sbytes != NULL) > copyout(&sbytes, uap->sbytes, sizeof(off_t)); > - } > + > out: > free(hdr_uio, M_IOV); > free(trl_uio, M_IOV); > @@ -2819,7 +2182,7 @@ kern_sendfile_getsock(struct thread *td, > int > vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, > struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, > - int kflags, struct sendfile_sync *sfs, struct thread *td) > + int kflags, struct thread *td) > { > struct file *sock_fp; > struct vnode *vp; > @@ -2829,6 +2192,7 @@ vn_sendfile(struct file *fp, int sockfd, > struct sf_buf *sf; > struct vm_page *pg; > struct shmfd *shmfd; > + struct sendfile_sync *sfs; > struct vattr va; > off_t off, xfsize, fsbytes, sbytes, rem, obj_size; > int error, bsize, nd, hdrlen, mnw; > @@ -2837,6 +2201,7 @@ vn_sendfile(struct file *fp, int sockfd, > obj = NULL; > so = NULL; > m = NULL; > + sfs = NULL; > fsbytes = sbytes = 0; > hdrlen = mnw = 0; > rem = nbytes; > @@ -2860,6 +2225,12 @@ vn_sendfile(struct file *fp, int sockfd, > if (flags & SF_MNOWAIT) > mnw = 1; > > + if (flags & SF_SYNC) { > + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); > + mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); > + cv_init(&sfs->cv, "sendfile"); > + } > + > #ifdef MAC > error = mac_socket_check_send(td->td_ucred, so); > if (error != 0) > @@ -3106,12 +2477,11 @@ retry_space: > loopbytes += xfsize; > off += xfsize; > > - /* > - * XXX eventually this should be a sfsync > - * method call! > - */ > - if (sfs != NULL) > - sf_sync_ref(sfs); > + if (sfs != NULL) { > + mtx_lock(&sfs->mtx); > + sfs->count++; > + mtx_unlock(&sfs->mtx); > + } > } > > if (vp != NULL) > @@ -3193,6 +2563,16 @@ out: > if (m) > m_freem(m); > > + if (sfs != NULL) { > + mtx_lock(&sfs->mtx); > + if (sfs->count != 0) > + cv_wait(&sfs->cv, &sfs->mtx); > + KASSERT(sfs->count == 0, ("sendfile sync still busy")); > + cv_destroy(&sfs->cv); > + mtx_destroy(&sfs->mtx); > + free(sfs, M_TEMP); > + } > + > if (error == ERESTART) > error = EINTR; > > > Modified: head/sys/sys/file.h > ============================================================================== > --- head/sys/sys/file.h Tue Nov 11 20:05:50 2014 (r274402) > +++ head/sys/sys/file.h Tue Nov 11 20:32:46 2014 (r274403) > @@ -90,9 +90,6 @@ foffset_get(struct file *fp) > return (foffset_lock(fp, FOF_NOLOCK)); > } > > -/* XXX pollution? */ > -struct sendfile_sync; > - > typedef int fo_rdwr_t(struct file *fp, struct uio *uio, > struct ucred *active_cred, int flags, > struct thread *td); > @@ -112,8 +109,7 @@ typedef int fo_chown_t(struct file *fp, > struct ucred *active_cred, struct thread *td); > typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio, > struct uio *trl_uio, off_t offset, size_t nbytes, > - off_t *sent, int flags, int kflags, > - struct sendfile_sync *sfs, struct thread *td); > + off_t *sent, int flags, int kflags, struct thread *td); > typedef int fo_seek_t(struct file *fp, off_t offset, int whence, > struct thread *td); > typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif, > @@ -371,11 +367,11 @@ fo_chown(struct file *fp, uid_t uid, gid > static __inline int > fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, > struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, > - int kflags, struct sendfile_sync *sfs, struct thread *td) > + int kflags, struct thread *td) > { > > return ((*fp->f_ops->fo_sendfile)(fp, sockfd, hdr_uio, trl_uio, offset, > - nbytes, sent, flags, kflags, sfs, td)); > + nbytes, sent, flags, kflags, td)); > } > > static __inline int > > Modified: head/sys/sys/socket.h > ============================================================================== > --- head/sys/sys/socket.h Tue Nov 11 20:05:50 2014 (r274402) > +++ head/sys/sys/socket.h Tue Nov 11 20:32:46 2014 (r274403) > @@ -584,27 +584,11 @@ struct sf_hdtr { > }; > > /* > - * sendfile(2) kqueue information > - */ > -struct sf_hdtr_kq { > - uintptr_t kq_ident; /* ident (from userland?) */ > - void *kq_udata; /* user data pointer */ > - uint32_t kq_flags; /* extra flags to pass in */ > > *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-Vmom9sO2tEHGH2eMf=k028NzTFaQS952VMm2z%2B7PNkDYW0A>