Date: Fri, 11 Jul 2014 12:48:39 -0700 From: Adrian Chadd <adrian@freebsd.org> To: Gleb Smirnoff <glebius@freebsd.org> Cc: "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>, "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>, "src-committers@freebsd.org" <src-committers@freebsd.org> Subject: Re: svn commit: r268535 - in head/sys: kern sys Message-ID: <CAJ-VmoniXzcs-BGZkqcGXtVcizEX8uSrNw07Hqw__OWhwm9Hxg@mail.gmail.com> In-Reply-To: <201407111940.s6BJepC6081249@svn.freebsd.org> References: <201407111940.s6BJepC6081249@svn.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
I'm so glad this finally made it in! Thanks! -a On 11 July 2014 12:40, Gleb Smirnoff <glebius@freebsd.org> wrote: > Author: glebius > Date: Fri Jul 11 19:40:50 2014 > New Revision: 268535 > URL: http://svnweb.freebsd.org/changeset/base/268535 > > Log: > Improve reference counting of EXT_SFBUF pages attached to mbufs. > > o Do not use UMA refcount zone. The problem with this zone is that > several refcounting words (16 on amd64) share the same cache line, > and issueing atomic(9) updates on them creates cache line contention. > Also, allocating and freeing them is extra CPU cycles. > Instead, refcount the page directly via vm_page_wire() and the sfbuf > via sf_buf_alloc(sf_buf_page(sf)) [1]. > > o Call refcounting/freeing function for EXT_SFBUF via direct function > call, instead of function pointer. This removes barrier for CPU > branch predictor. > > o Do not cleanup the mbuf to be freed in mb_free_ext(), merely to > satisfy assertion in mb_dtor_mbuf(). Remove the assertion from > mb_dtor_mbuf(). Use bcopy() instead of manual assignments to > copy m_ext in mb_dupcl(). > > [1] This has some problems for now. Using sf_buf_alloc() merely to > increase refcount is expensive, and is broken on sparc64. To be > fixed. > > Sponsored by: Netflix > Sponsored by: Nginx, Inc. > > Modified: > head/sys/kern/kern_mbuf.c > head/sys/kern/uipc_mbuf.c > head/sys/kern/uipc_syscalls.c > head/sys/sys/mbuf.h > head/sys/sys/sf_buf.h > > Modified: head/sys/kern/kern_mbuf.c > ============================================================================== > --- head/sys/kern/kern_mbuf.c Fri Jul 11 17:31:40 2014 (r268534) > +++ head/sys/kern/kern_mbuf.c Fri Jul 11 19:40:50 2014 (r268535) > @@ -449,7 +449,6 @@ mb_dtor_mbuf(void *mem, int size, void * > > if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) > m_tag_delete_chain(m, NULL); > - KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); > KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); > #ifdef INVARIANTS > trash_dtor(mem, size, arg); > > Modified: head/sys/kern/uipc_mbuf.c > ============================================================================== > --- head/sys/kern/uipc_mbuf.c Fri Jul 11 17:31:40 2014 (r268534) > +++ head/sys/kern/uipc_mbuf.c Fri Jul 11 19:40:50 2014 (r268535) > @@ -287,19 +287,31 @@ m_extadd(struct mbuf *mb, caddr_t buf, u > void > mb_free_ext(struct mbuf *m) > { > - int skipmbuf; > + int freembuf; > > - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); > - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__)); > + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); > > /* > - * check if the header is embedded in the cluster > + * Check if the header is embedded in the cluster. > */ > - skipmbuf = (m->m_flags & M_NOFREE); > + freembuf = (m->m_flags & M_NOFREE) ? 0 : 1; > + > + switch (m->m_ext.ext_type) { > + case EXT_SFBUF: > + sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); > + break; > + default: > + KASSERT(m->m_ext.ext_cnt != NULL, > + ("%s: no refcounting pointer on %p", __func__, m)); > + /* > + * Free attached storage if this mbuf is the only > + * reference to it. > + */ > + if (*(m->m_ext.ext_cnt) != 1) { > + if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1) > + break; > + } > > - /* Free attached storage if this mbuf is the only reference to it. */ > - if (*(m->m_ext.ext_cnt) == 1 || > - atomic_fetchadd_int(m->m_ext.ext_cnt, -1) == 1) { > switch (m->m_ext.ext_type) { > case EXT_PACKET: /* The packet zone is special. */ > if (*(m->m_ext.ext_cnt) == 0) > @@ -318,7 +330,6 @@ mb_free_ext(struct mbuf *m) > case EXT_JUMBO16: > uma_zfree(zone_jumbo16, m->m_ext.ext_buf); > break; > - case EXT_SFBUF: > case EXT_NET_DRV: > case EXT_MOD_TYPE: > case EXT_DISPOSABLE: > @@ -337,23 +348,9 @@ mb_free_ext(struct mbuf *m) > ("%s: unknown ext_type", __func__)); > } > } > - if (skipmbuf) > - return; > > - /* > - * Free this mbuf back to the mbuf zone with all m_ext > - * information purged. > - */ > - m->m_ext.ext_buf = NULL; > - m->m_ext.ext_free = NULL; > - m->m_ext.ext_arg1 = NULL; > - m->m_ext.ext_arg2 = NULL; > - m->m_ext.ext_cnt = NULL; > - m->m_ext.ext_size = 0; > - m->m_ext.ext_type = 0; > - m->m_ext.ext_flags = 0; > - m->m_flags &= ~M_EXT; > - uma_zfree(zone_mbuf, m); > + if (freembuf) > + uma_zfree(zone_mbuf, m); > } > > /* > @@ -363,22 +360,24 @@ mb_free_ext(struct mbuf *m) > static void > mb_dupcl(struct mbuf *n, struct mbuf *m) > { > - KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); > - KASSERT(m->m_ext.ext_cnt != NULL, ("%s: ext_cnt not set", __func__)); > - KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); > > - if (*(m->m_ext.ext_cnt) == 1) > - *(m->m_ext.ext_cnt) += 1; > - else > - atomic_add_int(m->m_ext.ext_cnt, 1); > - n->m_ext.ext_buf = m->m_ext.ext_buf; > - n->m_ext.ext_free = m->m_ext.ext_free; > - n->m_ext.ext_arg1 = m->m_ext.ext_arg1; > - n->m_ext.ext_arg2 = m->m_ext.ext_arg2; > - n->m_ext.ext_size = m->m_ext.ext_size; > - n->m_ext.ext_cnt = m->m_ext.ext_cnt; > - n->m_ext.ext_type = m->m_ext.ext_type; > - n->m_ext.ext_flags = m->m_ext.ext_flags; > + KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); > + KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); > + > + switch (m->m_ext.ext_type) { > + case EXT_SFBUF: > + sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2); > + break; > + default: > + KASSERT(m->m_ext.ext_cnt != NULL, > + ("%s: no refcounting pointer on %p", __func__, m)); > + if (*(m->m_ext.ext_cnt) == 1) > + *(m->m_ext.ext_cnt) += 1; > + else > + atomic_add_int(m->m_ext.ext_cnt, 1); > + } > + > + bcopy(&m->m_ext, &n->m_ext, sizeof(m->m_ext)); > n->m_flags |= M_EXT; > n->m_flags |= m->m_flags & M_RDONLY; > } > > Modified: head/sys/kern/uipc_syscalls.c > ============================================================================== > --- head/sys/kern/uipc_syscalls.c Fri Jul 11 17:31:40 2014 (r268534) > +++ head/sys/kern/uipc_syscalls.c Fri Jul 11 19:40:50 2014 (r268535) > @@ -1983,32 +1983,56 @@ filt_sfsync(struct knote *kn, long hint) > return (ret); > } > > +/* > + * Add more references to a vm_page + sf_buf + sendfile_sync. > + */ > +void > +sf_ext_ref(void *arg1, void *arg2) > +{ > + struct sf_buf *sf = arg1; > + struct sendfile_sync *sfs = arg2; > + vm_page_t pg = sf_buf_page(sf); > + > + /* XXXGL: there should be sf_buf_ref() */ > + sf_buf_alloc(sf_buf_page(sf), SFB_NOWAIT); > + > + vm_page_lock(pg); > + vm_page_wire(pg); > + vm_page_unlock(pg); > + > + if (sfs != NULL) { > + mtx_lock(&sfs->mtx); > + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0")); > + sfs->count++; > + mtx_unlock(&sfs->mtx); > + } > +} > > /* > * Detach mapped page and release resources back to the system. > */ > void > -sf_buf_mext(struct mbuf *mb, void *addr, void *args) > +sf_ext_free(void *arg1, void *arg2) > { > - vm_page_t m; > - struct sendfile_sync *sfs; > + struct sf_buf *sf = arg1; > + struct sendfile_sync *sfs = arg2; > + vm_page_t pg = sf_buf_page(sf); > + > + sf_buf_free(sf); > > - m = sf_buf_page(args); > - sf_buf_free(args); > - vm_page_lock(m); > - vm_page_unwire(m, PQ_INACTIVE); > + vm_page_lock(pg); > + vm_page_unwire(pg, PQ_INACTIVE); > /* > * Check for the object going away on us. This can > * happen since we don't hold a reference to it. > * If so, we're responsible for freeing the page. > */ > - if (m->wire_count == 0 && m->object == NULL) > - vm_page_free(m); > - vm_page_unlock(m); > - if (addr != NULL) { > - sfs = addr; > + if (pg->wire_count == 0 && pg->object == NULL) > + vm_page_free(pg); > + vm_page_unlock(pg); > + > + if (sfs != NULL) > sf_sync_deref(sfs); > - } > } > > /* > @@ -2124,7 +2148,7 @@ sf_sync_alloc(uint32_t flags) > /* > * Take a reference to a sfsync instance. > * > - * This has to map 1:1 to free calls coming in via sf_buf_mext(), > + * This has to map 1:1 to free calls coming in via sf_ext_free(), > * so typically this will be referenced once for each mbuf allocated. > */ > void > @@ -3062,17 +3086,19 @@ retry_space: > m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); > if (m0 == NULL) { > error = (mnw ? EAGAIN : ENOBUFS); > - sf_buf_mext(NULL, NULL, sf); > - break; > - } > - if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, > - sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, > - (mnw ? M_NOWAIT : M_WAITOK)) != 0) { > - error = (mnw ? EAGAIN : ENOBUFS); > - sf_buf_mext(NULL, NULL, sf); > - m_freem(m0); > + sf_ext_free(sf, NULL); > break; > } > + /* > + * Attach EXT_SFBUF external storage. > + */ > + m0->m_ext.ext_buf = (caddr_t )sf_buf_kva(sf); > + m0->m_ext.ext_size = PAGE_SIZE; > + m0->m_ext.ext_arg1 = sf; > + m0->m_ext.ext_arg2 = sfs; > + m0->m_ext.ext_type = EXT_SFBUF; > + m0->m_ext.ext_flags = 0; > + m0->m_flags |= (M_EXT|M_RDONLY); > m0->m_data = (char *)sf_buf_kva(sf) + pgoff; > m0->m_len = xfsize; > > > Modified: head/sys/sys/mbuf.h > ============================================================================== > --- head/sys/sys/mbuf.h Fri Jul 11 17:31:40 2014 (r268534) > +++ head/sys/sys/mbuf.h Fri Jul 11 19:40:50 2014 (r268535) > @@ -374,6 +374,12 @@ struct mbuf { > "\30EXT_FLAG_EXP4" > > /* > + * External reference/free functions. > + */ > +void sf_ext_ref(void *, void *); > +void sf_ext_free(void *, void *); > + > +/* > * Flags indicating checksum, segmentation and other offload work to be > * done, or already done, by hardware or lower layers. It is split into > * separate inbound and outbound flags. > > Modified: head/sys/sys/sf_buf.h > ============================================================================== > --- head/sys/sys/sf_buf.h Fri Jul 11 17:31:40 2014 (r268534) > +++ head/sys/sys/sf_buf.h Fri Jul 11 19:40:50 2014 (r268535) > @@ -52,7 +52,6 @@ struct sfstat { /* sendfile statistic > #include <machine/sf_buf.h> > #include <sys/systm.h> > #include <sys/counter.h> > -struct mbuf; /* for sf_buf_mext() */ > > extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; > #define SFSTAT_ADD(name, val) \ > @@ -60,7 +59,4 @@ extern counter_u64_t sfstat[sizeof(struc > (val)) > #define SFSTAT_INC(name) SFSTAT_ADD(name, 1) > #endif /* _KERNEL */ > - > -void sf_buf_mext(struct mbuf *mb, void *addr, void *args); > - > #endif /* !_SYS_SF_BUF_H_ */ >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-VmoniXzcs-BGZkqcGXtVcizEX8uSrNw07Hqw__OWhwm9Hxg>