Date: Sun, 3 May 2020 00:37:16 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r360583 - in head/sys: kern netinet sys Message-ID: <202005030037.0430bGDX010355@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Sun May 3 00:37:16 2020 New Revision: 360583 URL: https://svnweb.freebsd.org/changeset/base/360583 Log: Step 4.2: start divorce of M_EXT and M_EXTPG They have more differencies than similarities. For now there is lots of code that would check for M_EXT only and work correctly on M_EXTPG buffers, so still carry M_EXT bit together with M_EXTPG. However, prepare some code for explicit check for M_EXTPG. Reviewed by: gallatin Differential Revision: https://reviews.freebsd.org/D24598 Modified: head/sys/kern/kern_mbuf.c head/sys/kern/kern_sendfile.c head/sys/kern/subr_sglist.c head/sys/kern/uipc_mbuf.c head/sys/kern/uipc_sockbuf.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_pcap.c head/sys/sys/mbuf.h Modified: head/sys/kern/kern_mbuf.c ============================================================================== --- head/sys/kern/kern_mbuf.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/kern/kern_mbuf.c Sun May 3 00:37:16 2020 (r360583) @@ -115,7 +115,7 @@ int nmbjumbop; /* limits number of page size jumbo c int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ -bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */ +bool mb_use_ext_pgs; /* use M_EXTPG mbufs for sendfile & TLS */ SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, &mb_use_ext_pgs, 0, "Use unmapped mbufs for sendfile(2) and TLS offload"); @@ -822,7 +822,7 @@ mb_reclaim(uma_zone_t zone __unused, int pending __unu /* * Free "count" units of I/O from an mbuf chain. They could be held - * in EXT_PGS or just as a normal mbuf. This code is intended to be + * in M_EXTPG or just as a normal mbuf. This code is intended to be * called in an error path (I/O error, closed connection, etc). */ void @@ -831,8 +831,7 @@ mb_free_notready(struct mbuf *m, int count) int i; for (i = 0; i < count && m != NULL; i++) { - if ((m->m_flags & M_EXT) != 0 && - m->m_ext.ext_type == EXT_PGS) { + if ((m->m_flags & M_EXTPG) != 0) { m->m_epg_nrdy--; if (m->m_epg_nrdy != 0) continue; @@ -860,9 +859,8 @@ mb_unmapped_compress(struct mbuf *m) * a packet header, it would only be able to hold MHLEN bytes * and m_data would have to be initialized differently. */ - KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXT) && - m->m_ext.ext_type == EXT_PGS, - ("%s: m %p !M_EXT or !EXT_PGS or M_PKTHDR", __func__, m)); + KASSERT((m->m_flags & M_PKTHDR) == 0 && (m->m_flags & M_EXTPG), + ("%s: m %p !M_EXTPG or M_PKTHDR", __func__, m)); KASSERT(m->m_len <= MLEN, ("m_len too large %p", m)); if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { @@ -902,12 +900,12 @@ mb_unmapped_compress(struct mbuf *m) * unmapped data is stored in an mbuf with an EXT_SFBUF external * cluster. These mbufs use an sf_buf to provide a valid KVA for the * associated physical page. They also hold a reference on the - * original EXT_PGS mbuf to ensure the physical page doesn't go away. + * original M_EXTPG mbuf to ensure the physical page doesn't go away. * Finally, any TLS trailer data is stored in a regular mbuf. * * mb_unmapped_free_mext() is the ext_free handler for the EXT_SFBUF * mbufs. It frees the associated sf_buf and releases its reference - * on the original EXT_PGS mbuf. + * on the original M_EXTPG mbuf. * * _mb_unmapped_to_ext() is a helper function that converts a single * unmapped mbuf into a chain of mbufs. @@ -926,9 +924,9 @@ mb_unmapped_free_mext(struct mbuf *m) sf = m->m_ext.ext_arg1; sf_buf_free(sf); - /* Drop the reference on the backing EXT_PGS mbuf. */ + /* Drop the reference on the backing M_EXTPG mbuf. */ old_m = m->m_ext.ext_arg2; - mb_free_ext(old_m); + mb_free_extpg(old_m); } static struct mbuf * @@ -1109,7 +1107,7 @@ mb_unmapped_to_ext(struct mbuf *top) } /* - * Allocate an empty EXT_PGS mbuf. The ext_free routine is + * Allocate an empty M_EXTPG mbuf. The ext_free routine is * responsible for freeing any pages backing this mbuf when it is * freed. */ @@ -1133,7 +1131,6 @@ mb_alloc_ext_pgs(int how, m_ext_free_t ext_free) m->m_epg_so = NULL; m->m_data = NULL; m->m_flags |= (M_EXT | M_RDONLY | M_EXTPG); - m->m_ext.ext_type = EXT_PGS; m->m_ext.ext_flags = EXT_FLAG_EMBREF; m->m_ext.ext_count = 1; m->m_ext.ext_size = 0; @@ -1206,24 +1203,6 @@ mb_free_ext(struct mbuf *m) uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; - case EXT_PGS: { -#ifdef KERN_TLS - struct ktls_session *tls; -#endif - - KASSERT(mref->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); - mref->m_ext.ext_free(mref); -#ifdef KERN_TLS - tls = mref->m_epg_tls; - if (tls != NULL && - !refcount_release_if_not_last(&tls->refcount)) - ktls_enqueue_to_free(mref); - else -#endif - uma_zfree(zone_mbuf, mref); - break; - } case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: @@ -1249,6 +1228,48 @@ mb_free_ext(struct mbuf *m) } if (freembuf && m != mref) + uma_zfree(zone_mbuf, m); +} + +/* + * Clean up after mbufs with M_EXTPG storage attached to them if the + * reference count hits 1. + */ +void +mb_free_extpg(struct mbuf *m) +{ + volatile u_int *refcnt; + struct mbuf *mref; + + M_ASSERTEXTPG(m); + + /* See if this is the mbuf that holds the embedded refcount. */ + if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { + refcnt = &m->m_ext.ext_count; + mref = m; + } else { + KASSERT(m->m_ext.ext_cnt != NULL, + ("%s: no refcounting pointer on %p", __func__, m)); + refcnt = m->m_ext.ext_cnt; + mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); + } + + /* Free attached storage if this mbuf is the only reference to it. */ + if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { + KASSERT(mref->m_ext.ext_free != NULL, + ("%s: ext_free not set", __func__)); + + mref->m_ext.ext_free(mref); +#ifdef KERN_TLS + if (mref->m_epg_tls != NULL && + !refcount_release_if_not_last(&mref->m_epg_tls->refcount)) + ktls_enqueue_to_free(mref); + else +#endif + uma_zfree(zone_mbuf, mref); + } + + if (m != mref) uma_zfree(zone_mbuf, m); } Modified: head/sys/kern/kern_sendfile.c ============================================================================== --- head/sys/kern/kern_sendfile.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/kern/kern_sendfile.c Sun May 3 00:37:16 2020 (r360583) @@ -192,8 +192,7 @@ sendfile_free_mext_pg(struct mbuf *m) int flags, i; bool cache_last; - KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_PGS, - ("%s: m %p !M_EXT or !EXT_PGS", __func__, m)); + M_ASSERTEXTPG(m); cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; @@ -363,8 +362,7 @@ sendfile_iodone(void *arg, vm_page_t *pa, int count, i } #if defined(KERN_TLS) && defined(INVARIANTS) - if ((sfio->m->m_flags & M_EXT) != 0 && - sfio->m->m_ext.ext_type == EXT_PGS) + if ((sfio->m->m_flags & M_EXTPG) != 0) KASSERT(sfio->tls == sfio->m->m_epg_tls, ("TLS session mismatch")); else @@ -1015,13 +1013,7 @@ retry_space: if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; - if (m0->m_ext.ext_type == - EXT_PGS) - m0->m_ext.ext_arg1 = - sfs; - else - m0->m_ext.ext_arg2 = - sfs; + m0->m_ext.ext_arg1 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); @@ -1096,10 +1088,6 @@ retry_space: m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; - if (m0->m_ext.ext_type == EXT_PGS) - m0->m_ext.ext_arg1 = sfs; - else - m0->m_ext.ext_arg2 = sfs; m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; Modified: head/sys/kern/subr_sglist.c ============================================================================== --- head/sys/kern/subr_sglist.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/kern/subr_sglist.c Sun May 3 00:37:16 2020 (r360583) @@ -220,7 +220,7 @@ sglist_count_vmpages(vm_page_t *m, size_t pgoff, size_ /* * Determine the number of scatter/gather list elements needed to - * describe an EXT_PGS buffer. + * describe an M_EXTPG mbuf. */ int sglist_count_mbuf_epg(struct mbuf *m, size_t off, size_t len) Modified: head/sys/kern/uipc_mbuf.c ============================================================================== --- head/sys/kern/uipc_mbuf.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/kern/uipc_mbuf.c Sun May 3 00:37:16 2020 (r360583) @@ -191,8 +191,10 @@ mb_dupcl(struct mbuf *n, struct mbuf *m) { volatile u_int *refcnt; - KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); - KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n)); + KASSERT(m->m_flags & (M_EXT|M_EXTPG), + ("%s: M_EXT|M_EXTPG not set on %p", __func__, m)); + KASSERT(!(n->m_flags & (M_EXT|M_EXTPG)), + ("%s: M_EXT|M_EXTPG set on %p", __func__, n)); /* * Cache access optimization. @@ -200,27 +202,22 @@ mb_dupcl(struct mbuf *n, struct mbuf *m) * o Regular M_EXT storage doesn't need full copy of m_ext, since * the holder of the 'ext_count' is responsible to carry the free * routine and its arguments. - * o EXT_PGS data is split between main part of mbuf and m_ext, the + * o M_EXTPG data is split between main part of mbuf and m_ext, the * main part is copied in full, the m_ext part is similar to M_EXT. * o EXT_EXTREF, where 'ext_cnt' doesn't point into mbuf at all, is * special - it needs full copy of m_ext into each mbuf, since any * copy could end up as the last to free. */ - switch (m->m_ext.ext_type) { - case EXT_PGS: + if (m->m_flags & M_EXTPG) { bcopy(&m->m_epg_startcopy, &n->m_epg_startcopy, __rangeof(struct mbuf, m_epg_startcopy, m_epg_endcopy)); bcopy(&m->m_ext, &n->m_ext, m_epg_ext_copylen); - break; - case EXT_EXTREF: + } else if (m->m_ext.ext_type == EXT_EXTREF) bcopy(&m->m_ext, &n->m_ext, sizeof(struct m_ext)); - break; - default: + else bcopy(&m->m_ext, &n->m_ext, m_ext_copylen); - } - n->m_flags |= M_EXT; - n->m_flags |= m->m_flags & (M_RDONLY | M_EXTPG); + n->m_flags |= m->m_flags & (M_RDONLY | M_EXT | M_EXTPG); /* See if this is the mbuf that holds the embedded refcount. */ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { @@ -525,7 +522,7 @@ m_copym(struct mbuf *m, int off0, int len, int wait) copyhdr = 0; } n->m_len = min(len, m->m_len - off); - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data + off; mb_dupcl(n, m); } else @@ -567,7 +564,7 @@ m_copypacket(struct mbuf *m, int how) if (!m_dup_pkthdr(n, m, how)) goto nospace; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data; mb_dupcl(n, m); } else { @@ -585,7 +582,7 @@ m_copypacket(struct mbuf *m, int how) n = n->m_next; n->m_len = m->m_len; - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data; mb_dupcl(n, m); } else { @@ -1003,7 +1000,7 @@ m_split(struct mbuf *m0, int len0, int wait) n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; - if (m->m_flags & M_EXT) + if (m->m_flags & (M_EXT|M_EXTPG)) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ @@ -1029,7 +1026,7 @@ m_split(struct mbuf *m0, int len0, int wait) M_ALIGN(n, remain); } extpacket: - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data + len; mb_dupcl(n, m); } else { Modified: head/sys/kern/uipc_sockbuf.c ============================================================================== --- head/sys/kern/uipc_sockbuf.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/kern/uipc_sockbuf.c Sun May 3 00:37:16 2020 (r360583) @@ -148,7 +148,6 @@ sbready_compress(struct sockbuf *sb, struct mbuf *m0, /* Compress small unmapped mbufs into plain mbufs. */ if ((m->m_flags & M_EXTPG) && m->m_len <= MLEN && !mbuf_has_tls_session(m)) { - MPASS(m->m_flags & M_EXT); ext_size = m->m_ext.ext_size; if (mb_unmapped_compress(m) == 0) { sb->sb_mbcnt -= ext_size; @@ -190,8 +189,8 @@ sbready_compress(struct sockbuf *sb, struct mbuf *m0, /* * Mark ready "count" units of I/O starting with "m". Most mbufs - * count as a single unit of I/O except for EXT_PGS-backed mbufs which - * can be backed by multiple pages. + * count as a single unit of I/O except for M_EXTPG mbufs which + * are backed by multiple pages. */ int sbready(struct sockbuf *sb, struct mbuf *m0, int count) @@ -209,8 +208,7 @@ sbready(struct sockbuf *sb, struct mbuf *m0, int count while (count > 0) { KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m)); - if ((m->m_flags & M_EXT) != 0 && - m->m_ext.ext_type == EXT_PGS) { + if ((m->m_flags & M_EXTPG) != 0) { if (count < m->m_epg_nrdy) { m->m_epg_nrdy -= count; count = 0; Modified: head/sys/netinet/tcp_output.c ============================================================================== --- head/sys/netinet/tcp_output.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/netinet/tcp_output.c Sun May 3 00:37:16 2020 (r360583) @@ -2023,7 +2023,7 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *ple } n->m_len = mlen; len_cp += n->m_len; - if (m->m_flags & M_EXT) { + if (m->m_flags & (M_EXT|M_EXTPG)) { n->m_data = m->m_data + off; mb_dupcl(n, m); } else Modified: head/sys/netinet/tcp_pcap.c ============================================================================== --- head/sys/netinet/tcp_pcap.c Sun May 3 00:27:41 2020 (r360582) +++ head/sys/netinet/tcp_pcap.c Sun May 3 00:37:16 2020 (r360583) @@ -308,10 +308,13 @@ tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct * last reference, go through the normal * free-ing process. */ - if (mhead->m_flags & M_EXT) { + if (mhead->m_flags & M_EXTPG) { + /* Don't mess around with these. */ + tcp_pcap_m_freem(mhead); + continue; + } else if (mhead->m_flags & M_EXT) { switch (mhead->m_ext.ext_type) { case EXT_SFBUF: - case EXT_PGS: /* Don't mess around with these. */ tcp_pcap_m_freem(mhead); continue; @@ -339,8 +342,7 @@ tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct tcp_pcap_alloc_reuse_ext++; break; } - } - else { + } else { tcp_pcap_alloc_reuse_mbuf++; } @@ -366,7 +368,8 @@ tcp_pcap_add(struct tcphdr *th, struct mbuf *m, struct * In cases where that isn't possible, settle for what we can * get. */ - if ((m->m_flags & M_EXT) && tcp_pcap_take_cluster_reference()) { + if ((m->m_flags & (M_EXT|M_EXTPG)) && + tcp_pcap_take_cluster_reference()) { n->m_data = m->m_data; n->m_len = m->m_len; mb_dupcl(n, m); Modified: head/sys/sys/mbuf.h ============================================================================== --- head/sys/sys/mbuf.h Sun May 3 00:27:41 2020 (r360582) +++ head/sys/sys/mbuf.h Sun May 3 00:37:16 2020 (r360583) @@ -563,7 +563,6 @@ m_epg_pagelen(const struct mbuf *m, int pidx, int pgof #define EXT_PACKET 6 /* mbuf+cluster from packet zone */ #define EXT_MBUF 7 /* external mbuf reference */ #define EXT_RXRING 8 /* data in NIC receive ring */ -#define EXT_PGS 9 /* array of unmapped pages */ #define EXT_VENDOR1 224 /* for vendor-internal use */ #define EXT_VENDOR2 225 /* for vendor-internal use */ @@ -739,6 +738,7 @@ extern uma_zone_t zone_extpgs; void mb_dupcl(struct mbuf *, struct mbuf *); void mb_free_ext(struct mbuf *); +void mb_free_extpg(struct mbuf *); void mb_free_mext_pgs(struct mbuf *); struct mbuf *mb_alloc_ext_pgs(int, m_ext_free_t); int mb_unmapped_compress(struct mbuf *m); @@ -1044,7 +1044,7 @@ m_extrefcnt(struct mbuf *m) /* Check if mbuf is multipage. */ #define M_ASSERTEXTPG(m) \ - KASSERT(((m)->m_flags & (M_EXT|M_EXTPG)) == (M_EXT|M_EXTPG), \ + KASSERT(((m)->m_flags & (M_EXTPG|M_PKTHDR)) == M_EXTPG, \ ("%s: m %p is not multipage!", __func__, m)) /* @@ -1387,7 +1387,9 @@ m_free(struct mbuf *m) m_tag_delete_chain(m, NULL); if (m->m_flags & M_PKTHDR && m->m_pkthdr.csum_flags & CSUM_SND_TAG) m_snd_tag_rele(m->m_pkthdr.snd_tag); - if (m->m_flags & M_EXT) + if (m->m_flags & M_EXTPG) + mb_free_extpg(m); + else if (m->m_flags & M_EXT) mb_free_ext(m); else if ((m->m_flags & M_NOFREE) == 0) uma_zfree(zone_mbuf, m);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202005030037.0430bGDX010355>