Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 21 May 2022 18:58:39 +0400
From:      Sergey Kandaurov <pluknet@gmail.com>
To:        Mark Johnston <markj@freebsd.org>
Cc:        src-committers@freebsd.org, dev-commits-src-all@freebsd.org,  dev-commits-src-main@freebsd.org
Subject:   Re: git: fcaa890c4469 - main - mbuf: Only allow extpg mbufs if the system has a direct map
Message-ID:  <CAE-mSOKV%2BxU%2BuCDGGwGtjecepC_yP9xeTE1kXF4U9a2%2BVKKRqw@mail.gmail.com>
In-Reply-To: <202111161852.1AGIqhxc017891@gitrepo.freebsd.org>
References:  <202111161852.1AGIqhxc017891@gitrepo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
--0000000000004481c805df86d8e0
Content-Type: text/plain; charset="UTF-8"

On Tue, 16 Nov 2021 at 22:52, Mark Johnston <markj@freebsd.org> wrote:

> The branch main has been updated by markj:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=fcaa890c4469118255d463495b4044eef484fa3e
>
> commit fcaa890c4469118255d463495b4044eef484fa3e
> Author:     Mark Johnston <markj@FreeBSD.org>
> AuthorDate: 2021-11-16 18:31:04 +0000
> Commit:     Mark Johnston <markj@FreeBSD.org>
> CommitDate: 2021-11-16 18:31:04 +0000
>
>     mbuf: Only allow extpg mbufs if the system has a direct map
>
>     Some upcoming changes will modify software checksum routines like
>     in_cksum() to operate using m_apply(), which uses the direct map to
>     access packet data for unmapped mbufs.  This approach of course does
> not
>     work on platforms without a direct map, so we have to disallow the use
>     of unmapped mbufs on such platforms.
>
>     I believe this is the right tradeoff: we only configure KTLS on amd64
>     and arm64 today (and one KTLS consumer, NFS TLS, requires a direct map
>     already), and the use of unmapped mbufs with plain sendfile is a recent
>     optimization.  If need be, m_apply() could be modified to create
>     CPU-private mappings of extpg mbuf pages as a fallback.
>
>     So, change mb_use_ext_pgs to be hard-wired to zero on systems without a
>     direct map.  Note that PMAP_HAS_DMAP is not a compile-time constant on
>     some systems, so the default value of mb_use_ext_pgs has to be
>     determined during boot.
>
>     Reviewed by:    jhb
>     Discussed with: gallatin
>     MFC after:      2 weeks
>     Sponsored by:   The FreeBSD Foundation
>     Differential Revision:  https://reviews.freebsd.org/D32940
> ---
>  sys/kern/kern_mbuf.c             | 32 ++++++++++++++++++++++++++++++--
>  sys/rpc/rpcsec_tls/rpctls_impl.c |  2 +-
>  2 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
> index d1f2fd2bd9e4..78a270189a4b 100644
> --- a/sys/kern/kern_mbuf.c
> +++ b/sys/kern/kern_mbuf.c
> @@ -116,9 +116,26 @@ int nmbjumbop;                     /* limits number
> of page size jumbo clusters */
>  int nmbjumbo9;                 /* limits number of 9k jumbo clusters */
>  int nmbjumbo16;                        /* limits number of 16k jumbo
> clusters */
>
> -bool mb_use_ext_pgs = true;    /* use M_EXTPG mbufs for sendfile & TLS */
> -SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,
> +bool mb_use_ext_pgs = false;   /* use M_EXTPG mbufs for sendfile & TLS */
>

Hi,

Does it mean that mb_use_ext_pgs has to be enabled manually now
in head and releng/13.1 ? (it was on by default in releng/13.0)
I failed to see how it still can be on by default from this change.

What about initializing to true under #if PMAP_HAS_DMAP ?

>
> +
> +static int
> +sysctl_mb_use_ext_pgs(SYSCTL_HANDLER_ARGS)
> +{
> +       int error, extpg;
> +
> +       extpg = mb_use_ext_pgs;
> +       error = sysctl_handle_int(oidp, &extpg, 0, req);
> +       if (error == 0 && req->newptr != NULL) {
> +               if (extpg != 0 && !PMAP_HAS_DMAP)
> +                       error = EOPNOTSUPP;
> +               else
> +                       mb_use_ext_pgs = extpg != 0;
> +       }
> +       return (error);
> +}
> +SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLTYPE_INT | CTLFLAG_RW,
>      &mb_use_ext_pgs, 0,
> +    sysctl_mb_use_ext_pgs, "IU",
>      "Use unmapped mbufs for sendfile(2) and TLS offload");
>
>  static quad_t maxmbufmem;      /* overall real memory limit for all mbufs
> */
> @@ -137,6 +154,7 @@ static void
>  tunable_mbinit(void *dummy)
>  {
>         quad_t realmem;
> +       int extpg;
>
>         /*
>          * The default limit for all mbuf related memory is 1/2 of all
> @@ -173,6 +191,16 @@ tunable_mbinit(void *dummy)
>         if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
>                 nmbufs = lmax(maxmbufmem / MSIZE / 5,
>                     nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
> +
> +       /*
> +        * Unmapped mbufs can only safely be used on platforms with a
> direct
> +        * map.
> +        */
> +       if (PMAP_HAS_DMAP) {
> +               extpg = mb_use_ext_pgs;
> +               TUNABLE_INT_FETCH("kern.ipc.mb_use_ext_pgs", &extpg);
> +               mb_use_ext_pgs = extpg != 0;
> +       }
>  }
>  SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit,
> NULL);

 [..]

--0000000000004481c805df86d8e0
Content-Type: text/html; charset="UTF-8"
Content-Transfer-Encoding: quoted-printable

<div dir=3D"ltr"><div dir=3D"ltr"><div class=3D"gmail_default" style=3D"fon=
t-family:monospace,monospace"></div></div><br><div class=3D"gmail_quote"><d=
iv dir=3D"ltr" class=3D"gmail_attr">On Tue, 16 Nov 2021 at 22:52, Mark John=
ston &lt;<a href=3D"mailto:markj@freebsd.org">markj@freebsd.org</a>&gt; wro=
te:<br></div><blockquote class=3D"gmail_quote" style=3D"margin:0px 0px 0px =
0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">The branch m=
ain has been updated by markj:<br>
<br>
URL: <a href=3D"https://cgit.FreeBSD.org/src/commit/?id=3Dfcaa890c446911825=
5d463495b4044eef484fa3e" rel=3D"noreferrer" target=3D"_blank">https://cgit.=
FreeBSD.org/src/commit/?id=3Dfcaa890c4469118255d463495b4044eef484fa3e</a><b=
r>
<br>
commit fcaa890c4469118255d463495b4044eef484fa3e<br>
Author:=C2=A0 =C2=A0 =C2=A0Mark Johnston &lt;markj@FreeBSD.org&gt;<br>
AuthorDate: 2021-11-16 18:31:04 +0000<br>
Commit:=C2=A0 =C2=A0 =C2=A0Mark Johnston &lt;markj@FreeBSD.org&gt;<br>
CommitDate: 2021-11-16 18:31:04 +0000<br>
<br>
=C2=A0 =C2=A0 mbuf: Only allow extpg mbufs if the system has a direct map<b=
r>
<br>
=C2=A0 =C2=A0 Some upcoming changes will modify software checksum routines =
like<br>
=C2=A0 =C2=A0 in_cksum() to operate using m_apply(), which uses the direct =
map to<br>
=C2=A0 =C2=A0 access packet data for unmapped mbufs.=C2=A0 This approach of=
 course does not<br>
=C2=A0 =C2=A0 work on platforms without a direct map, so we have to disallo=
w the use<br>
=C2=A0 =C2=A0 of unmapped mbufs on such platforms.<br>
<br>
=C2=A0 =C2=A0 I believe this is the right tradeoff: we only configure KTLS =
on amd64<br>
=C2=A0 =C2=A0 and arm64 today (and one KTLS consumer, NFS TLS, requires a d=
irect map<br>
=C2=A0 =C2=A0 already), and the use of unmapped mbufs with plain sendfile i=
s a recent<br>
=C2=A0 =C2=A0 optimization.=C2=A0 If need be, m_apply() could be modified t=
o create<br>
=C2=A0 =C2=A0 CPU-private mappings of extpg mbuf pages as a fallback.<br>
<br>
=C2=A0 =C2=A0 So, change mb_use_ext_pgs to be hard-wired to zero on systems=
 without a<br>
=C2=A0 =C2=A0 direct map.=C2=A0 Note that PMAP_HAS_DMAP is not a compile-ti=
me constant on<br>
=C2=A0 =C2=A0 some systems, so the default value of mb_use_ext_pgs has to b=
e<br>
=C2=A0 =C2=A0 determined during boot.<br>
<br>
=C2=A0 =C2=A0 Reviewed by:=C2=A0 =C2=A0 jhb<br>
=C2=A0 =C2=A0 Discussed with: gallatin<br>
=C2=A0 =C2=A0 MFC after:=C2=A0 =C2=A0 =C2=A0 2 weeks<br>
=C2=A0 =C2=A0 Sponsored by:=C2=A0 =C2=A0The FreeBSD Foundation<br>
=C2=A0 =C2=A0 Differential Revision:=C2=A0 <a href=3D"https://reviews.freeb=
sd.org/D32940" rel=3D"noreferrer" target=3D"_blank">https://reviews.freebsd=
.org/D32940</a><br>
---<br>
=C2=A0sys/kern/kern_mbuf.c=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0|=
 32 ++++++++++++++++++++++++++++++--<br>
=C2=A0sys/rpc/rpcsec_tls/rpctls_impl.c |=C2=A0 2 +-<br>
=C2=A02 files changed, 31 insertions(+), 3 deletions(-)<br>
<br>
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c<br>
index d1f2fd2bd9e4..78a270189a4b 100644<br>
--- a/sys/kern/kern_mbuf.c<br>
+++ b/sys/kern/kern_mbuf.c<br>
@@ -116,9 +116,26 @@ int nmbjumbop;=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0/* limits number of page size jumbo c=
lusters */<br>
=C2=A0int nmbjumbo9;=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0=
 =C2=A0/* limits number of 9k jumbo clusters */<br>
=C2=A0int nmbjumbo16;=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 /* limits number of 16k jumbo clusters */<b=
r>
<br>
-bool mb_use_ext_pgs =3D true;=C2=A0 =C2=A0 /* use M_EXTPG mbufs for sendfi=
le &amp; TLS */<br>
-SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN,<br>
+bool mb_use_ext_pgs =3D false;=C2=A0 =C2=A0/* use M_EXTPG mbufs for sendfi=
le &amp; TLS */<br></blockquote><div><br></div><div><div style=3D"font-fami=
ly:monospace,monospace" class=3D"gmail_default">Hi,<br></div></div><div><br=
></div><div>Does it mean that mb_use_ext_pgs has to be enabled manually now=
</div><div>in head and releng/13.1 ? (it was on by default in releng/13.0)<=
/div><div>I failed to see how it still can be on by default from this chang=
e.</div><br></div><div class=3D"gmail_quote">What about initializing to tru=
e under #if PMAP_HAS_DMAP ?<br>=C2=A0<blockquote class=3D"gmail_quote" styl=
e=3D"margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);paddin=
g-left:1ex">
+<br>
+static int<br>
+sysctl_mb_use_ext_pgs(SYSCTL_HANDLER_ARGS)<br>
+{<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0int error, extpg;<br>
+<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0extpg =3D mb_use_ext_pgs;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0error =3D sysctl_handle_int(oidp, &amp;extpg, 0=
, req);<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0if (error =3D=3D 0 &amp;&amp; req-&gt;newptr !=
=3D NULL) {<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0if (extpg !=3D 0 &a=
mp;&amp; !PMAP_HAS_DMAP)<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0error =3D EOPNOTSUPP;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0else<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=
=A0 =C2=A0mb_use_ext_pgs =3D extpg !=3D 0;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0}<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0return (error);<br>
+}<br>
+SYSCTL_PROC(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLTYPE_INT | CTLFLAG_RW,=
<br>
=C2=A0 =C2=A0 =C2=A0&amp;mb_use_ext_pgs, 0,<br>
+=C2=A0 =C2=A0 sysctl_mb_use_ext_pgs, &quot;IU&quot;,<br>
=C2=A0 =C2=A0 =C2=A0&quot;Use unmapped mbufs for sendfile(2) and TLS offloa=
d&quot;);<br>
<br>
=C2=A0static quad_t maxmbufmem;=C2=A0 =C2=A0 =C2=A0 /* overall real memory =
limit for all mbufs */<br>
@@ -137,6 +154,7 @@ static void<br>
=C2=A0tunable_mbinit(void *dummy)<br>
=C2=A0{<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 quad_t realmem;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0int extpg;<br>
<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 /*<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0* The default limit for all mbuf related =
memory is 1/2 of all<br>
@@ -173,6 +191,16 @@ tunable_mbinit(void *dummy)<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 if (nmbufs &lt; nmbclusters + nmbjumbop + nmbju=
mbo9 + nmbjumbo16)<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 nmbufs =3D lmax(max=
mbufmem / MSIZE / 5,<br>
=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 nmbcl=
usters + nmbjumbop + nmbjumbo9 + nmbjumbo16);<br>
+<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0/*<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 * Unmapped mbufs can only safely be used on pl=
atforms with a direct<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 * map.<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 */<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0if (PMAP_HAS_DMAP) {<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0extpg =3D mb_use_ex=
t_pgs;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0TUNABLE_INT_FETCH(&=
quot;kern.ipc.mb_use_ext_pgs&quot;, &amp;extpg);<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0mb_use_ext_pgs =3D =
extpg !=3D 0;<br>
+=C2=A0 =C2=A0 =C2=A0 =C2=A0}<br>
=C2=A0}<br>
=C2=A0SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit,=
 NULL);<span class=3D"gmail_default" style=3D"font-family:monospace,monospa=
ce"></span></blockquote><div>=C2=A0<span class=3D"gmail_default" style=3D"f=
ont-family:monospace,monospace">[..]</span></div><div><br>
</div></div></div>

--0000000000004481c805df86d8e0--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAE-mSOKV%2BxU%2BuCDGGwGtjecepC_yP9xeTE1kXF4U9a2%2BVKKRqw>