Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 27 Nov 2012 23:12:12 +0100
From:      Andre Oppermann <andre@freebsd.org>
To:        Alfred Perlstein <alfred@FreeBSD.org>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r243594 - head/sys/netinet
Message-ID:  <50B53ABC.1010304@freebsd.org>
In-Reply-To: <201211270304.qAR34PfD056691@svn.freebsd.org>
References:  <201211270304.qAR34PfD056691@svn.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On 27.11.2012 04:04, Alfred Perlstein wrote:
> Author: alfred
> Date: Tue Nov 27 03:04:24 2012
> New Revision: 243594
> URL: http://svnweb.freebsd.org/changeset/base/243594
>
> Log:
>    Auto size the tcbhashsize structure based on max sockets.
>
>    While here, also make the code that enforces power-of-two more
>    forgiving, instead of just resetting to 512, graciously round-down
>    to the next lower power of two.

The porthash (also a parameter to in_pcbinfo_init()) should not be
scaled the same as tcbhashsize.  It's unlikely that anyone is going
to have more than a thousand listen ports.

The default tcbhashsize scaling factor of maxsockets / 4 is IMHO
excessive.  If someone is running at the limit he's going to up
maxsockets to double the target load.  Dividing by 8 is a better
balance for normal and well used large memory machines.
The hashsize calculation logic is done a bit in a roundabout way.

What do think of the attached patch to fix the porthash and to simplify
the hashsize logic?

-- 
Andre

Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c  (revision 243631)
+++ netinet/tcp_subr.c  (working copy)
@@ -229,14 +229,17 @@
                     void *ip4hdr, const void *ip6hdr);

  /*
- * Target size of TCP PCB hash tables. Must be a power of two.
+ * Minimal size of TCP PCB hash tables. Must be a power of two.
   *
   * Note that this can be overridden by the kernel environment
   * variable net.inet.tcp.tcbhashsize
   */
-#ifndef TCBHASHSIZE
-#define TCBHASHSIZE    0
+#ifndef TCBMINHASHSIZE
+#define TCBMINHASHSIZE 512
  #endif
+#ifndef TCBPORTHASHSIZE
+#define TCBPORTHASHSIZE        512
+#endif

  /*
   * XXX
@@ -282,35 +285,10 @@
         return (0);
  }

-/*
- * Take a value and get the next power of 2 that doesn't overflow.
- * Used to size the tcp_inpcb hash buckets.
- */
-static int
-maketcp_hashsize(int size)
-{
-       int hashsize;
-
-       /*
-        * auto tune.
-        * get the next power of 2 higher than maxsockets.
-        */
-       hashsize = 1 << fls(size);
-       /* catch overflow, and just go one power of 2 smaller */
-       if (hashsize < size) {
-               hashsize = 1 << (fls(size) - 1);
-       }
-       return (hashsize);
-}
-
  void
  tcp_init(void)
  {
-       const char *tcbhash_tuneable;
-       int hashsize;

-       tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
-
         if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
             &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
                 printf("%s: WARNING: unable to register helper hook\n", __func__);
@@ -318,49 +296,17 @@
             &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
                 printf("%s: WARNING: unable to register helper hook\n", __func__);

-       hashsize = TCBHASHSIZE;
-       TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
-       if (hashsize == 0) {
-               /*
-                * Auto tune the hash size based on maxsockets.
-                * A perfect hash would have a 1:1 mapping
-                * (hashsize = maxsockets) however it's been
-                * suggested that O(2) average is better.
-                */
-               hashsize = maketcp_hashsize(maxsockets / 4);
-               /*
-                * Our historical default is 512,
-                * do not autotune lower than this.
-                */
-               if (hashsize < 512)
-                       hashsize = 512;
-               if (bootverbose)
-                       printf("%s: %s auto tuned to %d\n", __func__,
-                           tcbhash_tuneable, hashsize);
+       tcp_tcbhashsize = 0x1 << fls((maxsockets / 8) - 1);
+       TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &tcp_tcbhashsize);
+       if (!powerof2(tcp_tcbhashsize) ||
+           tcp_tcbhashsize < TCBMINHASHSIZE) {
+               printf("WARNING: TCB hash size not a power of 2 or too small\n");
+               tcp_tcbhashsize = TCBMINHASHSIZE;
         }
-       /*
-        * We require a hashsize to be a power of two.
-        * Previously if it was not a power of two we would just reset it
-        * back to 512, which could be a nasty surprise if you did not notice
-        * the error message.
-        * Instead what we do is clip it to the closest power of two lower
-        * than the specified hash value.
-        */
-       if (!powerof2(hashsize)) {
-               int oldhashsize = hashsize;
+       in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, tcp_tcbhashsize,
+           TCBPORTHASHSIZE, "tcp_inpcb", tcp_inpcb_init, NULL,
+           UMA_ZONE_NOFREE, IPI_HASHFIELDS_4TUPLE);

-               hashsize = maketcp_hashsize(hashsize);
-               /* prevent absurdly low value */
-               if (hashsize < 16)
-                       hashsize = 16;
-               printf("%s: WARNING: TCB hash size not a power of 2, "
-                   "clipped from %d to %d.\n", __func__, oldhashsize,
-                   hashsize);
-       }
-       in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
-           "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
-           IPI_HASHFIELDS_4TUPLE);
-
         /*
          * These have to be type stable for the benefit of the timers.
          */
@@ -393,7 +339,6 @@
                 tcp_rexmit_min = 1;
         tcp_rexmit_slop = TCPTV_CPU_VAR;
         tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
-       tcp_tcbhashsize = hashsize;

         TUNABLE_INT_FETCH("net.inet.tcp.soreceive_stream", &tcp_soreceive_stream);
         if (tcp_soreceive_stream) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?50B53ABC.1010304>