Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 8 Dec 2001 18:30:26 -0500 (EST)
From:      Mike Silbersack <silby@silby.com>
To:        <net@freebsd.org>
Cc:        David Xu <davidx@viasoft.com.cn>, Mike Barcroft <mike@FreeBSD.ORG>, Leo Bicknell <bicknell@ufp.org>
Subject:   mbuf / maxfiles / maxsockets / etc autoscaling patch
Message-ID:  <Pine.BSF.4.30.0112081756370.61906-200000@niwun.pair.com>

index | next in thread | raw e-mail

[-- Attachment #1 --]

Here's the autoscaling patch I was mumbling about earlier this week.
With this patch applied, the necessity of tuning maxusers when one
upgrades to a machine with more ram should be removed in most cases.
(This patch is only for -current; the mbuf changes will make it not apply
cleanly to -stable.  I can prepare a -stable patch if there is sufficient
demand right now.)

Here's a quick look at the size of various memory allocations with various
maxusers sizes and with the autoscaling patch:

With maxusers:

musers  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
32	532	1064	1064	1612	1024	1024	512
64	1044	2088	2088	3148	1536	1536	512
128	2068	4136	4136	6220	2560	2560	512
256	4116	8232	8232	12364	4608	4608	512

With autoscaling:

MB ram  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
32	512	4096	2048	4624	1024	1024	512
64	1024	8192	4096	9232	2048	1024	512
128	2048	16384	8192	18448	4096	2048	1024
256	4096	32768	16384	36880	8192	4096	2048
384	6144	49152	24576	55312	12288	6144	3072
512	8192	65536	32767	73744	16384	8192	4096
(Values above this start to flatten out due to #defined maximums)

Note that in general calculations are of the following form:

value = max(maxusers-derived value, autoscale-derived value);
value = loader tuned value if present

As such, under no circumstances will people suddenly see a decrease in
various parameters when they upgrade to an autoscaling kernel; only
increases.

I'm sure that there will be much commotion about what scaling factors are
correct.  To make changes to these easy, I have grouped all the mins,
scaling factors, and maxes in param.h - tweaking them is quite simple.

I included mins and maxes to make sure that autoscaling doesn't cause
problems by creating low values on small memory machines and also so that
it does not specify really high values on 2GB+ machines.  The high case is
what worries me; I have not heard much about how well maxsockets /
nmbclusters > 32767 really works.  If people are running high volume systems
that actively use that many simultaneous sockets + clusters + files, I'd
be glad to bump up the maxes.

Oh, there's one more kicker thrown in; I changed maxfilesperproc to equal
9/10ths of maxfiles, and maxprocperuid to equal 9/10ths of maxproc; this'll help
to prevent a single process or user from forkbombing the system or running
it out of file handles with a default configuration.

Please review.

Thanks,

Mike "Silby" Silbersack

[-- Attachment #2 --]
diff -u -r sys.old/alpha/alpha/machdep.c sys/alpha/alpha/machdep.c
--- sys.old/alpha/alpha/machdep.c	Sat Dec  8 16:05:15 2001
+++ sys/alpha/alpha/machdep.c	Sat Dec  8 16:05:28 2001
@@ -556,7 +556,7 @@
 		kern_envp = bootinfo.envp;
 
 	/* Do basic tuning, hz etc */
-	init_param();
+	init_hz();
 
 	/*
 	 * Initalize the (temporary) bootstrap console interface, so
@@ -861,6 +861,9 @@
 			physmem -= (sz - nsz);
 		}
 	}
+
+	/* Init basic tunables */
+	init_param(alpha_ptob(physmem));
 
 	/*
 	 * Initialize error message buffer (at end of core).
diff -u -r sys.old/i386/i386/machdep.c sys/i386/i386/machdep.c
--- sys.old/i386/i386/machdep.c	Sat Dec  8 16:04:54 2001
+++ sys/i386/i386/machdep.c	Sat Dec  8 16:43:20 2001
@@ -1691,8 +1691,8 @@
 	else if (bootinfo.bi_envp)
 		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
 
-	/* Init basic tunables, hz etc */
-	init_param();
+	/* Init hz */
+	init_hz();
 
 	/*
 	 * make gdt memory segments, the code segment goes up to end of the
@@ -1871,6 +1871,9 @@
 	getmemsize(first);
 
 	/* now running on new page tables, configured,and u/iom is accessible */
+
+	/* Init basic tunables */
+	init_param(ptoa(Maxmem));
 
 	/* Map the message buffer. */
 	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
diff -u -r sys.old/ia64/ia64/machdep.c sys/ia64/ia64/machdep.c
--- sys.old/ia64/ia64/machdep.c	Sat Dec  8 16:04:52 2001
+++ sys/ia64/ia64/machdep.c	Sat Dec  8 16:05:28 2001
@@ -522,8 +522,8 @@
 	/* get fpswa interface */
 	fpswa_interface = (FPSWA_INTERFACE*)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa);
 
-	/* Init basic tunables, including hz */
-	init_param();
+	/* Init hz */
+	init_hz();
 
 	p = getenv("kernelname");
 	if (p)
@@ -623,6 +623,9 @@
 	phys_avail[phys_avail_cnt] = 0;
 
 	Maxmem = physmem;
+
+	/* Init basic tunables */
+	init_param(ia64_ptob(physmem));
 
 	/*
 	 * Initialize error message buffer (at end of core).
diff -u -r sys.old/kern/subr_mbuf.c sys/kern/subr_mbuf.c
--- sys.old/kern/subr_mbuf.c	Sat Dec  8 16:04:51 2001
+++ sys/kern/subr_mbuf.c	Sat Dec  8 16:09:17 2001
@@ -151,15 +151,21 @@
 static void
 tunable_mbinit(void *dummy)
 {
+	int automcls, autosfbuf;
 
+	/* Calculate autoscaled values, choose if greater. */
+
+	automcls = min(MAXAUTOMCLS, max(MINAUTOMCLS, MCLSPERMB * physmemMB));
+	nmbclusters = max(automcls, NMBCLUSTERS);
+	autosfbuf = min(MAXAUTOSFBUF, max(MINAUTOSFBUF, SFBUFPERMB * physmemMB));
+	nsfbufs = max(autosfbuf, NSFBUFS);
+	
 	/*
 	 * This has to be done before VM init.
 	 */
-	nmbclusters = NMBCLUSTERS;
 	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
 	nmbufs = NMBUFS;
 	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
-	nsfbufs = NSFBUFS;
 	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
 	nmbcnt = NMBCNTS;
 	TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt);
diff -u -r sys.old/kern/subr_param.c sys/kern/subr_param.c
--- sys.old/kern/subr_param.c	Sat Dec  8 16:04:51 2001
+++ sys/kern/subr_param.c	Sat Dec  8 16:10:08 2001
@@ -90,39 +90,46 @@
  */
 struct	buf *swbuf;
 
+int physmemMB;
+
 /*
  * Boot time overrides
  */
 void
-init_param(void)
+init_param(u_int64_t membytes)
 {
+	int memsizemb;
+	int autoproc, autofiles;
+
+	physmemMB = membytes / 1048576;
 
-	/* Base parameters */
+	/* Calculate maxusers-derived values. */
 	maxusers = MAXUSERS;
 	TUNABLE_INT_FETCH("kern.maxusers", &maxusers);
-	hz = HZ;
-	TUNABLE_INT_FETCH("kern.hz", &hz);
-	tick = 1000000 / hz;
-	tickadj = howmany(30000, 60 * hz);	/* can adjust 30ms in 60s */
-
-	/* The following can be overridden after boot via sysctl */
+	nbuf = NBUF;
 	maxproc = NPROC;
-	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
 	maxfiles = MAXFILES;
-	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
-	maxprocperuid = maxproc - 1;
-	maxfilesperproc = maxfiles;
-
-	/* Cannot be changed after boot */
-	nbuf = NBUF;
-	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
 #ifdef VM_SWZONE_SIZE_MAX
 	maxswzone = VM_SWZONE_SIZE_MAX;
 #endif
-	TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
 #ifdef VM_BCACHE_SIZE_MAX
 	maxbcache = VM_BCACHE_SIZE_MAX;
 #endif
+
+	/* Calculate autoscaled values, choose them if greater than above. */
+	autoproc = min(MAXAUTOPROC, max(MINAUTOPROC, PROCPERMB * physmemMB));
+	maxproc = max(maxproc, autoproc);
+	autofiles = min(MAXAUTOFILES, max(MINAUTOFILES, FILESPERMB * physmemMB));
+	maxfiles = max(maxfiles, autofiles);
+
+	/* Allow loader-specified tuneables to take effect. */
+	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
+	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
+	maxprocperuid = (maxproc * 9) / 10;
+	maxfilesperproc = (maxfiles * 9) / 10;
+
+	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
+	TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
 	TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
 	ncallout = 16 + maxproc + maxfiles;
 	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
@@ -139,4 +146,16 @@
 	TUNABLE_QUAD_FETCH("kern.maxssiz", &maxssiz);
 	sgrowsiz = SGROWSIZ;
 	TUNABLE_QUAD_FETCH("kern.sgrowsiz", &sgrowsiz);
+}
+
+/*
+ * Set hz.  This must be called earlier in machdep.c than init_param().
+ */
+void
+init_hz(void)
+{
+	hz = HZ;
+	TUNABLE_INT_FETCH("kern.hz", &hz);
+	tick = 1000000 / hz;
+	tickadj = howmany(30000, 60 * hz);	/* can adjust 30ms in 60s */
 }
diff -u -r sys.old/kern/uipc_socket2.c sys/kern/uipc_socket2.c
--- sys.old/kern/uipc_socket2.c	Sat Dec  8 16:04:50 2001
+++ sys/kern/uipc_socket2.c	Sat Dec  8 16:08:43 2001
@@ -1026,7 +1026,12 @@
  */
 static void init_maxsockets(void *ignored)
 {
+	int autosockets, maxuserssockets;
+	
+	autosockets = physmemMB * SOCKETSPERMB;
+	autosockets = min(MAXAUTOSOCKETS, max(MINAUTOSOCKETS, autosockets));
+	maxuserssockets = 2 * (20 + (16 * maxusers)); 
+	maxsockets = max(maxuserssockets, max(autosockets, nmbclusters));
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
-	maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
diff -u -r sys.old/netinet/tcp_subr.c sys/netinet/tcp_subr.c
--- sys.old/netinet/tcp_subr.c	Sat Dec  8 16:04:42 2001
+++ sys/netinet/tcp_subr.c	Sat Dec  8 16:10:31 2001
@@ -190,6 +190,7 @@
 tcp_init()
 {
 	int hashsize = TCBHASHSIZE;
+	int autohashsize;
 	
 	tcp_ccgen = 1;
 	tcp_cleartaocache();
@@ -203,6 +204,13 @@
 
 	LIST_INIT(&tcb);
 	tcbinfo.listhead = &tcb;
+
+	/* Calculate autoscaled hash size, use if > default hash size. */
+	autohashsize = physmemMB * TCBHASHPERMB;
+	autohashsize = min(MAXAUTOTCBHASH, max(MINAUTOTCBHASH, autohashsize));
+	while (!powerof2(autohashsize))
+		autohashsize++;
+	hashsize = max(hashsize, autohashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
 	if (!powerof2(hashsize)) {
 		printf("WARNING: TCB hash size not a power of 2\n");
diff -u -r sys.old/powerpc/powerpc/machdep.c sys/powerpc/powerpc/machdep.c
--- sys.old/powerpc/powerpc/machdep.c	Sat Dec  8 16:04:39 2001
+++ sys/powerpc/powerpc/machdep.c	Sat Dec  8 16:48:30 2001
@@ -436,7 +436,8 @@
 	__asm ("mtsprg 0, %0" :: "r"(globalp));
 
 	/* Init basic tunables, hz etc */
-	init_param();
+	init_hz();
+	init_param(0); /* XXX - needs to be fed physmem for proper autoscaling */
 
 	/* setup curproc so the mutexes work */
 
diff -u -r sys.old/sparc64/sparc64/machdep.c sys/sparc64/sparc64/machdep.c
--- sys.old/sparc64/sparc64/machdep.c	Sat Dec  8 16:04:38 2001
+++ sys/sparc64/sparc64/machdep.c	Sat Dec  8 16:47:29 2001
@@ -249,10 +249,10 @@
 		end = (vm_offset_t)_end;
 	}
 
-	/*
-	 * Initialize tunables.
-	 */
-	init_param();
+	/* Init hz */
+	init_hz();
+	/* Init basic tuneables - XXX - this needs to be moved once maxmem exists here. */
+	init_param(0);
 
 #ifdef DDB
 	kdb_init();
diff -u -r sys.old/sys/param.h sys/sys/param.h
--- sys.old/sys/param.h	Sat Dec  8 16:04:37 2001
+++ sys/sys/param.h	Sat Dec  8 16:05:28 2001
@@ -230,6 +230,44 @@
 #define ctodb(db)			/* calculates pages to devblks */ \
 	((db) << (PAGE_SHIFT - DEV_BSHIFT))
 
+/*
+ * Values used in autoscaling system structures based on RAM size.
+ *
+ * Although settings are scattered across various subsystems, a
+ * common formula is followed.  Generally, there are three
+ * possible values to choose from:  The value suggested by maxusers,
+ * the value suggested by the autoscaling formula, and a manually
+ * tuned value from loader.conf.  If a manually tuned value is specified,
+ * this value will be used.  Otherwise, the maximum of the maxusers
+ * and autoscaled setting will be used.
+ *
+ */
+
+/* Max processes, files.  These are set in subr_param.c */
+#define PROCPERMB 16
+#define MINAUTOPROC 256
+#define MAXAUTOPROC 32000
+#define FILESPERMB 128
+#define MINAUTOFILES 1024
+#define MAXAUTOFILES 65536
+
+/* Max sockets.  These are set in uipc_socket2.c */
+#define SOCKETSPERMB 64
+#define MINAUTOSOCKETS 512
+#define MAXAUTOSOCKETS 32000
+
+/* Max mbuf clusters, sendfile buffers.  These are set in subr_mbuf.c */
+#define MCLSPERMB 32
+#define MINAUTOMCLS 512
+#define MAXAUTOMCLS 32000
+#define SFBUFPERMB 16
+#define MINAUTOSFBUF 1024
+#define MAXAUTOSFBUF 32000
+
+/* Number of TCP hash buckets.  These are set in tcp_subr.c */
+#define TCBHASHPERMB 8
+#define MINAUTOTCBHASH 512
+#define MAXAUTOTCBHASH 8192
 
 /*
  * Make this available for most of the kernel.  There were too many
diff -u -r sys.old/sys/systm.h sys/sys/systm.h
--- sys.old/sys/systm.h	Sat Dec  8 16:04:37 2001
+++ sys/sys/systm.h	Sat Dec  8 16:07:45 2001
@@ -60,6 +60,7 @@
 extern struct cv selwait;	/* select conditional variable */
 
 extern int physmem;		/* physical memory */
+extern int physmemMB;		/* physical memory size in megabytes */
 
 extern dev_t dumpdev;		/* dump device */
 extern long dumplo;		/* offset into dumpdev */
@@ -121,7 +122,8 @@
 
 void	cpu_boot __P((int));
 void	cpu_rootconf __P((void));
-void	init_param __P((void));
+void	init_hz __P((void));
+void	init_param __P((u_int64_t));
 void	tablefull __P((const char *));
 int	kvprintf __P((char const *, void (*)(int, void*), void *, int,
 		      _BSD_VA_LIST_)) __printflike(1, 0);
help

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.4.30.0112081756370.61906-200000>