Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 8 Dec 2001 19:59:13 -0500
From:      Bosko Milekic <bmilekic@technokratis.com>
To:        Mike Silbersack <silby@silby.com>
Cc:        net@FreeBSD.ORG, David Xu <davidx@viasoft.com.cn>, Mike Barcroft <mike@FreeBSD.ORG>, Leo Bicknell <bicknell@ufp.org>
Subject:   Re: mbuf / maxfiles / maxsockets / etc autoscaling patch
Message-ID:  <20011208195913.A55885@technokratis.com>
In-Reply-To: <Pine.BSF.4.30.0112081756370.61906-200000@niwun.pair.com>; from silby@silby.com on Sat, Dec 08, 2001 at 06:30:26PM -0500
References:  <Pine.BSF.4.30.0112081756370.61906-200000@niwun.pair.com>

next in thread | previous in thread | raw e-mail | index | archive | help

  Looks good to me. Right on!

On Sat, Dec 08, 2001 at 06:30:26PM -0500, Mike Silbersack wrote:
> 
> Here's the autoscaling patch I was mumbling about earlier this week.
> With this patch applied, the necessity of tuning maxusers when one
> upgrades to a machine with more ram should be removed in most cases.
> (This patch is only for -current; the mbuf changes will make it not apply
> cleanly to -stable.  I can make a -stable patch if there is sufficient
> demand right now.)
> 
> Here's a quick look at the size of various memory allocations with various
> maxusers sizes and with the autoscaling patch:
> 
> With maxusers:
> 
> musers  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
> 32	532	1064	1064	1612	1024	1024	512
> 64	1044	2088	2088	3148	1536	1536	512
> 128	2068	4136	4136	6220	2560	2560	512
> 256	4116	8232	8232	12364	4608	4608	512
> 
> With autoscaling:
> 
> MB ram  mproc   mfiles  msocket callout nmbcl   nsfbuf  tcp hash size
> 32	512	4096	2048	4624	1024	1024	512
> 64	1024	8192	4096	9232	2048	1024	512
> 128	2048	16384	8192	18448	4096	2048	1024
> 256	4096	32768	16384	36880	8192	4096	2048
> 384	6144	49152	24576	55312	12288	6144	3072
> 512	8192	65536	32767	73744	16384	8192	4096
> (Values above this start to flatten out due to #defined maximums)
> 
> Note that in general calculations are of the following form:
> 
> value = max(maxusers-derived value, autoscale-derived value);
> value = loader tuned value if present
> 
> As such, under no circumstances will people suddenly see a decrease in
> various parameters when they upgrade to an autoscaling kernel; only
> increases.
> 
> I'm sure that there will be much commotion about what scaling factors are
> correct.  To make changes to these easy, I have grouped all the mins,
> scaling factors, and maxes in param.h - tweaking them is quite simple.
> 
> I included mins and maxes to make sure that autoscaling doesn't cause
> problems by creating low values on small memory machines and also so that
> it does not specify really high values on 2GB+ machines.  The high case is
> what worries me; I have not heard much about how well maxsockets /
> nmbclusters > 32767 really works.  If people are running high volume systems
> that actively use that many simultaneous sockets + clusters + files, I'd
> be glad to bump up the maxes.
> 
> Oh, there's one more kicker thrown in; I changed maxfilesperproc to equal
> 9/10ths of maxfiles, and maxprocperuid to equal 9/10 maxproc; this'll help
> to prevent a single process or user from forkbombing the system or running
> it out of file handles with a default configuration.
> 
> Please review.
> 
> Thanks,
> 
> Mike "Silby" Silbersack

> diff -u -r sys.old/alpha/alpha/machdep.c sys/alpha/alpha/machdep.c
> --- sys.old/alpha/alpha/machdep.c	Sat Dec  8 16:05:15 2001
> +++ sys/alpha/alpha/machdep.c	Sat Dec  8 16:05:28 2001
> @@ -556,7 +556,7 @@
>  		kern_envp = bootinfo.envp;
>  
>  	/* Do basic tuning, hz etc */
> -	init_param();
> +	init_hz();
>  
>  	/*
>  	 * Initalize the (temporary) bootstrap console interface, so
> @@ -861,6 +861,9 @@
>  			physmem -= (sz - nsz);
>  		}
>  	}
> +
> +	/* Init basic tunables */
> +	init_param(alpha_ptob(physmem));
>  
>  	/*
>  	 * Initialize error message buffer (at end of core).
> diff -u -r sys.old/i386/i386/machdep.c sys/i386/i386/machdep.c
> --- sys.old/i386/i386/machdep.c	Sat Dec  8 16:04:54 2001
> +++ sys/i386/i386/machdep.c	Sat Dec  8 16:43:20 2001
> @@ -1691,8 +1691,8 @@
>  	else if (bootinfo.bi_envp)
>  		kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE;
>  
> -	/* Init basic tunables, hz etc */
> -	init_param();
> +	/* Init hz */
> +	init_hz();
>  
>  	/*
>  	 * make gdt memory segments, the code segment goes up to end of the
> @@ -1871,6 +1871,9 @@
>  	getmemsize(first);
>  
>  	/* now running on new page tables, configured,and u/iom is accessible */
> +
> +	/* Init basic tunables */
> +	init_param(ptoa(Maxmem));
>  
>  	/* Map the message buffer. */
>  	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
> diff -u -r sys.old/ia64/ia64/machdep.c sys/ia64/ia64/machdep.c
> --- sys.old/ia64/ia64/machdep.c	Sat Dec  8 16:04:52 2001
> +++ sys/ia64/ia64/machdep.c	Sat Dec  8 16:05:28 2001
> @@ -522,8 +522,8 @@
>  	/* get fpswa interface */
>  	fpswa_interface = (FPSWA_INTERFACE*)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa);
>  
> -	/* Init basic tunables, including hz */
> -	init_param();
> +	/* Init hz */
> +	init_hz();
>  
>  	p = getenv("kernelname");
>  	if (p)
> @@ -623,6 +623,9 @@
>  	phys_avail[phys_avail_cnt] = 0;
>  
>  	Maxmem = physmem;
> +
> +	/* Init basic tunables */
> +	init_param(ia64_ptob(physmem));
>  
>  	/*
>  	 * Initialize error message buffer (at end of core).
> diff -u -r sys.old/kern/subr_mbuf.c sys/kern/subr_mbuf.c
> --- sys.old/kern/subr_mbuf.c	Sat Dec  8 16:04:51 2001
> +++ sys/kern/subr_mbuf.c	Sat Dec  8 16:09:17 2001
> @@ -151,15 +151,21 @@
>  static void
>  tunable_mbinit(void *dummy)
>  {
> +	int automcls, autosfbuf;
>  
> +	/* Calculate autoscaled values, choose if greater. */
> +
> +	automcls = min(MAXAUTOMCLS, max(MINAUTOMCLS, MCLSPERMB * physmemMB));
> +	nmbclusters = max(automcls, NMBCLUSTERS);
> +	autosfbuf = min(MAXAUTOSFBUF, max(MINAUTOSFBUF, SFBUFPERMB * physmemMB));
> +	nsfbufs = max(autosfbuf, NSFBUFS);
> +	
>  	/*
>  	 * This has to be done before VM init.
>  	 */
> -	nmbclusters = NMBCLUSTERS;
>  	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
>  	nmbufs = NMBUFS;
>  	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
> -	nsfbufs = NSFBUFS;
>  	TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
>  	nmbcnt = NMBCNTS;
>  	TUNABLE_INT_FETCH("kern.ipc.nmbcnt", &nmbcnt);
> diff -u -r sys.old/kern/subr_param.c sys/kern/subr_param.c
> --- sys.old/kern/subr_param.c	Sat Dec  8 16:04:51 2001
> +++ sys/kern/subr_param.c	Sat Dec  8 16:10:08 2001
> @@ -90,39 +90,46 @@
>   */
>  struct	buf *swbuf;
>  
> +int physmemMB;
> +
>  /*
>   * Boot time overrides
>   */
>  void
> -init_param(void)
> +init_param(u_int64_t membytes)
>  {
> +	int memsizemb;
> +	int autoproc, autofiles;
> +
> +	physmemMB = membytes / 1048576;
>  
> -	/* Base parameters */
> +	/* Calculate maxusers-derived values. */
>  	maxusers = MAXUSERS;
>  	TUNABLE_INT_FETCH("kern.maxusers", &maxusers);
> -	hz = HZ;
> -	TUNABLE_INT_FETCH("kern.hz", &hz);
> -	tick = 1000000 / hz;
> -	tickadj = howmany(30000, 60 * hz);	/* can adjust 30ms in 60s */
> -
> -	/* The following can be overridden after boot via sysctl */
> +	nbuf = NBUF;
>  	maxproc = NPROC;
> -	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
>  	maxfiles = MAXFILES;
> -	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
> -	maxprocperuid = maxproc - 1;
> -	maxfilesperproc = maxfiles;
> -
> -	/* Cannot be changed after boot */
> -	nbuf = NBUF;
> -	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
>  #ifdef VM_SWZONE_SIZE_MAX
>  	maxswzone = VM_SWZONE_SIZE_MAX;
>  #endif
> -	TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
>  #ifdef VM_BCACHE_SIZE_MAX
>  	maxbcache = VM_BCACHE_SIZE_MAX;
>  #endif
> +
> +	/* Calculate autoscaled values, choose them if greater than above. */
> +	autoproc = min(MAXAUTOPROC, max(MINAUTOPROC, PROCPERMB * physmemMB));
> +	maxproc = max(maxproc, autoproc);
> +	autofiles = min(MAXAUTOFILES, max(MINAUTOFILES, FILESPERMB * physmemMB));
> +	maxfiles = max(maxfiles, autofiles);
> +
> +	/* Allow loader-specified tuneables to take effect. */
> +	TUNABLE_INT_FETCH("kern.maxproc", &maxproc);
> +	TUNABLE_INT_FETCH("kern.maxfiles", &maxfiles);
> +	maxprocperuid = (maxproc * 9) / 10;
> +	maxfilesperproc = (maxfiles * 9) / 10;
> +
> +	TUNABLE_INT_FETCH("kern.nbuf", &nbuf);
> +	TUNABLE_INT_FETCH("kern.maxswzone", &maxswzone);
>  	TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
>  	ncallout = 16 + maxproc + maxfiles;
>  	TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
> @@ -139,4 +146,16 @@
>  	TUNABLE_QUAD_FETCH("kern.maxssiz", &maxssiz);
>  	sgrowsiz = SGROWSIZ;
>  	TUNABLE_QUAD_FETCH("kern.sgrowsiz", &sgrowsiz);
> +}
> +
> +/*
> + * Set hz.  This must be called earlier in machdep.c than init_param().
> + */
> +void
> +init_hz(void)
> +{
> +	hz = HZ;
> +	TUNABLE_INT_FETCH("kern.hz", &hz);
> +	tick = 1000000 / hz;
> +	tickadj = howmany(30000, 60 * hz);	/* can adjust 30ms in 60s */
>  }
> diff -u -r sys.old/kern/uipc_socket2.c sys/kern/uipc_socket2.c
> --- sys.old/kern/uipc_socket2.c	Sat Dec  8 16:04:50 2001
> +++ sys/kern/uipc_socket2.c	Sat Dec  8 16:08:43 2001
> @@ -1026,7 +1026,12 @@
>   */
>  static void init_maxsockets(void *ignored)
>  {
> +	int autosockets, maxuserssockets;
> +	
> +	autosockets = physmemMB * SOCKETSPERMB;
> +	autosockets = min(MAXAUTOSOCKETS, max(MINAUTOSOCKETS, autosockets));
> +	maxuserssockets = 2 * (20 + (16 * maxusers)); 
> +	maxsockets = max(maxuserssockets, max(autosockets, nmbclusters));
>  	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
> -	maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters));
>  }
>  SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
> diff -u -r sys.old/netinet/tcp_subr.c sys/netinet/tcp_subr.c
> --- sys.old/netinet/tcp_subr.c	Sat Dec  8 16:04:42 2001
> +++ sys/netinet/tcp_subr.c	Sat Dec  8 16:10:31 2001
> @@ -190,6 +190,7 @@
>  tcp_init()
>  {
>  	int hashsize = TCBHASHSIZE;
> +	int autohashsize;
>  	
>  	tcp_ccgen = 1;
>  	tcp_cleartaocache();
> @@ -203,6 +204,13 @@
>  
>  	LIST_INIT(&tcb);
>  	tcbinfo.listhead = &tcb;
> +
> +	/* Calculate autoscaled hash size, use if > default hash size. */
> +	autohashsize = physmemMB * TCBHASHPERMB;
> +	autohashsize = min(MAXAUTOTCBHASH, max(MINAUTOTCBHASH, autohashsize));
> +	while (!powerof2(autohashsize))
> +		autohashsize++;
> +	hashsize = max(hashsize, autohashsize);
>  	TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize);
>  	if (!powerof2(hashsize)) {
>  		printf("WARNING: TCB hash size not a power of 2\n");
> diff -u -r sys.old/powerpc/powerpc/machdep.c sys/powerpc/powerpc/machdep.c
> --- sys.old/powerpc/powerpc/machdep.c	Sat Dec  8 16:04:39 2001
> +++ sys/powerpc/powerpc/machdep.c	Sat Dec  8 16:48:30 2001
> @@ -436,7 +436,8 @@
>  	__asm ("mtsprg 0, %0" :: "r"(globalp));
>  
>  	/* Init basic tunables, hz etc */
> -	init_param();
> +	init_hz();
> +	init_param(0); /* XXX - needs to be fed physmem for proper autoscaling */
>  
>  	/* setup curproc so the mutexes work */
>  
> diff -u -r sys.old/sparc64/sparc64/machdep.c sys/sparc64/sparc64/machdep.c
> --- sys.old/sparc64/sparc64/machdep.c	Sat Dec  8 16:04:38 2001
> +++ sys/sparc64/sparc64/machdep.c	Sat Dec  8 16:47:29 2001
> @@ -249,10 +249,10 @@
>  		end = (vm_offset_t)_end;
>  	}
>  
> -	/*
> -	 * Initialize tunables.
> -	 */
> -	init_param();
> +	/* Init hz */
> +	init_hz();
> +	/* Init basic tuneables - XXX - this needs to be moved once maxmem exists here. */
> +	init_param(0);
>  
>  #ifdef DDB
>  	kdb_init();
> diff -u -r sys.old/sys/param.h sys/sys/param.h
> --- sys.old/sys/param.h	Sat Dec  8 16:04:37 2001
> +++ sys/sys/param.h	Sat Dec  8 16:05:28 2001
> @@ -230,6 +230,44 @@
>  #define ctodb(db)			/* calculates pages to devblks */ \
>  	((db) << (PAGE_SHIFT - DEV_BSHIFT))
>  
> +/*
> + * Values used in autoscaling system structures based on RAM size.
> + *
> + * Although settings are scattered across various subsystems, a
> + * common formula is followed.  Generally, there are three
> + * possible values to choose from:  The value suggested by maxusers,
> + * the value suggested by the autoscaling formula, and a manually
> + * tuned value from loader.conf.  If a manually tuned value is specified,
> + * this value will be used.  Otherwise, the maximum of the maxusers
> + * and autoscaled setting will be used.
> + *
> + */
> +
> +/* Max processes, files.  These are set in subr_param.c */
> +#define PROCPERMB 16
> +#define MINAUTOPROC 256
> +#define MAXAUTOPROC 32000
> +#define FILESPERMB 128
> +#define MINAUTOFILES 1024
> +#define MAXAUTOFILES 65536
> +
> +/* Max sockets.  These are set in uipc_socket2.c */
> +#define SOCKETSPERMB 64
> +#define MINAUTOSOCKETS 512
> +#define MAXAUTOSOCKETS 32000
> +
> +/* Max mbuf clusters, sendfile buffers.  These are set in subr_mbuf.c */
> +#define MCLSPERMB 32
> +#define MINAUTOMCLS 512
> +#define MAXAUTOMCLS 32000
> +#define SFBUFPERMB 16
> +#define MINAUTOSFBUF 1024
> +#define MAXAUTOSFBUF 32000
> +
> +/* Number of TCP hash buckets.  These are set in tcp_subr.c */
> +#define TCBHASHPERMB 8
> +#define MINAUTOTCBHASH 512
> +#define MAXAUTOTCBHASH 8192
>  
>  /*
>   * Make this available for most of the kernel.  There were too many
> diff -u -r sys.old/sys/systm.h sys/sys/systm.h
> --- sys.old/sys/systm.h	Sat Dec  8 16:04:37 2001
> +++ sys/sys/systm.h	Sat Dec  8 16:07:45 2001
> @@ -60,6 +60,7 @@
>  extern struct cv selwait;	/* select conditional variable */
>  
>  extern int physmem;		/* physical memory */
> +extern int physmemMB;		/* physical memory size in megabytes */
>  
>  extern dev_t dumpdev;		/* dump device */
>  extern long dumplo;		/* offset into dumpdev */
> @@ -121,7 +122,8 @@
>  
>  void	cpu_boot __P((int));
>  void	cpu_rootconf __P((void));
> -void	init_param __P((void));
> +void	init_hz __P((void));
> +void	init_param __P((u_int64_t));
>  void	tablefull __P((const char *));
>  int	kvprintf __P((char const *, void (*)(int, void*), void *, int,
>  		      _BSD_VA_LIST_)) __printflike(1, 0);


-- 
 Bosko Milekic
 bmilekic@technokratis.com


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20011208195913.A55885>