Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 11 Mar 1999 10:36:35 -0500 (EST)
From:      shalunov@lynxhub.lz.att.com
To:        FreeBSD-gnats-submit@freebsd.org
Subject:   kern/10545: kernel lock-up with fork/exec stress test
Message-ID:  <199903111536.KAA00419@tuzik.lz.att.com>

next in thread | raw e-mail | index | archive | help

>Number:         10545
>Category:       kern
>Synopsis:       When a fork/exec stress test is run, the machine locks up
>Confidential:   yes
>Severity:       critical
>Priority:       high
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Mar 11 08:40:00 PST 1999
>Closed-Date:
>Last-Modified:
>Originator:     stanislav shalunov
>Release:        FreeBSD 3.1-RELEASE i386
>Organization:
AT&T
>Environment:

	Stock 3.1-RELEASE system, with recompiled kernel.
	Hardware is Dell-assembled, details are hopefully clear from dmesg.
	Kernel config file with comments removed follows
	(I did not change /etc/make.conf and nothing
	unusual is set in the environment):

machine		"i386"
cpu		"I686_CPU"
ident		TUZIK
maxusers	512

options		INET			#InterNETworking
options		FFS			#Berkeley Fast Filesystem
options		FFS_ROOT		#FFS usable as root device [keep this!]
options		MFS			#Memory Filesystem
options		NFS			#Network Filesystem
options		MSDOSFS			#MSDOS Filesystem
options		"CD9660"		#ISO 9660 Filesystem
options		PROCFS			#Process filesystem
options		"COMPAT_43"		#Compatible with BSD 4.3 [KEEP THIS!]
options		IDE_DELAY=5000
options		UCONSOLE		#Allow users to grab the console
options		FAILSAFE		#Be conservative
options		USERCONFIG		#boot -c editor
options		VISUAL_USERCONFIG	#visual boot -c editor

config		kernel	root on wd0s2a

controller	isa0
controller	eisa0
controller	pci0

controller	fdc0	at isa? port "IO_FD1" bio irq 6 drq 2
disk		fd0	at fdc0 drive 0
disk		fd1	at fdc0 drive 1

options		"CMD640"	# work around CMD640 chip deficiency
controller	wdc0	at isa? port "IO_WD1" bio irq 14
disk		wd0	at wdc0 drive 0
disk		wd1	at wdc0 drive 1

controller	wdc1	at isa? port "IO_WD2" bio irq 15
disk		wd2	at wdc1 drive 0
disk		wd3	at wdc1 drive 1

options		ATAPI		#Enable ATAPI support for IDE bus
options		ATAPI_STATIC	#Don't do it as an LKM
device		acd0		#IDE CD-ROM

controller	atkbdc0	at isa? port IO_KBD tty
device		atkbd0	at isa? tty irq 1
device		psm0	at isa? tty irq 12

device		vga0	at isa? port ? conflicts

pseudo-device	splash

device		sc0	at isa? tty

device		npx0	at isa? port IO_NPX irq 13

device		sio0	at isa? port "IO_COM1" flags 0x10 tty irq 4
device		sio1	at isa? port "IO_COM2" tty irq 3

device		ppc0	at isa? port? net irq 7
controller	ppbus0
device		nlpt0	at ppbus?
device		plip0	at ppbus?
device		ppi0	at ppbus?

device xl0

pseudo-device	loop
pseudo-device	ether
pseudo-device	sl	1
pseudo-device	ppp	1
pseudo-device	tun	1
pseudo-device	pty	16
pseudo-device	gzip		# Exec gzipped a.out's

options		KTRACE		#kernel tracing

options		SYSVSHM
options		SYSVMSG

pseudo-device	bpfilter 4	#Berkeley packet filter

	Maxusers is set high because I needed the machine to be able to
	handle about 20000 simultaneous TCP connections (mostly in
	TIME_WAIT state).  These are for Apache.

	The output of dmesg(1) follows:

Copyright (c) 1992-1999 FreeBSD Inc.
Copyright (c) 1982, 1986, 1989, 1991, 1993
	The Regents of the University of California. All rights reserved.
FreeBSD 3.1-RELEASE #0: Wed Mar 10 11:57:33 EST 1999
    shalunov@tuzik.lz.att.com:/usr/src/sys/compile/TUZIK
Timecounter "i8254"  frequency 1193182 Hz
Timecounter "TSC"  frequency 398776065 Hz
CPU: Pentium II/Xeon/Celeron (398.78-MHz 686-class CPU)
  Origin = "GenuineIntel"  Id = 0x652  Stepping=2
  Features=0x183f9ff<FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,MMX,<b24>>
real memory  = 67108864 (65536K bytes)
avail memory = 61865984 (60416K bytes)
Preloaded elf kernel "kernel" at 0xf0288000.
Probing for devices on PCI bus 0:
chip0: <Intel 82443BX host to PCI bridge> rev 0x02 on pci0.0.0
chip1: <Intel 82443BX host to AGP bridge> rev 0x02 on pci0.1.0
chip2: <Intel 82371AB PCI to ISA bridge> rev 0x02 on pci0.7.0
ide_pci0: <Intel PIIX4 Bus-master IDE controller> rev 0x01 on pci0.7.1
chip3: <Intel 82371AB Power management controller> rev 0x02 on pci0.7.3
chip4: <PCI to PCI bridge (vendor=1011 device=0024)> rev 0x03 on pci0.15.0
xl0: <3Com 3c905B Fast Etherlink XL 10/100BaseTX> rev 0x24 int a irq 11 on pci0.17.0
xl0: Ethernet address: 00:c0:4f:6e:f9:37
xl0: autoneg complete, link status good (half-duplex, 10Mbps)
Probing for devices on PCI bus 1:
vga0: <ATI model 4742 graphics accelerator> rev 0x5c int a irq 9 on pci1.0.0
Probing for devices on PCI bus 2:
Probing for devices on the ISA bus:
sc0 on isa
sc0: VGA color <16 virtual consoles, flags=0x0>
atkbdc0 at 0x60-0x6f on motherboard
atkbd0 irq 1 on isa
psm0 irq 12 on isa
psm0: model IntelliMouse, device ID 3
sio0 at 0x3f8-0x3ff irq 4 flags 0x10 on isa
sio0: type 16550A
sio1 at 0x2f8-0x2ff irq 3 on isa
sio1: type 16550A
fdc0 at 0x3f0-0x3f7 irq 6 drq 2 on isa
fdc0: FIFO enabled, 8 bytes threshold
fd0: 1.44MB 3.5in
wdc0 at 0x1f0-0x1f7 irq 14 on isa
wdc0: unit 0 (wd0): <Maxtor 90645D3>
wd0: 6149MB (12594960 sectors), 12495 cyls, 16 heads, 63 S/T, 512 B/S
wdc1 at 0x170-0x177 irq 15 on isa
wdc1: unit 0 (atapi): <TOSHIBA CD-ROM XM-6302B/1017>, removable, accel, dma, iordis
acd0: drive speed 5512KB/sec, 256KB cache
acd0: supported read types: CD-R, CD-RW, CD-DA
acd0: Audio: play, 16 volume levels
acd0: Mechanism: ejectable tray
acd0: Medium: no/blank disc inside, unlocked
ppc0 at 0x378 irq 7 on isa
ppc0: SMC-like chipset (ECP/EPP/PS2/NIBBLE) in COMPATIBLE mode
ppc0: FIFO with 16/16/8 bytes threshold
nlpt0: <generic printer> on ppbus 0
nlpt0: Interrupt-driven port
ppi0: <generic parallel i/o> on ppbus 0
plip0: <PLIP network interface> on ppbus 0
vga0 at 0x3b0-0x3df maddr 0xa0000 msize 131072 on isa
npx0 on motherboard
npx0: INT 16 interface
WARNING: / was not properly dismounted

	The file /etc/rc.conf follows:

# This file now contains just the overrides from /etc/defaults/rc.conf
# please make all changes to this file.

nfs_client_enable="YES"
network_interfaces="xl0 lo0"
ifconfig_xl0="inet 135.25.200.84  netmask 255.255.255.0"
defaultrouter="135.25.200.1"
hostname="tuzik.lz.att.com"
linux_enable="YES"
accounting_enable="YES"
lpd_enable="YES"
moused_port="/dev/psm0"
moused_enable="YES"
saver="logo"
blanktime="300"
font8x8="koi8-r-8x8"
font8x14="koi8-r-8x14"
font8x16="koi8-r-8x16"
keyrate="fast"
keymap="ru.koi8-r"
named_enable="YES"
sendmail_flags="-bd -q7m"
dumpdev="/dev/wd0s2b"

	The only non-system daemon running is Apache (apache13 from the
	ports collection).

	I will be happy to provide any additional environmental information
	if you cannot reproduce the problem.

>Description:

	When I run a program to benchmark the system's ability to do
	fork/exec's, the kernel reproducibly locks up.  The program
	is attached below.  The program lets you specify how long
	to run the test.  If a small number is chosen (up to 5 seconds),
	everything is OK and the system shows truly unbelievable rates
	(900--700 fork/exec's per second).  If I run the program for
	the default duration (60 seconds) the console that the program
	was run from (I do not use X because of unsupported video card)
	accepts input, but ^C, ^\ and ^Z do nothing (just get printed);
	the other virtual consoles can be switched to, but the display
	of programs such as top(1) is not updated and input is ignored
	completely.  Network connections are accepted, but not served
	(I tried to telnet in to reboot the machine; the connection
	was accepted, but the login prompt never appeared).  The
	screensaver worked all right: it turned on and then turned off
	when I pressed a key.  The three finger salute resulted in
	nothing.  The machine had to be cold booted.

>How-To-Repeat:

	The file fork-exec.c follows:


/* Test program that simply generates lots of fork/exec's.
   We execute `/bin/sh -c ""'.

   Written by Stanislav Shalunov. */

#include <errno.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

/* Loop-control flag: main() sets it to 1 before it starts forking and the
   signal handler below clears it.  sig_atomic_t is the only object type the
   C standard guarantees can be written safely from a signal handler. */
volatile sig_atomic_t more_forking;

/* Signal handler: SIGINT and SIGALRM clear more_forking so that the fork
   loop in main() stops; any other signal number is ignored.

   sig: number of the signal being delivered. */
void
handler(int sig)
{
	switch (sig) {
	case SIGINT:
	case SIGALRM:
		more_forking = 0;
		break;
	default:
		break;
	}
}

int
main(argc, argv)
	int argc;
	char *argv[];
{
	int pid;
	int attempts_to_fork, forks;
	int start, duration;
	int time_to_fork;
	int delay;
	struct sigaction sa;

	delay = 0;
	switch (argc) {
	case 1:	time_to_fork = 60;
		break;
	case 2:
	case 3:	time_to_fork = atoi(argv[1]);
		if (time_to_fork < 10)
			fprintf(stderr, "Warning: time value of %ds is too "
				"small to get adequate results.\n",
				time_to_fork);
		if (argc > 2)
			delay = atoi(argv[2]);
		break;
	default:
		fprintf(stderr, "Usage: fork-exec [time to fork in seconds"
			" [delay in milliseconds]]\n"
			"Default time is 60 seconds.  You can always send an"
			" interrupt earlier.\nDefault delay is zero.\n");
		exit(1);
	}
	
	sa.sa_handler = handler;
	sa.sa_flags = 0;
	sigemptyset(&sa.sa_mask);
	sigaction(SIGINT, &sa, NULL);
	sigaction(SIGALRM, &sa, NULL);

	alarm(time_to_fork);

	attempts_to_fork = forks = 0;
	more_forking = 1;
	system("uptime");
	start = time(NULL);
	while (more_forking) {
		attempts_to_fork++;
		/* poll(0, 0, 0) should be basically harmless, but we want to
		   eliminate (1) the overhead of a system call  (2) the
		   possibility to sleep for a long time because of an
		   overloaded system. */
		if (delay)
			poll(0, 0, delay);
		pid = fork();
		if (pid == 0) {
			/* XXX: On different systems on can have different
			   shell startup files, etc. */
			execl("/bin/sh", "sh", "-c", "", 0);
			perror("execl");
			exit(1);
		}
		else if (pid > 0)
			forks++;
	}
	duration = time(NULL) - start;
	if (! duration)
		duration = 1;
	printf("In %d secs made %d attempts to fork, of which %d succeeded.\n",
	       duration, attempts_to_fork, forks);
	printf("%d%% of forks succeeded; %d forks/second.\n",
	       forks*100/attempts_to_fork, forks/(duration));
	fflush(stdout);
	execlp("uptime", "uptime", 0);
	fprintf(stderr, "Either ``uptime'' is not in your PATH, or this test\n"
		"has brought the system to its knees, see what happens:\n");
	perror("execlp(uptime)");
	exit(1);
}

	In order to reproduce the problem, compile the program (I tried
	``gcc -o fork-exec fork-exec.c'' and
	``gcc -O6 -o fork-exec fork-exec.c''), and run it with
	``./fork-exec''.  Have your finger on the RESET button.

	You might want to play with the arguments; for small values of the
	first argument the machine doesn't crash.

>Fix:
	
	Unknown.

	Workaround might be to set rlimits, but that's not what I
	am looking for: other Unix systems (such as Solaris, etc.)
	do not misbehave in any way when I test them this way.

>Release-Note:
>Audit-Trail:
>Unformatted:


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199903111536.KAA00419>