Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 13 Apr 1995 07:39:50 +1000
From:      Bruce Evans <bde@zeta.org.au>
To:        gwk@cray.com
Cc:        bugs@FreeBSD.org, nate@sneezy.sri.com, smace@metal-mail.neosoft.com
Subject:   Re: 80387 hangs system at divide by zero
Message-ID:  <199504122139.HAA26990@godzilla.zeta.org.au>

next in thread | raw e-mail | index | archive | help
>If I generate an overflow or divide by zero on MS-DOG(tm), the result
>prints out as "infinity".  I tried with Symantec C++ 6.11 as well as
>gcc 2.6.3 (the EMX port).  Both environments mask off all exceptions
>by default.  If I enable exceptions, the program will hang just as

Interesting.  An old version of Turbo C++ (1.0) unmasks some exceptions
by default.  Turbo Debugger mishandles the resulting exceptions.

>with FreeBSD.  In case of gcc/MS-DOG I can ctrl-break out of the hang,
>but the system gets somewhat instable--the next invocation of the gcc
>compiler will hang irreversibly.

This might be because the IRQ13 for the exception never occurred, so
the FPU busy latch didn't get cleared.  The next FPU instruction will
then hang (I'm not sure if it hangs irreversibly).

>With Linux [Slackware 2.1 binaries with a 1.1.78 kernel, gcc 2.5.8]
>both overflow and divide by zero are reported as floating point
>exception.  Linux can handle it!  When I mask off exceptions, results
>are printed as infinity just as with MS-DOG.

I think it clears the busy latch after an FPU hang.  You could try
this under DOS: break out of a hang, then use DEBUG's "o f0 0" command
to clear the busy latch, then see if gcc still hangs.

>Is there a way that I can mask off exceptions globally for every
>program on my system?  Or would you think there is a chance that

You can change the #define of __INITIAL_NPXCW__ in <machine/npx.h>
from __BDE_NPXCW__ to __BETTER_BDE_NPXCW__, or arrange to set the
control word in crt0 like Linux does.  There is no way to stop an
application from changing it to something harmful.

>P.S.: For the curious I will append my test program here.  Yes, it's a
>      quick and dirty one...
>...
>#elif __FreeBSD__
>#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
>  /* previous #define stolen from npx.c--why is it missing from
>     floatingpoint.h ?? */

floatingpoint.h uses `fnstenv(...); modify_env(); fldenv();' instead.  This
is more general but slower.

Here is a better test program.  It uses direct asm() statements.
Unfortunately this is more portable than the C interfaces to the special
FPU registers.  You won't be able to run this under FreeBSD until the
hangs are fixed; under Linux it should be a good test for the workarounds
for the hangs.
---
/* fpetest.c - test x87/486 floating point error handling */

#ifndef __FreeBSD__
#include <getopt.h>
#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* Bit in the FP control word; setup() clears it so the exception is unmasked. */
#define CW_ZM	(1 << 2)	/* divide by zero mask */
/* Bits that main() ORs into a saved status word to fake a pending exception. */
#define SW_BUSY	(1 << 15)	/* FPU busy */
#define SW_ES	(1 << 7)	/* exception summary */
#define SW_ZE	(1 << 2)	/* divide by zero (pending) exception seen */

/* Size of the per-test tables in check(); main() uses test numbers 0..14. */
#define NR_TESTS	15
/*
 * Run one test: call setup(), emit the instruction string x as inline asm,
 * then call check(n, x, y).  The value of the whole expression is check()'s
 * return value.  x is used both as the asm text and as the test's printable
 * name, so it must be a string literal.  y is passed to check() as
 * sigfpe_expected.  The `({ ... })' form is the GNU C statement expression
 * extension; it lets the asm statement appear inside a comma expression.
 */
#define TEST(n, x, y)	( setup(), ({ asm(x); 0; }), check(n, x, y) )

/*
 * File-scope state.  Several of these objects are referenced from the asm
 * strings by name with a leading underscore (e.g. `_fp_cw'), so they must
 * keep their names.
 * NOTE(review): the underscore prefix presumably assumes an object format
 * that prepends `_' to C symbols -- confirm for the target toolchain.
 */
static double double_in_mem;	/* destination of the `fstpl' in test 14 */
static unsigned short fp_cw;	/* FP control word (fnstcw/fldcw operand) */
static unsigned fp_env[7];	/* fnstenv/fldenv operand; [1] is patched in main() */
/*
 * fnsave/frstor operand: 7 words plus 8 * 10 bytes.
 * NOTE(review): presumably the 7-word environment followed by eight 10-byte
 * FP registers -- confirm against the processor manual.
 */
static unsigned fp_state[7 + 8 * 10 / sizeof(unsigned)];
static unsigned short fp_sw;	/* FP status word (fnstsw operand) */
static unsigned long max_failures;	/* check() stops at this many failures (-f) */
static unsigned long max_tests;	/* check() stops at this many tests (-t) */
static unsigned short msw;	/* machine status word (smsw operand) */
static unsigned char silent;	/* nonzero (-s): suppress per-failure messages */
static volatile sig_atomic_t sigfpe_handled;	/* set by sigfpe_handler() */
static volatile sig_atomic_t sigint_handled;	/* set by sigint_handler() */

/* Forward declarations for the functions defined below. */
static void delay(void);
static int check(int test_nr, char *insn, int sigfpe_expected);
static void report_control_words(char *when);
static void setup(void);
static void sigfpe_handler(int sig_num);
static void sigint_handler(int sig_num);

static int check(int test_nr, char *insn, int sigfpe_expected)
{
    static char *insns[NR_TESTS];
    static unsigned long failures[NR_TESTS];
    static int prev_passed[NR_TESTS];
    static unsigned long tests[NR_TESTS];
    static unsigned long total_failures;
    static unsigned long total_tests;
    int passed;

    if (sigfpe_handled)
    {
	signal(SIGFPE, sigfpe_handler);
	sigfpe_handled = 0;
	passed = sigfpe_expected;
    }
    else
	passed = !sigfpe_expected;
    insns[test_nr] = insn;
    ++tests[test_nr];
    ++total_tests;
    if (!passed)
    {
	++failures[test_nr];
	++total_failures;
	if (!silent
	    && (failures[test_nr] != 1 || passed == prev_passed[test_nr]))
	    fprintf(stderr, "T %lu F %lu: %s SIGFPE for `%s'\n",
		    total_tests, total_failures,
		    sigfpe_expected ? "no" : "  ", insn);
    }
    if (total_failures >= max_failures || total_tests >= max_tests
	|| sigint_handled)
    {
	fprintf(stderr, "  Tests Failures  Test\n");
	for (test_nr = 0; test_nr < NR_TESTS; ++test_nr)
	    if (insns[test_nr] != NULL)
		fprintf(stderr, "%7lu  %7lu  `%s'\n",
			tests[test_nr], failures[test_nr], insns[test_nr]);
	fprintf(stderr, "%7lu  %7lu  totals\n",
		total_tests, total_failures);
	report_control_words("Final");
	exit(total_failures ? 1 : 0);
    }
    prev_passed[test_nr] = passed;
    return passed;
}

/*
 * Spin for a short, fixed number of iterations.  The counter is volatile so
 * the loop is not optimized away.  main() calls this directly and also from
 * an asm string (`call _delay'), so the function name must not change.
 */
static void delay(void)
{
    volatile unsigned remaining = 100;

    while (remaining != 0)
        --remaining;
}

/*
 * Parse the options, then run the FP exception tests in an endless loop.
 *
 * Options:
 *   -3               skip the tests guarded by skip3
 *   -4               skip the tests guarded by skip3 and by skip4
 *   -f max-failures  failure count at which check() prints the summary
 *   -s               do not print per-failure messages
 *   -t max-tests     test count at which check() prints the summary
 *
 * The loop never ends by itself: check() calls exit() once a limit is reached
 * or after SIGINT.  An unknown option prints the usage text and exits with
 * status 2.
 */
int main(int argc, char **argv)
{
    unsigned char skip3;
    unsigned char skip4;
    int opt;

    report_control_words("Initial");
    max_failures = 2 * NR_TESTS;
    max_tests = 2 * NR_TESTS;
    skip3 = 0;
    skip4 = 0;
    silent = 0;
    /* getopt() is specified to return -1 (not EOF) after the last option. */
    while ((opt = getopt(argc, argv, "34f:st:")) != -1)
	switch(opt)
	{
	case '3':
	    skip3 = 1;
	    break;
	case '4':
	    /* -4 implies -3. */
	    skip3 = 1;
	    skip4 = 1;
	    break;
	case 'f':
	    max_failures = strtoul(optarg, (char **) NULL, 0);
	    break;
	case 's':
	    silent = 1;
	    break;
	case 't':
	    max_tests = strtoul(optarg, (char **) NULL, 0);
	    break;
	default:
	    fprintf(stderr,
		    "usage: %s [-34s] [-f max-failures] [-t max-tests]\n",
		    argv[0]);
	    fprintf(stderr,
		    "       -3 = skip tests that often fail on 386's\n");
	    fprintf(stderr,
		    "       -4 = skip tests that often fail on [3-4]86's\n");
	    fprintf(stderr, "       -s = silent\n");
	    exit(2);
	}
    signal(SIGINT, sigint_handler);
    while (1)
    {
	/*
	 * Cause a divide by zero error.  This should not trigger an exception.
	 * The next waiting FP instruction should trigger the exception.
	 */
	if (TEST(0, "fldz; fld1; fdiv %st,%st(1)", 0))
	{
	    /*
	     * The fwait instruction should always trigger a pending exception.
	     */
	    TEST(1, "fldz; fld1; fdiv %st,%st(1); fwait", 1);

	    /*
	     * One way for this to fail is if the kernel uses CR0_EM instead
	     * of CR0_TS | CR0_MP to handle FP context switching.  This fails
	     * to trap fwaits immediately after an FP context switch.  It is
	     * especially bad when FP is being emulated.  Then all fwaits are
	     * ignored!
	     */
	    TEST(2, "fldz; fld1; fdiv %st,%st(1); nop; fwait", 1);
	    TEST(3, "fldz; fld1; fdiv %st,%st(1); call _delay; fwait", 1);

	    /*
	     * No-wait instructions should never trigger a pending exception.
	     *
	     * On my 486 system, they are all broken when the IRQ13 FP
	     * exception reporting method is used.  On at least one 386 system,
	     * fnclex usually works but some of the others are broken, and
	     * fnclex fails after a context switch, presumably because frstor
	     * fails in the kernel.
	     */
	    if (!skip3)
	    {
		TEST(4, "fldz; fld1; fdiv %st,%st(1); fninit", 0);
		TEST(5, "fldz; fld1; fdiv %st,%st(1); fnstcw _fp_cw", 0);
		TEST(6, "fldz; fld1; fdiv %st,%st(1); fnstsw _fp_sw", 0);
	    }
	    if (!skip4)
		TEST(7, "fldz; fld1; fdiv %st,%st(1); fnclex", 0);
	    if (!skip3)
	    {
		TEST(8, "fldz; fld1; fdiv %st,%st(1); fnstenv _fp_env", 0);
		TEST(9, "fldz; fld1; fdiv %st,%st(1); fnsave _fp_state", 0);
	    }
	}

	/*
	 * fldenv and frstor of an error state should not trigger an exception,
	 * and they should not lose the pending exception.  Fake the pending
	 * exception so that these tests can be done even if the tests for
	 * fnstenv and fnsave of the pending exception failed.
	 */
	if (!skip3)
	{
	    setup();
	    asm("fnstenv _fp_env");	/* an almost clean env */
	    fp_env[1] |= SW_BUSY | SW_ES | SW_ZE;	/* fake exception */
	    asm("fldenv _fp_env");
	    if (check(10, "fldenv of pending exception", 0))
	    {
		delay();
		asm("fwait");
		check(11, "fwait after fldenv of pending exception", 1);
	    }
	    setup();
	    asm("fnsave _fp_state");	/* an almost clean state */
	    fp_state[1] |= SW_BUSY | SW_ES | SW_ZE;	/* fake exception */
	    asm("frstor _fp_state");
	    if (check(12, "frstor of pending exception", 0))
	    {
		delay();
		asm("fwait");
		check(13, "fwait after frstor of pending exception", 1);
	    }
	}

	/*
	 * fstpl to memory when the FP stack is empty sometimes causes an
	 * IRQ13 a little after the instruction.  When the fstpl is traced,
	 * the exception appears to come from the trace trap handler!  frstor
	 * of a pending error may also cause an IRQ13 after the instruction.
	 *
	 * XXX - this test has never been observed to fail.
	 */
	TEST(14, "fstpl _double_in_mem", 0);
    }
}

/*
 * Print the current machine status word and FP control word to stderr.
 *
 * when  label printed at the start of each line; it is right-justified in a
 *       7-character field (main() passes "Initial", check() passes "Final").
 *
 * Side effect: overwrites the file-scope variables msw and fp_cw.
 */
static void report_control_words(char *when)
{
    /* Store the machine status word and the FP control word into memory. */
    asm("smsw _msw");
    asm("fnstcw _fp_cw");
    fprintf(stderr, "%7s machine status word = %04x\n", when, msw);
    fprintf(stderr, "%7s FP control word =     %04x\n", when, fp_cw);
}

/*
 * Prepare for one test: reinitialize the FPU, unmask the divide-by-zero
 * exception, install the SIGFPE handler and clear the SIGFPE flag.
 *
 * NOTE(review): the asm statements have no operands or clobbers, so the
 * compiler is not told that they read and write fp_cw; the code relies on the
 * statements being emitted in source order -- confirm with the compiler and
 * optimization level in use.
 */
static void setup(void)
{
    /* Reset the FPU, then store its control word into fp_cw. */
    asm("fninit; fnstcw _fp_cw");
    /* Clear the divide-by-zero mask bit and load the result back. */
    fp_cw &= ~CW_ZM;
    asm("fldcw _fp_cw");
    /* (Re)install the handler and forget any SIGFPE recorded so far. */
    signal(SIGFPE, sigfpe_handler);
    sigfpe_handled = 0;
}

/*
 * SIGFPE handler: only notes that the signal arrived.  check() reads the
 * flag and both check() and setup() clear it.
 */
static void sigfpe_handler(int sig_num)
{
    (void) sig_num;		/* the signal number is not needed */
    sigfpe_handled = 1;
}

/*
 * SIGINT handler: only notes that the signal arrived.  check() sees the flag
 * on its next call, prints the summary and exits.
 */
static void sigint_handler(int sig_num)
{
    (void) sig_num;		/* the signal number is not needed */
    sigint_handled = 1;
}
---

Bruce



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199504122139.HAA26990>