From owner-freebsd-bugs Wed Apr 12 14:44:17 1995 Return-Path: bugs-owner Received: (from majordom@localhost) by freefall.cdrom.com (8.6.10/8.6.6) id OAA26677 for bugs-outgoing; Wed, 12 Apr 1995 14:44:17 -0700 Received: from godzilla.zeta.org.au (godzilla.zeta.org.au [203.2.228.34]) by freefall.cdrom.com (8.6.10/8.6.6) with ESMTP id OAA26664 for ; Wed, 12 Apr 1995 14:43:47 -0700 Received: (from bde@localhost) by godzilla.zeta.org.au (8.6.9/8.6.9) id HAA26990; Thu, 13 Apr 1995 07:39:50 +1000 Date: Thu, 13 Apr 1995 07:39:50 +1000 From: Bruce Evans Message-Id: <199504122139.HAA26990@godzilla.zeta.org.au> To: gwk@cray.com Subject: Re: 80387 hangs system at divide by zero Cc: bugs@FreeBSD.org, nate@sneezy.sri.com, smace@metal-mail.neosoft.com Sender: bugs-owner@FreeBSD.org Precedence: bulk >If I generate an overflow or divide by zero on MS-DOG(tm), the result >prints out as "infinity". I tried with Symantec C++ 6.11 as well as >gcc 2.6.3 (the EMX port). Both environments mask off all exceptions >by default. If I enable exceptions, the program will hang just as Interesting. An old version of Turbo C++ (1.0) unmasks some exceptions by default. Turbo Debugger mishandles the resulting exceptions. >with FreeBSD. In case of gcc/MS-DOG I can ctrl-break out of the hang, >but the system gets somewhat instable--the next invocation of the gcc >compiler will hang irreversibly. This might be because the IRQ13 for the exception never occurred, so the FPU busy latch didn't get cleared. The next FPU instruction will then hang (I'm not sure if it hangs irreversibly). >With Linux [Slackware 2.1 binaries with a 1.1.78 kernel, gcc 2.5.8] >both overflow and divide by zero are reported as floating point >exception. Linux can handle it! When I mask off exceptions, results >are printed as infinity just as with MS-DOG. I think it clears the busy latch after an FPU hang. 
You could try this under DOS: break out of a hang, then use DEBUG's "o f0 0" command to clear the hang, then see if gcc still hangs. >Is there a way that I can mask off exceptions globally for every >program on my system? Or would you think there is a chance that You can change the #define of __INITIAL_NPXCW__ in <machine/npx.h> from __BDE_NPXCW__ to __BETTER_BDE_NPXCW__, or arrange to set the control word in crt0 like Linux does. There is no way to stop an application from changing it to something harmful. >P.S.: For the curious I will append my test program here. Yes, it's a > quick and dirty one... >... >#elif __FreeBSD__ >#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) > /* previous #define stolen from npx.c--why is it missing from > floatingpoint.h ?? */ floatingpoint.h uses `fnstenv(...); modify_env(); fldenv();' instead. This is more general but slower. Here is a better test program. It uses direct asm() statements. Unfortunately this is more portable than the C interfaces to the special FPU registers. You won't be able to run this under FreeBSD until the hangs are fixed; under Linux it should be a good test for the workarounds for the hangs. 
--- /* fpetest.c - test x87/486 floating point error handling */ #ifndef __FreeBSD__ #include #endif #include #include #include #define CW_ZM (1 << 2) /* divide by zero mask */ #define SW_BUSY (1 << 15) /* FPU busy */ #define SW_ES (1 << 7) /* exception summary */ #define SW_ZE (1 << 2) /* divide by zero (pending) exception seen */ #define NR_TESTS 15 #define TEST(n, x, y) ( setup(), ({ asm(x); 0; }), check(n, x, y) ) static double double_in_mem; static unsigned short fp_cw; static unsigned fp_env[7]; static unsigned fp_state[7 + 8 * 10 / sizeof(unsigned)]; static unsigned short fp_sw; static unsigned long max_failures; static unsigned long max_tests; static unsigned short msw; static unsigned char silent; static volatile sig_atomic_t sigfpe_handled; static volatile sig_atomic_t sigint_handled; static void delay(void); static int check(int test_nr, char *insn, int sigfpe_expected); static void report_control_words(char *when); static void setup(void); static void sigfpe_handler(int sig_num); static void sigint_handler(int sig_num); static int check(int test_nr, char *insn, int sigfpe_expected) { static char *insns[NR_TESTS]; static unsigned long failures[NR_TESTS]; static int prev_passed[NR_TESTS]; static unsigned long tests[NR_TESTS]; static unsigned long total_failures; static unsigned long total_tests; int passed; if (sigfpe_handled) { signal(SIGFPE, sigfpe_handler); sigfpe_handled = 0; passed = sigfpe_expected; } else passed = !sigfpe_expected; insns[test_nr] = insn; ++tests[test_nr]; ++total_tests; if (!passed) { ++failures[test_nr]; ++total_failures; if (!silent && (failures[test_nr] != 1 || passed == prev_passed[test_nr])) fprintf(stderr, "T %lu F %lu: %s SIGFPE for `%s'\n", total_tests, total_failures, sigfpe_expected ? 
"no" : " ", insn); } if (total_failures >= max_failures || total_tests >= max_tests || sigint_handled) { fprintf(stderr, " Tests Failures Test\n"); for (test_nr = 0; test_nr < NR_TESTS; ++test_nr) if (insns[test_nr] != NULL) fprintf(stderr, "%7lu %7lu `%s'\n", tests[test_nr], failures[test_nr], insns[test_nr]); fprintf(stderr, "%7lu %7lu totals\n", total_tests, total_failures); report_control_words("Final"); exit(total_failures ? 1 : 0); } prev_passed[test_nr] = passed; return passed; } static void delay(void) { volatile unsigned countdown; for (countdown = 100; countdown != 0; --countdown) ; } int main(argc, argv) int argc; char **argv; { unsigned char skip3; unsigned char skip4; int opt; report_control_words("Initial"); max_failures = 2 * NR_TESTS; max_tests = 2 * NR_TESTS; skip3 = 0; skip4 = 0; silent = 0; while ((opt = getopt(argc, argv, "34f:st:")) != EOF) switch(opt) { case '3': skip3 = 1; break; case '4': skip3 = 1; skip4 = 1; break; case 'f': max_failures = strtoul(optarg, (char **) NULL, 0); break; case 's': silent = 1; break; case 't': max_tests = strtoul(optarg, (char **) NULL, 0); break; default: fprintf(stderr, "usage: %s [-34s] [-f max-failures] [-t max-tests]\n", argv[0]); fprintf(stderr, " -3 = skip tests that often fail on 386's\n"); fprintf(stderr, " -4 = skip tests that often fail on [3-4]86's\n"); fprintf(stderr, " -s = silent\n"); exit(2); } signal(SIGINT, sigint_handler); while (1) { /* * Cause a divide by zero error. This should not trigger an exception. * The next no-wait FP instruction should trigger the exception. */ if (TEST(0, "fldz; fld1; fdiv %st,%st(1)", 0)) { /* * The fwait instruction should always trigger a pending exception. */ TEST(1, "fldz; fld1; fdiv %st,%st(1); fwait", 1); /* * One way for this to fail is if the kernel uses CR0_EM instead * of CR0_TS | CR0_MP to handle FP context switching. This fails * to trap fwaits immediately after an FP context switch. It is * especially bad when FP is being emulated. 
Then all fwaits are * ignored! */ TEST(2, "fldz; fld1; fdiv %st,%st(1); nop; fwait", 1); TEST(3, "fldz; fld1; fdiv %st,%st(1); call _delay; fwait", 1); /* * No-wait instructions should never trigger a pending exception. * * On my 486 system, they are all broken when the IRQ13 FP * exception reporting method is used. On at least one 386 system, * fnclex usually works but some of the others are broken, and * fnclex fails after a context switch, presumably because frstor * fails in the kernel. */ if (!skip3) { TEST(4, "fldz; fld1; fdiv %st,%st(1); fninit", 0); TEST(5, "fldz; fld1; fdiv %st,%st(1); fnstcw _fp_cw", 0); TEST(6, "fldz; fld1; fdiv %st,%st(1); fnstsw _fp_sw", 0); } if (!skip4) TEST(7, "fldz; fld1; fdiv %st,%st(1); fnclex", 0); if (!skip3) { TEST(8, "fldz; fld1; fdiv %st,%st(1); fnstenv _fp_env", 0); TEST(9, "fldz; fld1; fdiv %st,%st(1); fnsave _fp_state", 0); } } /* * fldenv and frstor of an error state should not trigger an exception, * and they should not lose the pending exception. Fake the pending * exception so that these tests can be done even if the tests for * fnstenv and fnsave of the pending exception failed. */ if (!skip3) { setup(); asm("fnstenv _fp_env"); /* an almost clean env */ fp_env[1] |= SW_BUSY | SW_ES | SW_ZE; /* fake excepttion */ asm("fldenv _fp_env"); if (check(10, "fldenv of pending exception", 0)) { delay(); asm("fwait"); check(11, "fwait after fldenv of pending exception", 1); } setup(); asm("fnsave _fp_state"); /* an almost clean state */ fp_state[1] |= SW_BUSY | SW_ES | SW_ZE; /* fake excepttion */ asm("frstor _fp_state"); if (check(12, "frstor of pending exception", 0)) { delay(); asm("fwait"); check(13, "fwait after frstor of pending exception", 1); } } /* * fstpl to memory when the FP stack is empty sometimes causes an * IRQ13 a little after the intstruction. When the fstpl is traced, * the exception appears to come from the trace trap handler! frstor * of a pending error may also cause an IRQ13 after the instruction. 
* * XXX - this test has never been observed to fail. */ TEST(14, "fstpl _double_in_mem", 0); } } static void report_control_words(char *when) { asm("smsw _msw"); asm("fnstcw _fp_cw"); fprintf(stderr, "%7s machine status word = %04x\n", when, msw); fprintf(stderr, "%7s FP control word = %04x\n", when, fp_cw); } static void setup(void) { asm("fninit; fnstcw _fp_cw"); fp_cw &= ~CW_ZM; asm("fldcw _fp_cw"); signal(SIGFPE, sigfpe_handler); sigfpe_handled = 0; } static void sigfpe_handler(int sig_num) { sigfpe_handled = 1; } static void sigint_handler(int sig_num) { sigint_handled = 1; } --- Bruce