Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 7 May 2003 09:28:48 -0400 
From:      Don Bowman <don@sandvine.com>
To:        "'Holger.Kipp@alogis.com'" <Holger.Kipp@alogis.com>, john.cagle@hp.com, david@landgren.net, freebsd-current@freebsd.org
Subject:   RE: HP ProLiant DL360G3 rebuttal... ;-)
Message-ID:  <FE045D4D9F7AED4CBFF1B3B813C8533701B3656B@mail.sandvine.com>

index | next in thread | raw e-mail

[-- Attachment #1 --]
From: Holger Kipp [mailto:Holger.Kipp@alogis.com]
 ...
> 
> BTW: about 13% (4 out of 30) of the shipped DL360G3 dual xeon systems
> had one heatsink not properly mounted (gave no problems with W2K, but
> FreeBSD SMP got ungraceful shutdowns after a few minutes). Remounting
> cpu-heatsinks helped :-)

Just a guess at what might be happening here (observed on Supermicro
X5DPR dual Xeon systems...)
The XEON has a mode where it drops to 1/8 clock speed when it
gets too hot. It also has other clock modulation.
These require some SMM (System Management Mode) BIOS support.
I've found that on our systems, when we enter SMM, it doesn't
usually come back to the OS very well. Win2k has SMM support, I would
guess. On our systems this is something that is somewhat controllable
via the 'installed os: win2k/other/...' option in the BIOS.
Problems observed on reentry to the OS: uptime issues (due to
unknown time in SMM), our hardware watchdog sometimes timing out
(since the OS has no control), and watchdog timeouts on LAN chips.

I hacked lmmon to add Xeon temperature sensor support.
I attached my hack if anyone cares. I hard-coded the SMB addresses
for the Supermicro board I use (char cid[] = { 0x18,0x1a };) so
you might need to change that. You'll need ichsmb in your kernel.

--don


[-- Attachment #2 --]
--- lmmon.c.orig	Mon Sep 23 09:29:56 2002
+++ lmmon.c	Sat Jul 20 12:39:56 2002
@@ -51,14 +51,54 @@
 void
 usage()
 {
 	(void)fprintf(stderr, "%s%s\n%s\n",
 		      "lmmon v", LMMON_VERSION,
-		      "usage: lmmon [-hipsvm] [-r delay]");
+		      "usage: lmmon [-hipsvmx] [-r delay]");
 	exit(1);
 }
 
+int
+doioctl(int fd, int cmd, caddr_t param)
+{
+    int error = 1;
+    int retry = 3;
+
+    while (error && retry--) 
+    {
+	usleep(200);
+	error = ioctl(fd, cmd, param);
+    }
+    
+    return (error);
+}
+
+void
+GetXeon(int fd,int cpu, int *inf)
+{
+    char cid[] = { 0x18,0x1a };
+    int i;
+    signed char byte;
+    struct smbcmd cmd;
+    bzero(&cmd, sizeof(cmd));
+    cmd.data.byte_ptr = &byte;
+    cmd.slave=(u_char)cid[cpu];
+    cmd.count=1;
+    inf[0] = inf[1] = 0;
+
+    cmd.cmd = 0; /* internal -- chip temperature */
+    if (doioctl(fd, SMB_READB, (caddr_t)&cmd)!=-1)
+    {
+	inf[0] = (int )byte;
+    }
+    cmd.cmd = 1; /* External -- Die temperature */
+    if (doioctl(fd, SMB_READB, (caddr_t)&cmd)!=-1)
+    {
+	inf[1] = (int ) byte;
+    }
+}
+
 u_char
 get_data(int iodev, u_char command, int interface)
 {
 	u_char return_val;
 
@@ -97,10 +137,11 @@
 }
 
 int
 main(int argc, char *argv[])
 {
+	int xeon;
 	double voltage;
 	int i, iodev, interface, byte;
 	int ch, delay, cflag, fflag, kflag, nocurses, mflag;
 	int fandiv[3], fanspeed, temperature[3];
 	char *device_name;
@@ -112,14 +153,15 @@
 	/* Get command-line options */
 	cflag = 0;
 	fflag = 0;
 	kflag = 0;
 	mflag = 0;
+	xeon  = 0;
 	delay = DELAY;
 	interface = INTERFACE_SMB;
 	nocurses = 0;
-	while ((ch = getopt(argc, argv, "hipmr:sv")) != -1)
+	while ((ch = getopt(argc, argv, "hipmr:svx")) != -1)
 		switch(ch) {
 		case 'i':
 			interface = INTERFACE_IO;
 			break;
 		case 'p':
@@ -134,10 +176,13 @@
 		case 'm':
 			mflag = 1;
 			nocurses = 1;
 			delay = -1;
 			break;
+		case 'x':
+			xeon = 1;
+			break;
 		default:
 			usage();
 			break;
 		}
 	argc -= optind;
@@ -169,10 +214,19 @@
 			fprintf(stderr, "Check ");
 		}
 		fprintf(stderr, "the permissions of %s.\n", device_name);
 		exit(1);
 	}
+	if (xeon)
+	{
+	    xeon = open("/dev/smb0", O_RDONLY);
+	    if (xeon < 0)
+	    {
+		perror("open /dev/smb0");
+		xeon = 0;
+	    }
+	}
 
 	/* Get fan divisors */
 	byte      = get_data(iodev, LM78_FANDIV, interface);
 	fandiv[0] = LM78_DIV_FROM_DATA((byte >> 4) & 0x03);
 	fandiv[1] = LM78_DIV_FROM_DATA(byte >> 6);
@@ -194,10 +248,14 @@
 		printw("%s", "Voltages");
 		for (i = 0; i < 7; i++) {
 			move(i + 3, 27);
 			printw("%s:         V", Vname[i]);
 		}
+		if (xeon) {
+		    move(1,50);
+		    printw("%s", "XEON CPU Info");
+		}
 	}
 
 	/* Start our loop (if applicable) */
 	do {
 		/* Get motherboard temp */
@@ -241,10 +299,32 @@
 				printf("  %s :%+8.3fV\n", Vname[i], voltage);
 			else {
 				move(i + 3, 35);
 				printw("%+8.3f", voltage);
 			}
+		}
+		if (xeon)
+		{
+		    int inf[2];
+		    GetXeon(xeon,0, inf);
+		    if (nocurses)
+			printf("CPU0: mod temp: %dC, module die: %dC\n",inf[0],inf[1]);
+		    else {
+			move(3, 50);
+			printw("CPU 0 mod temp: %dC", inf[0]);
+			move(4, 50);
+			printw("CPU 0 die temp: %dC", inf[1]);
+		    }
+		    GetXeon(xeon,1, inf);
+		    if (nocurses)
+			printf("CPU1: mod temp: %dC, die temp: %dC\n",inf[0],inf[1]);
+		    else {
+			move(5, 50);
+			printw("CPU 1 mod temp: %dC",inf[0]);
+			move(6, 50);
+			printw("CPU 1 die temp: %dC",inf[1]);
+		    }
 		}
 
 		if (!nocurses) {
 			move(11, 0);
 			refresh();
home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?FE045D4D9F7AED4CBFF1B3B813C8533701B3656B>