Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 23 Nov 2008 17:41:38 +0200
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Kip Macy <kmacy@freebsd.org>
Cc:        svn-src-head@freebsd.org, Scott Long <scottl@samsco.org>, src-committers@freebsd.org, svn-src-all@freebsd.org
Subject:   Re: svn commit: r185162 - in head: . sys/amd64/include sys/arm/include sys/conf sys/dev/bce sys/dev/cxgb sys/dev/cxgb/sys sys/dev/cxgb/ulp/iw_cxgb sys/dev/mxge sys/dev/nxge sys/i386/include sys/i386/in...
Message-ID:  <20081123154138.GS6408@deviant.kiev.zoral.com.ua>
In-Reply-To: <3c1674c90811221651u338294frcdbd99b386733851@mail.gmail.com>
References:  <200811220555.mAM5tuIJ007781@svn.freebsd.org> <20081122112949.GA6408@deviant.kiev.zoral.com.ua> <3c1674c90811221326m41e229f7p6abbc0eb473e900e@mail.gmail.com> <49288222.5060205@samsco.org> <20081122221953.GO6408@deviant.kiev.zoral.com.ua> <4928910B.1020403@samsco.org> <3c1674c90811221651u338294frcdbd99b386733851@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--PkEWctFf+8E2rcii
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Sun, Nov 23, 2008 at 12:51:58AM +0000, Kip Macy wrote:
> On Sat, Nov 22, 2008 at 11:08 PM, Scott Long <scottl@samsco.org> wrote:
> > Kostik Belousov wrote:
> >>
> >> On Sat, Nov 22, 2008 at 03:05:22PM -0700, Scott Long wrote:
> >>>
> >>> A neat hack would be for the kernel linker to scan the text and do a
> >>> drop-in replacement of the opcode that is appropriate for the platfor=
m.
> >>> I can't see how a CPU_XXX definition would work because it's just a
> >>> compile time construct, one that can be included with any kernel
> >>> compile.
> >>
> >> Yes, it is possible to do that. Less drastic change is to directly
> >> check features. I moved slow code to separate section to eliminate
> >> unconditional jump in fast path.
> >> Only compile-tested.
> >>
> >
> > As long as it works, I think it's a step in the right direction; I'm
> > assuming that cpu_feature is a symbol filled in at runtime and not a
> > macro for the cpuid instruction, right?
> >
> > Scott
> >
>=20
> i386/include/md_var.h:
> <..>
> extern  u_int   cpu_exthigh;
> extern  u_int   cpu_feature;
> extern  u_int   cpu_feature2;
> extern  u_int   amd_feature;
> extern  u_int   amd_feature2;
> <...>
>=20
> I'm not thrilled with it, but we can revisit the issue if it makes a
> measurable difference on someone's workload.

Below is the updated patch. It includes changes made after private comments
by bde@ and uses symbolic definitions for the bits in the features words.
I thought about accessing a per-CPU word for serialized instruction in the
slow path, but decided that it does not beneficial.

diff --git a/sys/conf/ldscript.i386 b/sys/conf/ldscript.i386
index a94f32f..49d9636 100644
--- a/sys/conf/ldscript.i386
+++ b/sys/conf/ldscript.i386
@@ -45,6 +45,7 @@ SECTIONS
   .text      :
   {
     *(.text)
+    *(.text.offpath)
     *(.stub)
     /* .gnu.warning sections are handled specially by elf32.em.  */
     *(.gnu.warning)
diff --git a/sys/dev/iir/iir.c b/sys/dev/iir/iir.c
index cbcb449..a01b685 100644
--- a/sys/dev/iir/iir.c
+++ b/sys/dev/iir/iir.c
@@ -403,7 +403,7 @@ iir_init(struct gdt_softc *gdt)
 		gdt->oem_name[7]=3D'\0';
 	} else {
 		/* Old method, based on PCI ID */
-		if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID)
+		if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR)
             strcpy(gdt->oem_name,"Intel  ");
         else=20
        	    strcpy(gdt->oem_name,"ICP    ");
@@ -1447,7 +1447,7 @@ iir_action( struct cam_sim *sim, union ccb *ccb )
                   (bus =3D=3D gdt->sc_virt_bus ? 127 : gdt->sc_bus_id[bus]=
);
               cpi->base_transfer_speed =3D 3300;
               strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
-              if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID)
+              if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR)
                   strncpy(cpi->hba_vid, "Intel Corp.", HBA_IDLEN);
               else
                   strncpy(cpi->hba_vid, "ICP vortex ", HBA_IDLEN);
diff --git a/sys/dev/iir/iir.h b/sys/dev/iir/iir.h
index dca493d..dbae7d2 100644
--- a/sys/dev/iir/iir.h
+++ b/sys/dev/iir/iir.h
@@ -63,7 +63,7 @@
 #define GDT_DEVICE_ID_MAX       0x2ff
 #define GDT_DEVICE_ID_NEWRX     0x300
=20
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IIR     0x8086
 #define INTEL_DEVICE_ID_IIR     0x600
=20
 #define GDT_MAXBUS              6       /* XXX Why not 5? */
diff --git a/sys/dev/iir/iir_ctrl.c b/sys/dev/iir/iir_ctrl.c
index e5fbac9..2af2aad 100644
--- a/sys/dev/iir/iir_ctrl.c
+++ b/sys/dev/iir/iir_ctrl.c
@@ -278,7 +278,7 @@ iir_ioctl(struct cdev *dev, u_long cmd, caddr_t cmdarg,=
 int flags, d_thread_t *
                 return (ENXIO);
             /* only RP controllers */
             p->ext_type =3D 0x6000 | gdt->sc_device;
-            if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID) {
+            if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR) {
                 p->oem_id =3D OEM_ID_INTEL;
                 p->type =3D 0xfd;
                 /* new -> subdevice into ext_type */
diff --git a/sys/dev/iir/iir_pci.c b/sys/dev/iir/iir_pci.c
index 528b7d7..0f420a1 100644
--- a/sys/dev/iir/iir_pci.c
+++ b/sys/dev/iir/iir_pci.c
@@ -164,7 +164,7 @@ MODULE_DEPEND(iir, cam, 1, 1, 1);
 static int
 iir_pci_probe(device_t dev)
 {
-    if (pci_get_vendor(dev) =3D=3D INTEL_VENDOR_ID &&
+    if (pci_get_vendor(dev) =3D=3D INTEL_VENDOR_ID_IIR &&
         pci_get_device(dev) =3D=3D INTEL_DEVICE_ID_IIR) {
         device_set_desc(dev, "Intel Integrated RAID Controller");
         return (BUS_PROBE_DEFAULT);
diff --git a/sys/dev/ixgb/if_ixgb.c b/sys/dev/ixgb/if_ixgb.c
index c1d6858..df2f51a 100644
--- a/sys/dev/ixgb/if_ixgb.c
+++ b/sys/dev/ixgb/if_ixgb.c
@@ -72,8 +72,8 @@ char            ixgb_copyright[] =3D "Copyright (c) 2001-=
2004 Intel Corporation.";
 static ixgb_vendor_info_t ixgb_vendor_info_array[] =3D
 {
 	/* Intel(R) PRO/10000 Network Connection */
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
-	{INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0},
+	{INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID,=
 0},
 	/* required last entry */
 	{0, 0, 0, 0, 0}
 };
diff --git a/sys/dev/ixgb/ixgb_ids.h b/sys/dev/ixgb/ixgb_ids.h
index a224f63..aa7dab8 100644
--- a/sys/dev/ixgb/ixgb_ids.h
+++ b/sys/dev/ixgb/ixgb_ids.h
@@ -40,7 +40,7 @@
 ** The Device and Vendor IDs for 10 Gigabit MACs
 **********************************************************************/
=20
-#define INTEL_VENDOR_ID         0x8086
+#define INTEL_VENDOR_ID_IXGB    0x8086
 #define INTEL_SUBVENDOR_ID      0x8086
=20
=20
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index f6bcf0c..dbdc945 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,21 +32,47 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
=20
-
-#if defined(I686_CPU)
-#define mb()	__asm__ __volatile__ ("mfence;": : :"memory")
-#define wmb()	__asm__ __volatile__ ("sfence;": : :"memory")
-#define rmb()	__asm__ __volatile__ ("lfence;": : :"memory")
-#else
-/*
- * do we need a serializing instruction?
- */
-#define mb()
-#define wmb()
-#define rmb()
+#if defined(_KERNEL)
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#define	mb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	mfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
+#define	wmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	sfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_XMM) : "memory")
+#define	rmb()	__asm __volatile(			\
+	"testl	%0,cpu_feature				\n\
+	je	2f					\n\
+	lfence						\n\
+1:							\n\
+	.section .text.offpath				\n\
+2:	lock						\n\
+	addl	$0,cpu_feature				\n\
+	jmp	1b					\n\
+	.text						\n\
+"							\
+	: : "i"(CPUID_SSE2) : "memory")
 #endif
=20
-
 /*
  * Various simple operations on memory, each of which is atomic in the
  * presence of interrupts and multiple processors.

--PkEWctFf+8E2rcii
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (FreeBSD)

iEYEARECAAYFAkkpebIACgkQC3+MBN1Mb4gTugCeLELBS+EYwgiZhaY4hQ/Csh/o
5G8AoNMkU0eT4Wj6c5/tSIFyYpZOLM8u
=R8rE
-----END PGP SIGNATURE-----

--PkEWctFf+8E2rcii--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20081123154138.GS6408>