Date: Sun, 23 Nov 2008 17:41:38 +0200 From: Kostik Belousov <kostikbel@gmail.com> To: Kip Macy <kmacy@freebsd.org> Cc: svn-src-head@freebsd.org, Scott Long <scottl@samsco.org>, src-committers@freebsd.org, svn-src-all@freebsd.org Subject: Re: svn commit: r185162 - in head: . sys/amd64/include sys/arm/include sys/conf sys/dev/bce sys/dev/cxgb sys/dev/cxgb/sys sys/dev/cxgb/ulp/iw_cxgb sys/dev/mxge sys/dev/nxge sys/i386/include sys/i386/in... Message-ID: <20081123154138.GS6408@deviant.kiev.zoral.com.ua> In-Reply-To: <3c1674c90811221651u338294frcdbd99b386733851@mail.gmail.com> References: <200811220555.mAM5tuIJ007781@svn.freebsd.org> <20081122112949.GA6408@deviant.kiev.zoral.com.ua> <3c1674c90811221326m41e229f7p6abbc0eb473e900e@mail.gmail.com> <49288222.5060205@samsco.org> <20081122221953.GO6408@deviant.kiev.zoral.com.ua> <4928910B.1020403@samsco.org> <3c1674c90811221651u338294frcdbd99b386733851@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
--PkEWctFf+8E2rcii Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Sun, Nov 23, 2008 at 12:51:58AM +0000, Kip Macy wrote: > On Sat, Nov 22, 2008 at 11:08 PM, Scott Long <scottl@samsco.org> wrote: > > Kostik Belousov wrote: > >> > >> On Sat, Nov 22, 2008 at 03:05:22PM -0700, Scott Long wrote: > >>> > >>> A neat hack would be for the kernel linker to scan the text and do a > >>> drop-in replacement of the opcode that is appropriate for the platfor= m. > >>> I can't see how a CPU_XXX definition would work because it's just a > >>> compile time construct, one that can be included with any kernel > >>> compile. > >> > >> Yes, it is possible to do that. Less drastic change is to directly > >> check features. I moved slow code to separate section to eliminate > >> unconditional jump in fast path. > >> Only compile-tested. > >> > > > > As long as it works, I think it's a step in the right direction; I'm > > assuming that cpu_feature is a symbol filled in at runtime and not a > > macro for the cpuid instruction, right? > > > > Scott > > >=20 > i386/include/md_var.h: > <..> > extern u_int cpu_exthigh; > extern u_int cpu_feature; > extern u_int cpu_feature2; > extern u_int amd_feature; > extern u_int amd_feature2; > <...> >=20 > I'm not thrilled with it, but we can revisit the issue if it makes a > measurable difference on someone's workload. Below is the updated patch. It includes changes made after private comments by bde@ and uses symbolic definitions for the bits in the features words. I thought about accessing a per-CPU word for serialized instruction in the slow path, but decided that it does not beneficial. diff --git a/sys/conf/ldscript.i386 b/sys/conf/ldscript.i386 index a94f32f..49d9636 100644 --- a/sys/conf/ldscript.i386 +++ b/sys/conf/ldscript.i386 @@ -45,6 +45,7 @@ SECTIONS .text : { *(.text) + *(.text.offpath) *(.stub) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) diff --git a/sys/dev/iir/iir.c b/sys/dev/iir/iir.c index cbcb449..a01b685 100644 --- a/sys/dev/iir/iir.c +++ b/sys/dev/iir/iir.c @@ -403,7 +403,7 @@ iir_init(struct gdt_softc *gdt) gdt->oem_name[7]=3D'\0'; } else { /* Old method, based on PCI ID */ - if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID) + if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR) strcpy(gdt->oem_name,"Intel "); else=20 strcpy(gdt->oem_name,"ICP "); @@ -1447,7 +1447,7 @@ iir_action( struct cam_sim *sim, union ccb *ccb ) (bus =3D=3D gdt->sc_virt_bus ? 127 : gdt->sc_bus_id[bus]= ); cpi->base_transfer_speed =3D 3300; strncpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN); - if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID) + if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR) strncpy(cpi->hba_vid, "Intel Corp.", HBA_IDLEN); else strncpy(cpi->hba_vid, "ICP vortex ", HBA_IDLEN); diff --git a/sys/dev/iir/iir.h b/sys/dev/iir/iir.h index dca493d..dbae7d2 100644 --- a/sys/dev/iir/iir.h +++ b/sys/dev/iir/iir.h @@ -63,7 +63,7 @@ #define GDT_DEVICE_ID_MAX 0x2ff #define GDT_DEVICE_ID_NEWRX 0x300 =20 -#define INTEL_VENDOR_ID 0x8086 +#define INTEL_VENDOR_ID_IIR 0x8086 #define INTEL_DEVICE_ID_IIR 0x600 =20 #define GDT_MAXBUS 6 /* XXX Why not 5? */ diff --git a/sys/dev/iir/iir_ctrl.c b/sys/dev/iir/iir_ctrl.c index e5fbac9..2af2aad 100644 --- a/sys/dev/iir/iir_ctrl.c +++ b/sys/dev/iir/iir_ctrl.c @@ -278,7 +278,7 @@ iir_ioctl(struct cdev *dev, u_long cmd, caddr_t cmdarg,= int flags, d_thread_t * return (ENXIO); /* only RP controllers */ p->ext_type =3D 0x6000 | gdt->sc_device; - if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID) { + if (gdt->sc_vendor =3D=3D INTEL_VENDOR_ID_IIR) { p->oem_id =3D OEM_ID_INTEL; p->type =3D 0xfd; /* new -> subdevice into ext_type */ diff --git a/sys/dev/iir/iir_pci.c b/sys/dev/iir/iir_pci.c index 528b7d7..0f420a1 100644 --- a/sys/dev/iir/iir_pci.c +++ b/sys/dev/iir/iir_pci.c @@ -164,7 +164,7 @@ MODULE_DEPEND(iir, cam, 1, 1, 1); static int iir_pci_probe(device_t dev) { - if (pci_get_vendor(dev) =3D=3D INTEL_VENDOR_ID && + if (pci_get_vendor(dev) =3D=3D INTEL_VENDOR_ID_IIR && pci_get_device(dev) =3D=3D INTEL_DEVICE_ID_IIR) { device_set_desc(dev, "Intel Integrated RAID Controller"); return (BUS_PROBE_DEFAULT); diff --git a/sys/dev/ixgb/if_ixgb.c b/sys/dev/ixgb/if_ixgb.c index c1d6858..df2f51a 100644 --- a/sys/dev/ixgb/if_ixgb.c +++ b/sys/dev/ixgb/if_ixgb.c @@ -72,8 +72,8 @@ char ixgb_copyright[] =3D "Copyright (c) 2001-= 2004 Intel Corporation."; static ixgb_vendor_info_t ixgb_vendor_info_array[] =3D { /* Intel(R) PRO/10000 Network Connection */ - {INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0}, - {INTEL_VENDOR_ID, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID, 0}, + {INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX, PCI_ANY_ID, PCI_ANY_ID, 0}, + {INTEL_VENDOR_ID_IXGB, IXGB_DEVICE_ID_82597EX_SR, PCI_ANY_ID, PCI_ANY_ID,= 0}, /* required last entry */ {0, 0, 0, 0, 0} }; diff --git a/sys/dev/ixgb/ixgb_ids.h b/sys/dev/ixgb/ixgb_ids.h index a224f63..aa7dab8 100644 --- a/sys/dev/ixgb/ixgb_ids.h +++ b/sys/dev/ixgb/ixgb_ids.h @@ -40,7 +40,7 @@ ** The Device and Vendor IDs for 10 Gigabit MACs **********************************************************************/ =20 -#define INTEL_VENDOR_ID 0x8086 +#define INTEL_VENDOR_ID_IXGB 0x8086 #define INTEL_SUBVENDOR_ID 0x8086 =20 =20 diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index f6bcf0c..dbdc945 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -32,21 +32,47 @@ #error this file needs sys/cdefs.h as a prerequisite #endif =20 - -#if defined(I686_CPU) -#define mb() __asm__ __volatile__ ("mfence;": : :"memory") -#define wmb() __asm__ __volatile__ ("sfence;": : :"memory") -#define rmb() __asm__ __volatile__ ("lfence;": : :"memory") -#else -/* - * do we need a serializing instruction? - */ -#define mb() -#define wmb() -#define rmb() +#if defined(_KERNEL) +#include <machine/cpufunc.h> +#include <machine/specialreg.h> +#define mb() __asm __volatile( \ + "testl %0,cpu_feature \n\ + je 2f \n\ + mfence \n\ +1: \n\ + .section .text.offpath \n\ +2: lock \n\ + addl $0,cpu_feature \n\ + jmp 1b \n\ + .text \n\ +" \ + : : "i"(CPUID_SSE2) : "memory") +#define wmb() __asm __volatile( \ + "testl %0,cpu_feature \n\ + je 2f \n\ + sfence \n\ +1: \n\ + .section .text.offpath \n\ +2: lock \n\ + addl $0,cpu_feature \n\ + jmp 1b \n\ + .text \n\ +" \ + : : "i"(CPUID_XMM) : "memory") +#define rmb() __asm __volatile( \ + "testl %0,cpu_feature \n\ + je 2f \n\ + lfence \n\ +1: \n\ + .section .text.offpath \n\ +2: lock \n\ + addl $0,cpu_feature \n\ + jmp 1b \n\ + .text \n\ +" \ + : : "i"(CPUID_SSE2) : "memory") #endif =20 - /* * Various simple operations on memory, each of which is atomic in the * presence of interrupts and multiple processors. --PkEWctFf+8E2rcii Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.9 (FreeBSD) iEYEARECAAYFAkkpebIACgkQC3+MBN1Mb4gTugCeLELBS+EYwgiZhaY4hQ/Csh/o 5G8AoNMkU0eT4Wj6c5/tSIFyYpZOLM8u =R8rE -----END PGP SIGNATURE----- --PkEWctFf+8E2rcii--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20081123154138.GS6408>