Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 2 May 2009 08:44:31 GMT
From:      Marko Zec <zec@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 161467 for review
Message-ID:  <200905020844.n428iVBU010553@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=161467

Change 161467 by zec@zec_amdx2 on 2009/05/02 08:44:24

	IFC @ 161464

Affected files ...

.. //depot/projects/vimage/src/sys/amd64/amd64/local_apic.c#17 integrate
.. //depot/projects/vimage/src/sys/amd64/amd64/mp_machdep.c#21 integrate
.. //depot/projects/vimage/src/sys/amd64/conf/GENERIC#29 integrate
.. //depot/projects/vimage/src/sys/amd64/isa/clock.c#10 integrate
.. //depot/projects/vimage/src/sys/amd64/linux32/linux32_sysvec.c#15 integrate
.. //depot/projects/vimage/src/sys/compat/linux/linux_futex.c#10 integrate
.. //depot/projects/vimage/src/sys/conf/NOTES#48 integrate
.. //depot/projects/vimage/src/sys/conf/files#65 integrate
.. //depot/projects/vimage/src/sys/conf/options#47 integrate
.. //depot/projects/vimage/src/sys/dev/acpica/acpi.c#19 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-all.h#14 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-disk.c#12 integrate
.. //depot/projects/vimage/src/sys/dev/ata/ata-queue.c#12 integrate
.. //depot/projects/vimage/src/sys/dev/sk/if_sk.c#9 integrate
.. //depot/projects/vimage/src/sys/dev/usb/wlan/if_ural.c#7 integrate
.. //depot/projects/vimage/src/sys/i386/conf/GENERIC#32 integrate
.. //depot/projects/vimage/src/sys/i386/i386/local_apic.c#17 integrate
.. //depot/projects/vimage/src/sys/i386/i386/mp_machdep.c#20 integrate
.. //depot/projects/vimage/src/sys/i386/linux/linux_sysvec.c#11 integrate
.. //depot/projects/vimage/src/sys/isa/atrtc.c#2 integrate
.. //depot/projects/vimage/src/sys/kern/kern_osd.c#4 integrate
.. //depot/projects/vimage/src/sys/mips/include/pmap.h#4 integrate
.. //depot/projects/vimage/src/sys/mips/mips/pmap.c#11 integrate
.. //depot/projects/vimage/src/sys/modules/Makefile#46 integrate
.. //depot/projects/vimage/src/sys/net/ieee8023ad_lacp.c#11 integrate
.. //depot/projects/vimage/src/sys/net/if_bridge.c#29 integrate
.. //depot/projects/vimage/src/sys/net/if_gif.h#13 integrate
.. //depot/projects/vimage/src/sys/net/if_lagg.c#22 integrate
.. //depot/projects/vimage/src/sys/net/route.c#49 integrate
.. //depot/projects/vimage/src/sys/net/vnet.h#24 integrate
.. //depot/projects/vimage/src/sys/net80211/ieee80211_ddb.c#22 integrate
.. //depot/projects/vimage/src/sys/netinet/igmp.c#37 integrate
.. //depot/projects/vimage/src/sys/netinet/ip_fw.h#30 integrate
.. //depot/projects/vimage/src/sys/netinet/tcp_timewait.c#32 integrate
.. //depot/projects/vimage/src/sys/netinet/vinet.h#57 integrate
.. //depot/projects/vimage/src/sys/netinet6/udp6_usrreq.c#42 integrate
.. //depot/projects/vimage/src/sys/netinet6/vinet6.h#36 integrate
.. //depot/projects/vimage/src/sys/netipsec/vipsec.h#26 integrate
.. //depot/projects/vimage/src/sys/pc98/conf/GENERIC#21 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_atalk.c#3 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_audit.c#6 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_cred.c#3 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_inet.c#13 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_inet6.c#5 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_internal.h#10 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_net.c#8 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_pipe.c#8 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_posix_sem.c#9 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_posix_shm.c#4 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_priv.c#5 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_process.c#11 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_socket.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_system.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_msg.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_sem.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_sysv_shm.c#7 integrate
.. //depot/projects/vimage/src/sys/security/mac/mac_vfs.c#11 integrate

Differences ...

==== //depot/projects/vimage/src/sys/amd64/amd64/local_apic.c#17 (text+ko) ====

@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/local_apic.c,v 1.52 2009/02/21 23:15:34 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/local_apic.c,v 1.54 2009/05/01 20:53:37 mav Exp $");
 
 #include "opt_hwpmc_hooks.h"
 #include "opt_kdtrace.h"
@@ -112,7 +112,7 @@
 	u_long la_stat_ticks;
 	u_long la_prof_ticks;
 	/* Include IDT_SYSCALL to make indexing easier. */
-	u_int la_ioint_irqs[APIC_NUM_IOINTS + 1];
+	int la_ioint_irqs[APIC_NUM_IOINTS + 1];
 } static lapics[MAX_APIC_ID + 1];
 
 /* XXX: should thermal be an NMI? */
@@ -254,6 +254,8 @@
 		lapics[apic_id].la_lvts[i] = lvts[i];
 		lapics[apic_id].la_lvts[i].lvt_active = 0;
 	}
+	for (i = 0; i <= APIC_NUM_IOINTS; i++)
+	    lapics[apic_id].la_ioint_irqs[i] = -1;
 	lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL;
 	lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] =
 	    IRQ_TIMER;
@@ -363,11 +365,15 @@
 lapic_setup_clock(void)
 {
 	u_long value;
+	int i;
 
 	/* Can't drive the timer without a local APIC. */
 	if (lapic == NULL)
 		return (0);
 
+	if (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)
+		return (0);
+
 	/* Start off with a divisor of 2 (power on reset default). */
 	lapic_timer_divisor = 2;
 
@@ -807,7 +813,7 @@
 	 */
 	mtx_lock_spin(&icu_lock);
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
-		if (lapics[apic_id].la_ioint_irqs[vector] != 0)
+		if (lapics[apic_id].la_ioint_irqs[vector] != -1)
 			continue;
 		lapics[apic_id].la_ioint_irqs[vector] = irq;
 		mtx_unlock_spin(&icu_lock);
@@ -847,7 +853,7 @@
 	for (vector = 0; vector < APIC_NUM_IOINTS; vector++) {
 
 		/* Vector is in use, end run. */
-		if (lapics[apic_id].la_ioint_irqs[vector] != 0) {
+		if (lapics[apic_id].la_ioint_irqs[vector] != -1) {
 			run = 0;
 			first = 0;
 			continue;
@@ -932,7 +938,7 @@
 	sched_bind(td, apic_cpuid(apic_id));
 	thread_unlock(td);
 	mtx_lock_spin(&icu_lock);
-	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0;
+	lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1;
 	mtx_unlock_spin(&icu_lock);
 	thread_lock(td);
 	sched_unbind(td);
@@ -944,11 +950,15 @@
 u_int
 apic_idt_to_irq(u_int apic_id, u_int vector)
 {
+	int irq;
 
 	KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL &&
 	    vector <= APIC_IO_INTS + APIC_NUM_IOINTS,
 	    ("Vector %u does not map to an IRQ line", vector));
-	return (lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS]);
+	irq = lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS];
+	if (irq < 0)
+		irq = 0;
+	return (irq);
 }
 
 #ifdef DDB
@@ -974,7 +984,7 @@
 		db_printf("Interrupts bound to lapic %u\n", apic_id);
 		for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) {
 			irq = lapics[apic_id].la_ioint_irqs[i];
-			if (irq == 0 || irq == IRQ_SYSCALL)
+			if (irq == -1 || irq == IRQ_SYSCALL)
 				continue;
 			db_printf("vec 0x%2x -> ", i + APIC_IO_INTS);
 			if (irq == IRQ_TIMER)

==== //depot/projects/vimage/src/sys/amd64/amd64/mp_machdep.c#21 (text+ko) ====

@@ -25,7 +25,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/amd64/mp_machdep.c,v 1.304 2009/04/29 06:54:40 jeff Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/amd64/mp_machdep.c,v 1.305 2009/04/30 22:10:04 jkim Exp $");
 
 #include "opt_cpu.h"
 #include "opt_kstack_pages.h"
@@ -292,6 +292,10 @@
 static void
 topo_probe(void)
 {
+	static int cpu_topo_probed = 0;
+
+	if (cpu_topo_probed)
+		return;
 
 	logical_cpus = logical_cpus_mask = 0;
 	if (cpu_high >= 0xb)
@@ -299,9 +303,10 @@
 	else if (cpu_high)
 		topo_probe_0x4();
 	if (cpu_cores == 0)
-		cpu_cores = mp_ncpus;
+		cpu_cores = mp_ncpus > 0 ? mp_ncpus : 1;
 	if (cpu_logical == 0)
 		cpu_logical = 1;
+	cpu_topo_probed = 1;
 }
 
 struct cpu_group *
@@ -313,6 +318,7 @@
 	 * Determine whether any threading flags are
 	 * necessry.
 	 */
+	topo_probe();
 	if (cpu_logical > 1 && hyperthreading_cpus)
 		cg_flags = CG_FLAG_HTT;
 	else if (cpu_logical > 1)

==== //depot/projects/vimage/src/sys/amd64/conf/GENERIC#29 (text+ko) ====

@@ -16,7 +16,7 @@
 # If you are in doubt as to the purpose or necessity of a line, check first
 # in NOTES.
 #
-# $FreeBSD: src/sys/amd64/conf/GENERIC,v 1.523 2009/04/10 00:40:48 jfv Exp $
+# $FreeBSD: src/sys/amd64/conf/GENERIC,v 1.524 2009/05/01 17:20:16 sam Exp $
 
 cpu		HAMMER
 ident		GENERIC
@@ -292,8 +292,10 @@
 device		ulpt		# Printer
 device		umass		# Disks/Mass storage - Requires scbus and da
 device		ums		# Mouse
+device		rum		# Ralink Technology RT2501USB wireless NICs
+device		uath		# Atheros AR5523 wireless NICs
 device		ural		# Ralink Technology RT2500USB wireless NICs
-device		rum		# Ralink Technology RT2501USB wireless NICs
+device		zyd		# ZyDAS zd1211/zd1211b wireless NICs
 device		urio		# Diamond Rio 500 MP3 player
 # USB Serial devices
 device		uark		# Technologies ARK3116 based serial adapters

==== //depot/projects/vimage/src/sys/amd64/isa/clock.c#10 (text+ko) ====

@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/isa/clock.c,v 1.243 2008/04/22 19:38:27 phk Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/isa/clock.c,v 1.244 2009/05/01 21:43:04 mav Exp $");
 
 /*
  * Routines to handle clock hardware.
@@ -376,6 +376,17 @@
 	mtx_unlock_spin(&clock_lock);
 }
 
+static void
+i8254_restore(void)
+{
+
+	mtx_lock_spin(&clock_lock);
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
+	outb(TIMER_CNTR0, i8254_real_max_count & 0xff);
+	outb(TIMER_CNTR0, i8254_real_max_count >> 8);
+	mtx_unlock_spin(&clock_lock);
+}
+
 /* This is separate from startrtclock() so that it can be called early. */
 void
 i8254_init(void)
@@ -558,6 +569,14 @@
 	return(0);
 }
 
+static int
+attimer_resume(device_t dev)
+{
+
+	i8254_restore();
+	return(0);
+}
+
 static device_method_t attimer_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		attimer_probe),
@@ -565,7 +584,7 @@
 	DEVMETHOD(device_detach,	bus_generic_detach),
 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
 	DEVMETHOD(device_suspend,	bus_generic_suspend),
-	DEVMETHOD(device_resume,	bus_generic_resume),
+	DEVMETHOD(device_resume,	attimer_resume),
 	{ 0, 0 }
 };
 

==== //depot/projects/vimage/src/sys/amd64/linux32/linux32_sysvec.c#15 (text+ko) ====

@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.46 2009/04/05 09:27:19 dchagin Exp $");
+__FBSDID("$FreeBSD: src/sys/amd64/linux32/linux32_sysvec.c,v 1.47 2009/05/01 15:36:02 dchagin Exp $");
 #include "opt_compat.h"
 
 #ifndef COMPAT_IA32
@@ -128,7 +128,7 @@
 static void	linux32_fixlimit(struct rlimit *rl, int which);
 
 extern LIST_HEAD(futex_list, futex) futex_list;
-extern struct sx futex_sx;
+extern struct mtx futex_mtx;
 
 static eventhandler_tag linux_exit_tag;
 static eventhandler_tag linux_schedtail_tag;
@@ -1117,7 +1117,7 @@
 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
 			sx_init(&emul_shared_lock, "emuldata->shared lock");
 			LIST_INIT(&futex_list);
-			sx_init(&futex_sx, "futex protection lock");
+			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
 			    linux_proc_exit, NULL, 1000);
 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
@@ -1149,7 +1149,7 @@
 				linux_device_unregister_handler(*ldhp);
 			mtx_destroy(&emul_lock);
 			sx_destroy(&emul_shared_lock);
-			sx_destroy(&futex_sx);
+			mtx_destroy(&futex_mtx);
 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);

==== //depot/projects/vimage/src/sys/compat/linux/linux_futex.c#10 (text+ko) ====

@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/compat/linux/linux_futex.c,v 1.19 2009/04/19 13:48:42 dchagin Exp $");
+__FBSDID("$FreeBSD: src/sys/compat/linux/linux_futex.c,v 1.20 2009/05/01 15:36:02 dchagin Exp $");
 #if 0
 __KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.7 2006/07/24 19:01:49 manu Exp $");
 #endif
@@ -62,419 +62,284 @@
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_emul.h>
 
+MALLOC_DEFINE(M_FUTEX, "futex", "Linux futexes");
+MALLOC_DEFINE(M_FUTEX_WP, "futex wp", "Linux futexes wp");
+
 struct futex;
 
 struct waiting_proc {
-	struct thread *wp_t;
-	struct futex *wp_new_futex;
+	uint32_t	wp_flags;
+	struct futex	*wp_futex;
 	TAILQ_ENTRY(waiting_proc) wp_list;
 };
+
 struct futex {
-	void   *f_uaddr;
-	int	f_refcount;
+	struct sx	f_lck;
+	uint32_t	*f_uaddr;
+	uint32_t	f_refcount;
 	LIST_ENTRY(futex) f_list;
 	TAILQ_HEAD(lf_waiting_proc, waiting_proc) f_waiting_proc;
 };
 
 LIST_HEAD(futex_list, futex) futex_list;
-struct sx futex_sx;		/* this protects the LIST of futexes */
 
-#define FUTEX_LOCK sx_xlock(&futex_sx)
-#define FUTEX_UNLOCK sx_xunlock(&futex_sx)
+#define FUTEX_LOCK(f)		sx_xlock(&(f)->f_lck)
+#define FUTEX_UNLOCK(f)		sx_xunlock(&(f)->f_lck)
+#define FUTEX_INIT(f)		sx_init_flags(&(f)->f_lck, "ftlk", 0)
+#define FUTEX_DESTROY(f)	sx_destroy(&(f)->f_lck)
+#define FUTEX_ASSERT_LOCKED(f)	sx_assert(&(f)->f_lck, SA_XLOCKED)
 
-#define FUTEX_LOCKED	1
-#define FUTEX_UNLOCKED	0
+struct mtx futex_mtx;			/* protects the futex list */
+#define FUTEXES_LOCK		mtx_lock(&futex_mtx)
+#define FUTEXES_UNLOCK		mtx_unlock(&futex_mtx)
 
-#define FUTEX_SYSTEM_LOCK mtx_lock(&Giant)
-#define FUTEX_SYSTEM_UNLOCK mtx_unlock(&Giant)
+/* flags for futex_get() */
+#define FUTEX_CREATE_WP		0x1	/* create waiting_proc */
+#define FUTEX_DONTCREATE	0x2	/* don't create futex if it does not exist */
+#define FUTEX_DONTEXISTS	0x4	/* return EINVAL if futex exists */
 
-static struct futex	*futex_get(void *, int);
-static void futex_put(struct futex *);
-static int futex_sleep(struct futex *, struct thread *, unsigned long);
-static int futex_wake(struct futex *, int, struct futex *, int);
-static int futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr);
+/* wp_flags */
+#define FUTEX_WP_REQUEUED	0x1	/* wp requeued - wp moved from wp_list
+					 * of the futex the thread slept on to
+					 * wp_list of another futex.
+					 */
+#define FUTEX_WP_REMOVED	0x2	/* wp is woken up and removed from futex
+					 * wp_list to prevent double wakeup.
+					 */
 
 /* support.s */
-int futex_xchgl(int oparg, caddr_t uaddr, int *oldval);
-int futex_addl(int oparg, caddr_t uaddr, int *oldval);
-int futex_orl(int oparg, caddr_t uaddr, int *oldval);
-int futex_andl(int oparg, caddr_t uaddr, int *oldval);
-int futex_xorl(int oparg, caddr_t uaddr, int *oldval);
+int futex_xchgl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_addl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_orl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_andl(int oparg, uint32_t *uaddr, int *oldval);
+int futex_xorl(int oparg, uint32_t *uaddr, int *oldval);
 
-int
-linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
+static void
+futex_put(struct futex *f, struct waiting_proc *wp)
 {
-	int val;
-	int ret;
-	struct l_timespec timeout = {0, 0};
-	int error = 0;
-	struct futex *f;
-	struct futex *newf;
-	int timeout_hz;
-	struct timeval tv = {0, 0};
-	struct futex *f2;
-	int op_ret;
-	struct linux_emuldata *em;
+
+	FUTEX_ASSERT_LOCKED(f);
+	if (wp != NULL) {
+		if ((wp->wp_flags & FUTEX_WP_REMOVED) == 0)
+			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+		free(wp, M_FUTEX_WP);
+	}
 
-#ifdef	DEBUG
-	if (ldebug(sys_futex))
-		printf(ARGS(futex, "%p, %i, %i, *, %p, %i"), args->uaddr, args->op,
-		    args->val, args->uaddr2, args->val3);
-#endif
+	FUTEXES_LOCK;
+	if (--f->f_refcount == 0) {
+		LIST_REMOVE(f, f_list);
+		FUTEXES_UNLOCK;
+		FUTEX_UNLOCK(f);
 
-	/* 
-	 * Our implementation provides only privates futexes. Most of the apps
-	 * should use private futexes but don't claim so. Therefore we treat
-	 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
-	 * in most cases (ie. when futexes are not shared on file descriptor
-	 * or between different processes.).
-	 */
-	args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
+		FUTEX_DESTROY(f);
+		free(f, M_FUTEX);
+		return;
+	}
 
-	switch (args->op) {
-	case LINUX_FUTEX_WAIT:
-		FUTEX_SYSTEM_LOCK;
+	FUTEXES_UNLOCK;
+	FUTEX_UNLOCK(f);
+}
 
-		if ((error = copyin(args->uaddr,
-		    &val, sizeof(val))) != 0) {
-			FUTEX_SYSTEM_UNLOCK;
-			return error;
-		}
+static int
+futex_get0(uint32_t *uaddr, struct futex **newf, uint32_t flags)
+{
+	struct futex *f, *tmpf;
 
-		if (val != args->val) {
-			FUTEX_SYSTEM_UNLOCK;
-			return EWOULDBLOCK;
-		}
+	*newf = tmpf = NULL;
 
-		if (args->timeout != NULL) {
-			if ((error = copyin(args->timeout,
-			    &timeout, sizeof(timeout))) != 0) {
-				FUTEX_SYSTEM_UNLOCK;
-				return error;
+retry:
+	FUTEXES_LOCK;
+	LIST_FOREACH(f, &futex_list, f_list) {
+		if (f->f_uaddr == uaddr) {
+			if (tmpf != NULL) {
+				FUTEX_UNLOCK(tmpf);
+				FUTEX_DESTROY(tmpf);
+				free(tmpf, M_FUTEX);
+			}
+			if (flags & FUTEX_DONTEXISTS) {
+				FUTEXES_UNLOCK;
+				return (EINVAL);
 			}
-		}
 
-#ifdef DEBUG
-		if (ldebug(sys_futex))
-			printf("FUTEX_WAIT %d: val = %d, uaddr = %p, "
-			    "*uaddr = %d, timeout = %d.%09lu\n",
-			    td->td_proc->p_pid, args->val,
-			    args->uaddr, val, timeout.tv_sec,
-			    (unsigned long)timeout.tv_nsec);
-#endif
-		tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000;
-		timeout_hz = tvtohz(&tv);
+			/*
+			 * Increment the refcount of the found futex to keep
+			 * it from being deallocated before FUTEX_LOCK()
+			 */
+			++f->f_refcount;
+			FUTEXES_UNLOCK;
 
-		if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
-			timeout_hz = 0;
-		/*
-		 * If the user process requests a non null timeout,
-		 * make sure we do not turn it into an infinite
-		 * timeout because timeout_hz gets null.
-		 *
-		 * We use a minimal timeout of 1/hz. Maybe it would
-		 * make sense to just return ETIMEDOUT without sleeping.
-		 */
-		if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
-		    (timeout_hz == 0))
-			timeout_hz = 1;
-
-
-		f = futex_get(args->uaddr, FUTEX_UNLOCKED);
-		ret = futex_sleep(f, td, timeout_hz);
-		futex_put(f);
-
-#ifdef DEBUG
-		if (ldebug(sys_futex))
-			printf("FUTEX_WAIT %d: uaddr = %p, "
-			    "ret = %d\n", td->td_proc->p_pid, args->uaddr, ret);
-#endif
-
-		FUTEX_SYSTEM_UNLOCK;
-		switch (ret) {
-		case EWOULDBLOCK:	/* timeout */
-			return ETIMEDOUT;
-			break;
-		case EINTR:		/* signal */
-			return EINTR;
-			break;
-		case 0:		/* FUTEX_WAKE received */
-#ifdef DEBUG
-			if (ldebug(sys_futex))
-				printf("FUTEX_WAIT %d: uaddr = %p, "
-				    "got FUTEX_WAKE\n",
-				    td->td_proc->p_pid, args->uaddr);
-#endif
-			return 0;
-			break;
-		default:
-#ifdef DEBUG
-			if (ldebug(sys_futex))
-				printf("FUTEX_WAIT: unexpected ret = %d\n",
-				    ret);
-#endif
-			break;
+			FUTEX_LOCK(f);
+			*newf = f;
+			return (0);
 		}
+	}
 
-		/* NOTREACHED */
-		break;
+	if (flags & FUTEX_DONTCREATE) {
+		FUTEXES_UNLOCK;
+		return (0);
+	}
 
-	case LINUX_FUTEX_WAKE:
-		FUTEX_SYSTEM_LOCK;
+	if (tmpf == NULL) {
+		FUTEXES_UNLOCK;
+		tmpf = malloc(sizeof(*tmpf), M_FUTEX, M_WAITOK | M_ZERO);
+		tmpf->f_uaddr = uaddr;
+		tmpf->f_refcount = 1;
+		FUTEX_INIT(tmpf);
+		TAILQ_INIT(&tmpf->f_waiting_proc);
 
 		/*
-		 * XXX: Linux is able to cope with different addresses
-		 * corresponding to the same mapped memory in the sleeping
-		 * and waker process(es).
+		 * Lock the new futex before inserting it into the futex_list
+		 * to prevent its use by others.
 		 */
-#ifdef DEBUG
-		if (ldebug(sys_futex))
-			printf("FUTEX_WAKE %d: uaddr = %p, val = %d\n",
-			    td->td_proc->p_pid, args->uaddr, args->val);
-#endif
-		f = futex_get(args->uaddr, FUTEX_UNLOCKED);
-		td->td_retval[0] = futex_wake(f, args->val, NULL, 0);
-		futex_put(f);
+		FUTEX_LOCK(tmpf);
+		goto retry;
+	}
 
-		FUTEX_SYSTEM_UNLOCK;
-		break;
+	LIST_INSERT_HEAD(&futex_list, tmpf, f_list);
+	FUTEXES_UNLOCK;
 
-	case LINUX_FUTEX_CMP_REQUEUE:
-		FUTEX_SYSTEM_LOCK;
+	*newf = tmpf;
+	return (0);
+}
 
-		if ((error = copyin(args->uaddr,
-		    &val, sizeof(val))) != 0) {
-			FUTEX_SYSTEM_UNLOCK;
-			return error;
-		}
+static int
+futex_get(uint32_t *uaddr, struct waiting_proc **wp, struct futex **f,
+    uint32_t flags)
+{
+	int error;
 
-		if (val != args->val3) {
-			FUTEX_SYSTEM_UNLOCK;
-			return EAGAIN;
-		}
+	if (flags & FUTEX_CREATE_WP) {
+		*wp = malloc(sizeof(struct waiting_proc), M_FUTEX_WP, M_WAITOK);
+		(*wp)->wp_flags = 0;
+	}
+	error = futex_get0(uaddr, f, flags);
+	if (error) {
+		if (flags & FUTEX_CREATE_WP)
+			free(*wp, M_FUTEX_WP);
+		return (error);
+	}
+	if (flags & FUTEX_CREATE_WP) {
+		TAILQ_INSERT_HEAD(&(*f)->f_waiting_proc, *wp, wp_list);
+		(*wp)->wp_futex = *f;
+	}
 
-		f = futex_get(args->uaddr, FUTEX_UNLOCKED);
-		newf = futex_get(args->uaddr2, FUTEX_UNLOCKED);
-		td->td_retval[0] = futex_wake(f, args->val, newf,
-		    (int)(unsigned long)args->timeout);
-		futex_put(f);
-		futex_put(newf);
-
-		FUTEX_SYSTEM_UNLOCK;
-		break;
-
-	case LINUX_FUTEX_WAKE_OP:
-		FUTEX_SYSTEM_LOCK;
-#ifdef DEBUG
-		if (ldebug(sys_futex))
-			printf("FUTEX_WAKE_OP: %d: uaddr = %p, op = %d, "
-			    "val = %x, uaddr2 = %p, val3 = %x\n",
-			    td->td_proc->p_pid, args->uaddr, args->op,
-			    args->val, args->uaddr2, args->val3);
-#endif
-		f = futex_get(args->uaddr, FUTEX_UNLOCKED);
-		f2 = futex_get(args->uaddr2, FUTEX_UNLOCKED);
-
-		/*
-		 * This function returns positive number as results and
-		 * negative as errors
-		 */
-		op_ret = futex_atomic_op(td, args->val3, args->uaddr2);
-#ifdef DEBUG
-		if (ldebug(sys_futex))
-			printf("futex_atomic_op ret %d\n", op_ret);
-#endif
-		if (op_ret < 0) {
-			/* XXX: We don't handle the EFAULT yet. */
-			if (op_ret != -EFAULT) {
-				futex_put(f);
-				futex_put(f2);
-				FUTEX_SYSTEM_UNLOCK;
-				return (-op_ret);
-			}
-
-			futex_put(f);
-			futex_put(f2);
-
-			FUTEX_SYSTEM_UNLOCK;
-			return (EFAULT);
-		}
-
-		ret = futex_wake(f, args->val, NULL, 0);
-		futex_put(f);
-		if (op_ret > 0) {
-			op_ret = 0;
-			/*
-			 * Linux abuses the address of the timespec parameter
-			 * as the number of retries.
-			 */
-			op_ret += futex_wake(f2,
-			    (int)(unsigned long)args->timeout, NULL, 0);
-			ret += op_ret;
-		}
-		futex_put(f2);
-		td->td_retval[0] = ret;
-
-		FUTEX_SYSTEM_UNLOCK;
-		break;
-
-	case LINUX_FUTEX_LOCK_PI:
-		/* not yet implemented */
-		return (ENOSYS);
-
-	case LINUX_FUTEX_UNLOCK_PI:
-		/* not yet implemented */
-		return (ENOSYS);
-
-	case LINUX_FUTEX_TRYLOCK_PI:
-		/* not yet implemented */
-		return (ENOSYS);
-
-	case LINUX_FUTEX_REQUEUE:
-
-		/*
-		 * Glibc does not use this operation since Jun 2004 (2.3.3),
-		 * as it is racy and replaced by FUTEX_CMP_REQUEUE operation.
-		 * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when
-		 * FUTEX_REQUEUE returned EINVAL.
-		 */
-		em = em_find(td->td_proc, EMUL_DONTLOCK);
-		if (em->used_requeue == 0) {
-			printf("linux(%s (%d)) sys_futex: "
-			    "unsupported futex_requeue op\n",
-			    td->td_proc->p_comm, td->td_proc->p_pid);
-			em->used_requeue = 1;
-		}
-		return (EINVAL);
-
-	default:
-		printf("linux_sys_futex: unknown op %d\n",
-		    args->op);
-		return (ENOSYS);
-	}
-	return (0);
+	return (error);
 }
 
-static struct futex *
-futex_get(void *uaddr, int locked)
+static int
+futex_sleep(struct futex *f, struct waiting_proc *wp, unsigned long timeout)
 {
-	struct futex *f;
+	int error;
 
-	if (locked == FUTEX_UNLOCKED)
-		FUTEX_LOCK;
-	LIST_FOREACH(f, &futex_list, f_list) {
-		if (f->f_uaddr == uaddr) {
-			f->f_refcount++;
-			if (locked == FUTEX_UNLOCKED)
-				FUTEX_UNLOCK;
-			return f;
-		}
+	FUTEX_ASSERT_LOCKED(f);
+	error = sx_sleep(wp, &f->f_lck, PCATCH, "futex", timeout);
+	if (wp->wp_flags & FUTEX_WP_REQUEUED) {
+		KASSERT(f != wp->wp_futex, ("futex != wp_futex"));
+		futex_put(f, NULL);
+		f = wp->wp_futex;
+		FUTEX_LOCK(f);
 	}
 
-	f = malloc(sizeof(*f), M_LINUX, M_WAITOK);
-	f->f_uaddr = uaddr;
-	f->f_refcount = 1;
-	TAILQ_INIT(&f->f_waiting_proc);
-	LIST_INSERT_HEAD(&futex_list, f, f_list);
-	if (locked == FUTEX_UNLOCKED)
-		FUTEX_UNLOCK;
-
-	return f;
+	futex_put(f, wp);
+	return (error);
 }
 
-static void
-futex_put(f)
-	struct futex *f;
+static int
+futex_wake(struct futex *f, int n)
 {
-	FUTEX_LOCK;
-	f->f_refcount--;
-	if (f->f_refcount == 0) {
-		LIST_REMOVE(f, f_list);
-		free(f, M_LINUX);
+	struct waiting_proc *wp, *wpt;
+	int count = 0;
+
+	FUTEX_ASSERT_LOCKED(f);
+	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
+		wp->wp_flags |= FUTEX_WP_REMOVED;
+		TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+		wakeup_one(wp);
+		if (++count == n)
+			break;
 	}
-	FUTEX_UNLOCK;
 
-	return;
+	return (count);
 }
 
 static int
-futex_sleep(struct futex *f, struct thread *td, unsigned long timeout)
+futex_requeue(struct futex *f, int n, struct futex *f2, int n2)
 {
-	struct waiting_proc *wp;
-	int ret;
+	struct waiting_proc *wp, *wpt;
+	int count = 0;
 
-	wp = malloc(sizeof(*wp), M_LINUX, M_WAITOK);
-	wp->wp_t = td;
-	wp->wp_new_futex = NULL;
-	FUTEX_LOCK;
-	TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list);
-	FUTEX_UNLOCK;
+	FUTEX_ASSERT_LOCKED(f);
+	FUTEX_ASSERT_LOCKED(f2);
 
-#ifdef DEBUG
-	if (ldebug(sys_futex))
-		printf("FUTEX --> %d tlseep timeout = %ld\n",
-		    td->td_proc->p_pid, timeout);
-#endif
-	ret = tsleep(wp, PCATCH | PZERO, "linuxfutex", timeout);
-#ifdef DEBUG
-	if (ldebug(sys_futex))
-		printf("FUTEX -> %d tsleep returns %d\n",
-		    td->td_proc->p_pid, ret);
-#endif
+	TAILQ_FOREACH_SAFE(wp, &f->f_waiting_proc, wp_list, wpt) {
+		if (++count <= n) {
+			wp->wp_flags |= FUTEX_WP_REMOVED;
+			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+			wakeup_one(wp);
+		} else {
+			wp->wp_flags |= FUTEX_WP_REQUEUED;
+			/* Move wp to wp_list of f2 futex */
+			TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
+			TAILQ_INSERT_HEAD(&f2->f_waiting_proc, wp, wp_list);
 
-	FUTEX_LOCK;
-	TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list);
-	FUTEX_UNLOCK;
-
-	/* if we got woken up in futex_wake */
-	if ((ret == 0) && (wp->wp_new_futex != NULL)) {
-		/* suspend us on the new futex */
-		ret = futex_sleep(wp->wp_new_futex, td, timeout);
-		/* and release the old one */
-		futex_put(wp->wp_new_futex);
+			/*
+			 * The thread sleeping on wp needs to acquire the f2
+			 * lock after it wakes, so increment the refcount of
+			 * f2 to prevent its premature deallocation.
+			 */
+			wp->wp_futex = f2;
+			FUTEXES_LOCK;
+			++f2->f_refcount;
+			FUTEXES_UNLOCK;
+			if (count - n >= n2)
+				break;
+		}
 	}
 
-	free(wp, M_LINUX);
-
-	return ret;
+	return (count);
 }
 
 static int
-futex_wake(struct futex *f, int n, struct futex *newf, int n2)
+futex_wait(struct futex *f, struct waiting_proc *wp, struct l_timespec *ts)
 {
-	struct waiting_proc *wp;
-	int count;
+	struct l_timespec timeout = {0, 0};
+	struct timeval tv = {0, 0};
+	int timeout_hz;
+	int error;
+
+	if (ts != NULL) {
+		error = copyin(ts, &timeout, sizeof(timeout));
+		if (error)
+			return (error);
+	}
+
+	tv.tv_usec = timeout.tv_sec * 1000000 + timeout.tv_nsec / 1000;
+	timeout_hz = tvtohz(&tv);
+
+	if (timeout.tv_sec == 0 && timeout.tv_nsec == 0)
+		timeout_hz = 0;
 
 	/*
-	 * Linux is very strange it wakes up N threads for
-	 * all operations BUT requeue ones where its N+1
-	 * mimic this.
+	 * If the user process requests a non null timeout,
+	 * make sure we do not turn it into an infinite
+	 * timeout because timeout_hz gets null.
+	 *
+	 * We use a minimal timeout of 1/hz. Maybe it would
+	 * make sense to just return ETIMEDOUT without sleeping.
 	 */
-	count = newf ? 0 : 1;
+	if (((timeout.tv_sec != 0) || (timeout.tv_nsec != 0)) &&
+	    (timeout_hz == 0))
+		timeout_hz = 1;
 
-	FUTEX_LOCK;
-	TAILQ_FOREACH(wp, &f->f_waiting_proc, wp_list) {
-		if (count <= n) {
-			wakeup_one(wp);
-			count++;
-		} else {
-			if (newf != NULL) {
-				/* futex_put called after tsleep */
-				wp->wp_new_futex = futex_get(newf->f_uaddr,
-				    FUTEX_LOCKED);
-				wakeup_one(wp);
-				if (count - n >= n2)
-					break;
-			}
-		}
-	}
-	FUTEX_UNLOCK;
+	error = futex_sleep(f, wp, timeout_hz);
+	if (error == EWOULDBLOCK)
+		error = ETIMEDOUT;
 
-	return count;
+	return (error);
 }
 
 static int
-futex_atomic_op(struct thread *td, int encoded_op, caddr_t uaddr)
+futex_atomic_op(struct thread *td, int encoded_op, uint32_t *uaddr)
 {
 	int op = (encoded_op >> 28) & 7;
 	int cmp = (encoded_op >> 24) & 15;
@@ -536,14 +401,237 @@
 }
 
 int
+linux_sys_futex(struct thread *td, struct linux_sys_futex_args *args)
+{
+	int op_ret, val, ret, nrwake;
+	struct linux_emuldata *em;
+	struct waiting_proc *wp;
+	struct futex *f, *f2;
+	int error = 0;
+
+	/*
+	 * Our implementation provides only private futexes. Most of the apps
+	 * should use private futexes but don't claim so. Therefore we treat
+	 * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works
+	 * in most cases (ie. when futexes are not shared on file descriptor
+	 * or between different processes.).
+	 */
+	args->op = (args->op & ~LINUX_FUTEX_PRIVATE_FLAG);
+
+	switch (args->op) {
+	case LINUX_FUTEX_WAIT:
+
+#ifdef DEBUG
+		if (ldebug(sys_futex))
+			printf(ARGS(sys_futex, "futex_wait val %d uaddr %p"),
+			    args->val, args->uaddr);
+#endif
+		error = futex_get(args->uaddr, &wp, &f, FUTEX_CREATE_WP);
+		if (error)
+			return (error);
+		error = copyin(args->uaddr, &val, sizeof(val));
+		if (error) {
+			futex_put(f, wp);
+			return (error);
+		}

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200905020844.n428iVBU010553>