Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 24 May 2015 14:53:17 +0000 (UTC)
From:      Dmitry Chagin <dchagin@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r283383 - in head/sys: amd64/linux32 compat/linux i386/linux
Message-ID:  <201505241453.t4OErHG2034385@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: dchagin
Date: Sun May 24 14:53:16 2015
New Revision: 283383
URL: https://svnweb.freebsd.org/changeset/base/283383

Log:
  Switch linuxulator to use the native 1:1 threads.
  
  The reasons:
  1. Get rid of the stubs/quirks with process dethreading,
     process reparenting when the process group leader exits, and the
     related problems with wait(), waitpid(), etc.
  2. Reuse our kernel code instead of writing excessive thread
     management routines in the Linuxulator.
  
  Implementation details:
  
  1. The thread is created via kern_thr_new() in the clone() call with
     the CLONE_THREAD parameter. Thus, everything else is a process.
  2. The test of whether the process has threads is done via the
     P_HADTHREADS bit in p_flag of struct proc.
  3. The per-thread emulator state data structure is now located in
     struct thread and freed in the thread_dtor() hook.
     Holding p_mtx is mandatory when referencing emuldata
     from other threads.
  4. PID mangling has changed. Now Linux pid is the native tid
     and Linux tgid is the native pid, with the exception of the first
     thread in the process where tid and pid are one and the same.
  
  Ugliness:
  
     When the Linux thread is the initial thread in the thread
     group, its thread id is equal to the process id. Glibc depends on
     this magic (assert in pthread_getattr_np.c). So for system calls
     that take a thread id as a parameter we should use a special
     method to look up the struct thread.
  
  Differential Revision:	https://reviews.freebsd.org/D1039

Modified:
  head/sys/amd64/linux32/linux32_machdep.c
  head/sys/amd64/linux32/linux32_sysvec.c
  head/sys/compat/linux/check_error.d
  head/sys/compat/linux/check_internal_locks.d
  head/sys/compat/linux/linux_emul.c
  head/sys/compat/linux/linux_emul.h
  head/sys/compat/linux/linux_fork.c
  head/sys/compat/linux/linux_futex.c
  head/sys/compat/linux/linux_futex.h
  head/sys/compat/linux/linux_misc.c
  head/sys/compat/linux/linux_misc.h
  head/sys/compat/linux/linux_signal.c
  head/sys/compat/linux/stats_timing.d
  head/sys/i386/linux/linux_machdep.c
  head/sys/i386/linux/linux_sysvec.c

Modified: head/sys/amd64/linux32/linux32_machdep.c
==============================================================================
--- head/sys/amd64/linux32/linux32_machdep.c	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/amd64/linux32/linux32_machdep.c	Sun May 24 14:53:16 2015	(r283383)
@@ -157,15 +157,8 @@ linux_execve(struct thread *td, struct l
 	free(path, M_TEMP);
 	if (error == 0)
 		error = kern_execve(td, &eargs, NULL);
-	if (error == 0) {
-		/* Linux process can execute FreeBSD one, do not attempt
-		 * to create emuldata for such process using
-		 * linux_proc_init, this leads to a panic on KASSERT
-		 * because such process has p->p_emuldata == NULL.
-		 */
-		if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX)
-			error = linux_proc_init(td, 0, 0);
-	}
+	if (error == 0)
+		error = linux_common_execve(td, &eargs);
 	post_execve(td, error, oldvmspace);
 	return (error);
 }
@@ -464,8 +457,14 @@ int
 linux_set_upcall_kse(struct thread *td, register_t stack)
 {
 
-	td->td_frame->tf_rsp = stack;
+	if (stack)
+		td->td_frame->tf_rsp = stack;
 
+	/*
+	 * The newly created Linux thread returns
+	 * to the user space by the same path that a parent do.
+	 */
+	td->td_frame->tf_rax = 0;
 	return (0);
 }
 

Modified: head/sys/amd64/linux32/linux32_sysvec.c
==============================================================================
--- head/sys/amd64/linux32/linux32_sysvec.c	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/amd64/linux32/linux32_sysvec.c	Sun May 24 14:53:16 2015	(r283383)
@@ -130,6 +130,7 @@ static boolean_t linux32_trans_osrel(con
 
 static eventhandler_tag linux_exit_tag;
 static eventhandler_tag linux_exec_tag;
+static eventhandler_tag linux_thread_dtor_tag;
 
 /*
  * Linux syscalls return negative errno's, we do positive and map them
@@ -1037,6 +1038,7 @@ struct sysentvec elf_linux_sysvec = {
 	.sv_shared_page_base = LINUX32_SHAREDPAGE,
 	.sv_shared_page_len = PAGE_SIZE,
 	.sv_schedtail	= linux_schedtail,
+	.sv_thread_detach = linux_thread_detach,
 };
 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
 
@@ -1125,14 +1127,14 @@ linux_elf_modevent(module_t mod, int typ
 				linux_ioctl_register_handler(*lihp);
 			SET_FOREACH(ldhp, linux_device_handler_set)
 				linux_device_register_handler(*ldhp);
-			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
-			sx_init(&emul_shared_lock, "emuldata->shared lock");
 			LIST_INIT(&futex_list);
 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
 			    linux_proc_exit, NULL, 1000);
 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
 			    linux_proc_exec, NULL, 1000);
+			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
+			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
 			linux_szplatform = roundup(strlen(linux_platform) + 1,
 			    sizeof(char *));
 			linux_osd_jail_register();
@@ -1158,11 +1160,10 @@ linux_elf_modevent(module_t mod, int typ
 				linux_ioctl_unregister_handler(*lihp);
 			SET_FOREACH(ldhp, linux_device_handler_set)
 				linux_device_unregister_handler(*ldhp);
-			mtx_destroy(&emul_lock);
-			sx_destroy(&emul_shared_lock);
 			mtx_destroy(&futex_mtx);
 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
+			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
 			linux_osd_jail_deregister();
 			if (bootverbose)
 				printf("Linux ELF exec handler removed\n");

Modified: head/sys/compat/linux/check_error.d
==============================================================================
--- head/sys/compat/linux/check_error.d	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/compat/linux/check_error.d	Sun May 24 14:53:16 2015	(r283383)
@@ -36,8 +36,8 @@
  */
 
 linuxulator*:dummy::not_implemented,
-linuxulator*:emul:proc_exit:child_clear_tid_error,
-linuxulator*:emul:proc_exit:futex_failed,
+linuxulator*:emul:linux_thread_detach:child_clear_tid_error,
+linuxulator*:emul:linux_thread_detach:futex_failed,
 linuxulator*:emul:linux_schedtail:copyout_error,
 linuxulator*:futex:futex_get:error,
 linuxulator*:futex:futex_sleep:requeue_error,

Modified: head/sys/compat/linux/check_internal_locks.d
==============================================================================
--- head/sys/compat/linux/check_internal_locks.d	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/compat/linux/check_internal_locks.d	Sun May 24 14:53:16 2015	(r283383)
@@ -41,14 +41,9 @@
 
 BEGIN
 {
-	check["emul_lock"] = 0;
-	check["emul_shared_rlock"] = 0;
-	check["emul_shared_wlock"] = 0;
 	check["futex_mtx"] = 0;
 }
 
-linuxulator*:locks:emul_lock:locked,
-linuxulator*:locks:emul_shared_wlock:locked,
 linuxulator*:locks:futex_mtx:locked
 /check[probefunc] > 0/
 {
@@ -57,9 +52,6 @@ linuxulator*:locks:futex_mtx:locked
 	stack();
 }
 
-linuxulator*:locks:emul_lock:locked,
-linuxulator*:locks:emul_shared_rlock:locked,
-linuxulator*:locks:emul_shared_wlock:locked,
 linuxulator*:locks:futex_mtx:locked
 {
 	++check[probefunc];
@@ -69,9 +61,6 @@ linuxulator*:locks:futex_mtx:locked
 	spec[probefunc] = speculation();
 }
 
-linuxulator*:locks:emul_lock:unlock,
-linuxulator*:locks:emul_shared_rlock:unlock,
-linuxulator*:locks:emul_shared_wlock:unlock,
 linuxulator*:locks:futex_mtx:unlock
 /check[probefunc] == 0/
 {
@@ -82,9 +71,6 @@ linuxulator*:locks:futex_mtx:unlock
 	stack();
 }
 
-linuxulator*:locks:emul_lock:unlock,
-linuxulator*:locks:emul_shared_rlock:unlock,
-linuxulator*:locks:emul_shared_wlock:unlock,
 linuxulator*:locks:futex_mtx:unlock
 {
 	discard(spec[probefunc]);
@@ -95,27 +81,6 @@ linuxulator*:locks:futex_mtx:unlock
 /* Timeout handling */
 
 tick-10s
-/spec["emul_lock"] != 0 && timestamp - ts["emul_lock"] >= 9999999000/
-{
-	commit(spec["emul_lock"]);
-	spec["emul_lock"] = 0;
-}
-
-tick-10s
-/spec["emul_shared_wlock"] != 0 && timestamp - ts["emul_shared_wlock"] >= 9999999000/
-{
-	commit(spec["emul_shared_wlock"]);
-	spec["emul_shared_wlock"] = 0;
-}
-
-tick-10s
-/spec["emul_shared_rlock"] != 0 && timestamp - ts["emul_shared_rlock"] >= 9999999000/
-{
-	commit(spec["emul_shared_rlock"]);
-	spec["emul_shared_rlock"] = 0;
-}
-
-tick-10s
 /spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/
 {
 	commit(spec["futex_mtx"]);

Modified: head/sys/compat/linux/linux_emul.c
==============================================================================
--- head/sys/compat/linux/linux_emul.c	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/compat/linux/linux_emul.c	Sun May 24 14:53:16 2015	(r283383)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2006 Roman Divacky
+ * Copyright (c) 2013 Dmitry Chagin
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -35,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
+#include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
@@ -58,6 +60,7 @@ __FBSDID("$FreeBSD$");
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_futex.h>
 #include <compat/linux/linux_misc.h>
+#include <compat/linux/linux_util.h>
 
 /**
  * Special DTrace provider for the linuxulator.
@@ -73,33 +76,21 @@ __FBSDID("$FreeBSD$");
 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
 
 /**
- * Special DTrace module "locks", it covers some linuxulator internal
- * locks.
- */
-LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *");
-LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *");
-
-/**
  * DTrace probes in this module.
  */
-LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int");
+LIN_SDT_PROBE_DEFINE1(emul, em_find, entry, "struct thread *");
 LIN_SDT_PROBE_DEFINE0(emul, em_find, return);
-LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t",
-    "int");
+LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *",
+    "struct thread *", "int");
 LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread);
 LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork);
 LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec);
 LIN_SDT_PROBE_DEFINE0(emul, proc_init, return);
 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *");
-LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed);
-LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t",
-    "struct proc *");
-LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int");
-LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return);
+LIN_SDT_PROBE_DEFINE1(emul, linux_thread_detach, entry, "struct thread *");
+LIN_SDT_PROBE_DEFINE0(emul, linux_thread_detach, futex_failed);
+LIN_SDT_PROBE_DEFINE1(emul, linux_thread_detach, child_clear_tid_error, "int");
+LIN_SDT_PROBE_DEFINE0(emul, linux_thread_detach, return);
 LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *",
     "struct image_params *");
 LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return);
@@ -108,284 +99,208 @@ LIN_SDT_PROBE_DEFINE1(emul, linux_schedt
 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return);
 LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *");
 LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return);
-LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *",
-    "int");
-LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t");
-LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return);
-
-struct sx	emul_shared_lock;
-struct mtx	emul_lock;
 
-/* this returns locked reference to the emuldata entry (if found) */
+/*
+ * This returns reference to the emuldata entry (if found)
+ *
+ * Hold PROC_LOCK when referencing emuldata from other threads.
+ */
 struct linux_emuldata *
-em_find(struct proc *p, int locked)
+em_find(struct thread *td)
 {
 	struct linux_emuldata *em;
 
-	LIN_SDT_PROBE2(emul, em_find, entry, p, locked);
-
-	if (locked == EMUL_DOLOCK)
-		EMUL_LOCK(&emul_lock);
+	LIN_SDT_PROBE1(emul, em_find, entry, td);
 
-	em = p->p_emuldata;
-
-	if (em == NULL && locked == EMUL_DOLOCK)
-		EMUL_UNLOCK(&emul_lock);
+	em = td->td_emuldata;
 
 	LIN_SDT_PROBE1(emul, em_find, return, em);
+
 	return (em);
 }
 
-int
-linux_proc_init(struct thread *td, pid_t child, int flags)
+void
+linux_proc_init(struct thread *td, struct thread *newtd, int flags)
 {
-	struct linux_emuldata *em, *p_em;
-	struct proc *p;
+	struct linux_emuldata *em;
 
-	LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags);
+	LIN_SDT_PROBE3(emul, proc_init, entry, td, newtd, flags);
 
-	if (child != 0) {
-		/* fork or create a thread */
-		em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
-		em->pid = child;
+	if (newtd != NULL) {
+		/* non-exec call */
+		em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO);
 		em->pdeath_signal = 0;
 		em->flags = 0;
 		em->robust_futexes = NULL;
 		if (flags & LINUX_CLONE_THREAD) {
-			/* handled later in the code */
 			LIN_SDT_PROBE0(emul, proc_init, create_thread);
-		} else {
-			struct linux_emuldata_shared *s;
 
+			em->em_tid = newtd->td_tid;
+		} else {
 			LIN_SDT_PROBE0(emul, proc_init, fork);
 
-			s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
-			s->refs = 1;
-			s->group_pid = child;
-
-			LIST_INIT(&s->threads);
-			em->shared = s;
+			em->em_tid = newtd->td_proc->p_pid;
 		}
+		newtd->td_emuldata = em;
 	} else {
 		/* exec */
 		LIN_SDT_PROBE0(emul, proc_init, exec);
 
 		/* lookup the old one */
-		em = em_find(td->td_proc, EMUL_DOLOCK);
+		em = em_find(td);
 		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
+
+		em->em_tid = td->td_proc->p_pid;
 	}
 
 	em->child_clear_tid = NULL;
 	em->child_set_tid = NULL;
 
+	LIN_SDT_PROBE0(emul, proc_init, return);
+}
+
+void
+linux_proc_exit(void *arg __unused, struct proc *p)
+{
+
+	/* process_exit fires for all processes; act on Linux ABI only. */
+	if (__predict_false(SV_CURPROC_ABI() == SV_ABI_LINUX)) {
+		LIN_SDT_PROBE1(emul, proc_exit, entry, p);
+		(p->p_sysent->sv_thread_detach)(curthread);
+	}
+}
+
+int 
+linux_common_execve(struct thread *td, struct image_args *eargs)
+{
+	struct linux_emuldata *em;
+	struct proc *p;
+	int error;
+
+	p = td->td_proc;
+
 	/*
-	 * allocate the shared struct only in clone()/fork cases in the case
-	 * of clone() td = calling proc and child = pid of the newly created
-	 * proc
+	 * Unlike FreeBSD abort all other threads before
+	 * proceeding exec.
 	 */
-	if (child != 0) {
-		if (flags & LINUX_CLONE_THREAD) {
-			/* lookup the parent */
-			/* 
-			 * we dont have to lock the p_em because
-			 * its waiting for us in linux_clone so
-			 * there is no chance of it changing the
-			 * p_em->shared address
-			 */
-			p_em = em_find(td->td_proc, EMUL_DONTLOCK);
-			KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
-			em->shared = p_em->shared;
-			EMUL_SHARED_WLOCK(&emul_shared_lock);
-			em->shared->refs++;
-			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
-		} else {
-			/*
-			 * handled earlier to avoid malloc(M_WAITOK) with
-			 * rwlock held
-			 */
-		}
+	PROC_LOCK(p);
+	/* See exit1() comments. */
+	thread_suspend_check(0);
+	while (p->p_flag & P_HADTHREADS) {
+		if (!thread_single(p, SINGLE_EXIT))
+			break;
+		thread_suspend_check(0);
+	}
+	PROC_UNLOCK(p);
 
-		EMUL_SHARED_WLOCK(&emul_shared_lock);
-		LIST_INSERT_HEAD(&em->shared->threads, em, threads);
-		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
-
-		p = pfind(child);
-		KASSERT(p != NULL, ("process not found in proc_init\n"));
-		p->p_emuldata = em;
+	error = kern_execve(td, eargs, NULL);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * In a case of transition from Linux binary execing to
+	 * FreeBSD binary we destroy linux emuldata thread entry.
+	 */
+	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
+		PROC_LOCK(p);
+		em = em_find(td);
+		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
+		td->td_emuldata = NULL;
 		PROC_UNLOCK(p);
-	} else
-		EMUL_UNLOCK(&emul_lock);
 
-	LIN_SDT_PROBE0(emul, proc_init, return);
+		free(em, M_TEMP);
+	}
 	return (0);
 }
 
-void
-linux_proc_exit(void *arg __unused, struct proc *p)
+void 
+linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
 {
-	struct linux_emuldata *em;
-	int error, shared_flags, shared_xstat;
-	struct thread *td = FIRST_THREAD_IN_PROC(p);
-	int *child_clear_tid;
-	struct proc *q, *nq;
-
-	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
-		return;
+	struct thread *td = curthread;
 
-	LIN_SDT_PROBE1(emul, proc_exit, entry, p);
+	/*
+	 * In a case of execing to linux binary we create linux
+	 * emuldata thread entry.
+	 */
+	if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) ==
+	    SV_ABI_LINUX)) {
+		LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
+		if (SV_PROC_ABI(p) == SV_ABI_LINUX)
+			linux_proc_init(td, NULL, 0);
+		else
+			linux_proc_init(td, td, 0);
 
-	release_futexes(p);
+		LIN_SDT_PROBE0(emul, proc_exec, return);
+	}
+}
 
-	/* find the emuldata */
-	em = em_find(p, EMUL_DOLOCK);
+void
+linux_thread_detach(struct thread *td)
+{
+	struct linux_sys_futex_args cup;
+	struct linux_emuldata *em;
+	int *child_clear_tid;
+	int null = 0;
+	int error;
 
-	KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
+	LIN_SDT_PROBE1(emul, linux_thread_detach, entry, td);
 
-	/* reparent all procs that are not a thread leader to initproc */
-	if (em->shared->group_pid != p->p_pid) {
-		LIN_SDT_PROBE3(emul, proc_exit, reparent,
-		    em->shared->group_pid, p->p_pid, p);
+	em = em_find(td);
+	KASSERT(em != NULL, ("thread_detach: emuldata not found.\n"));
 
-		child_clear_tid = em->child_clear_tid;
-		EMUL_UNLOCK(&emul_lock);
-		sx_xlock(&proctree_lock);
-		wakeup(initproc);
-		PROC_LOCK(p);
-		proc_reparent(p, initproc);
-		p->p_sigparent = SIGCHLD;
-		PROC_UNLOCK(p);
-		sx_xunlock(&proctree_lock);
-	} else {
-		child_clear_tid = em->child_clear_tid;
-		EMUL_UNLOCK(&emul_lock);	
-	}
+	LINUX_CTR1(exit, "thread detach(%d)", em->em_tid);
 
-	EMUL_SHARED_WLOCK(&emul_shared_lock);
-	shared_flags = em->shared->flags;
-	shared_xstat = em->shared->xstat;
-	LIST_REMOVE(em, threads);
-
-	em->shared->refs--;
-	if (em->shared->refs == 0) {
-		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
-		free(em->shared, M_LINUX);
-	} else	
-		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
+	release_futexes(td, em);
 
-	if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0)
-		p->p_xstat = shared_xstat;
+	child_clear_tid = em->child_clear_tid;
 
 	if (child_clear_tid != NULL) {
-		struct linux_sys_futex_args cup;
-		int null = 0;
 
+		LINUX_CTR2(exit, "thread detach(%d) %p",
+		    em->em_tid, child_clear_tid);
+	
 		error = copyout(&null, child_clear_tid, sizeof(null));
 		if (error) {
-			LIN_SDT_PROBE1(emul, proc_exit,
+			LIN_SDT_PROBE1(emul, linux_thread_detach,
 			    child_clear_tid_error, error);
 
-			free(em, M_LINUX);
-
-			LIN_SDT_PROBE0(emul, proc_exit, return);
+			LIN_SDT_PROBE0(emul, linux_thread_detach, return);
 			return;
 		}
 
-		/* futexes stuff */
 		cup.uaddr = child_clear_tid;
 		cup.op = LINUX_FUTEX_WAKE;
 		cup.val = 0x7fffffff;	/* Awake everyone */
 		cup.timeout = NULL;
 		cup.uaddr2 = NULL;
 		cup.val3 = 0;
-		error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
+		error = linux_sys_futex(td, &cup);
 		/*
 		 * this cannot happen at the moment and if this happens it
 		 * probably means there is a user space bug
 		 */
 		if (error) {
-			LIN_SDT_PROBE0(emul, proc_exit, futex_failed);
-			printf(LMSG("futex stuff in proc_exit failed.\n"));
-		}
-	}
-
-	/* clean the stuff up */
-	free(em, M_LINUX);
-
-	/* this is a little weird but rewritten from exit1() */
-	sx_xlock(&proctree_lock);
-	q = LIST_FIRST(&p->p_children);
-	for (; q != NULL; q = nq) {
-		nq = LIST_NEXT(q, p_sibling);
-		if (q->p_flag & P_WEXIT)
-			continue;
-		if (__predict_false(q->p_sysent != &elf_linux_sysvec))
-			continue;
-		em = em_find(q, EMUL_DOLOCK);
-		KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
-		PROC_LOCK(q);
-		if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
-			kern_psignal(q, em->pdeath_signal);
+			LIN_SDT_PROBE0(emul, linux_thread_detach, futex_failed);
+			printf(LMSG("futex stuff in thread_detach failed.\n"));
 		}
-		PROC_UNLOCK(q);
-		EMUL_UNLOCK(&emul_lock);
 	}
-	sx_xunlock(&proctree_lock);
 
-	LIN_SDT_PROBE0(emul, proc_exit, return);
+	LIN_SDT_PROBE0(emul, linux_thread_detach, return);
 }
 
-/*
- * This is used in a case of transition from FreeBSD binary execing to linux binary
- * in this case we create linux emuldata proc entry with the pid of the currently running
- * process.
- */
-void 
-linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
+void
+linux_thread_dtor(void *arg __unused, struct thread *td)
 {
-	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
-		LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
-	}
-	if (__predict_false(imgp->sysent == &elf_linux_sysvec
-	    && p->p_sysent != &elf_linux_sysvec))
-		linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
-	if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) ==
-	    SV_ABI_LINUX))
-		/* Kill threads regardless of imgp->sysent value */
-		linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL);
-	if (__predict_false(imgp->sysent != &elf_linux_sysvec
-	    && p->p_sysent == &elf_linux_sysvec)) {
-		struct linux_emuldata *em;
-
-		/* 
-		 * XXX:There's a race because here we assign p->p_emuldata NULL
-		 * but the process is still counted as linux one for a short
- 		 * time so some other process might reference it and try to
- 		 * access its p->p_emuldata and panicing on a NULL reference.
-		 */
-		em = em_find(p, EMUL_DONTLOCK);
-
-		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
-
-		EMUL_SHARED_WLOCK(&emul_shared_lock);
-		LIST_REMOVE(em, threads);
-
-		PROC_LOCK(p);
-		p->p_emuldata = NULL;
-		PROC_UNLOCK(p);
+	struct linux_emuldata *em;
 
-		em->shared->refs--;
-		if (em->shared->refs == 0) {
-			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
-			free(em->shared, M_LINUX);
-		} else
-			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
+	em = em_find(td);
+	if (em == NULL)
+		return;
+	td->td_emuldata = NULL;
 
-		free(em, M_LINUX);
-	}
+	LINUX_CTR1(exit, "thread dtor(%d)", em->em_tid);
 
-	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
-		LIN_SDT_PROBE0(emul, proc_exec, return);
-	}
+	free(em, M_TEMP);
 }
 
 void
@@ -396,30 +311,28 @@ linux_schedtail(struct thread *td)
 	int error = 0;
 	int *child_set_tid;
 
-	p = td->td_proc;
-
-	LIN_SDT_PROBE1(emul, linux_schedtail, entry, p);
+	LIN_SDT_PROBE1(emul, linux_schedtail, entry, td);
 
-	/* find the emuldata */
-	em = em_find(p, EMUL_DOLOCK);
+	p = td->td_proc;
 
+	em = em_find(td);
 	KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
 	child_set_tid = em->child_set_tid;
-	EMUL_UNLOCK(&emul_lock);
 
 	if (child_set_tid != NULL) {
-		error = copyout(&p->p_pid, (int *)child_set_tid,
-		    sizeof(p->p_pid));
+		error = copyout(&em->em_tid, (int *)child_set_tid,
+		    sizeof(em->em_tid));
+		LINUX_CTR4(clone, "schedtail(%d) %p stored %d error %d",
+		    td->td_tid, child_set_tid, em->em_tid, error);
 
 		if (error != 0) {
 			LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error,
 			    error);
 		}
-	}
+	} else
+		LINUX_CTR1(clone, "schedtail(%d)", em->em_tid);
 
 	LIN_SDT_PROBE0(emul, linux_schedtail, return);
-
-	return;
 }
 
 int
@@ -429,45 +342,16 @@ linux_set_tid_address(struct thread *td,
 
 	LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr);
 
-	/* find the emuldata */
-	em = em_find(td->td_proc, EMUL_DOLOCK);
-
+	em = em_find(td);
 	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
 
 	em->child_clear_tid = args->tidptr;
-	td->td_retval[0] = td->td_proc->p_pid;
 
-	EMUL_UNLOCK(&emul_lock);
+	td->td_retval[0] = em->em_tid;
 
-	LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
-	return 0;
-}
-
-void
-linux_kill_threads(struct thread *td, int sig)
-{
-	struct linux_emuldata *em, *td_em, *tmp_em;
-	struct proc *sp;
-
-	LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig);
-
-	td_em = em_find(td->td_proc, EMUL_DONTLOCK);
+	LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d",
+	    em->em_tid, args->tidptr, td->td_retval[0]);
 
-	KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n"));
-
-	EMUL_SHARED_RLOCK(&emul_shared_lock);
-	LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
-		if (em->pid == td_em->pid)
-			continue;
-
-		sp = pfind(em->pid);
-		if ((sp->p_flag & P_WEXIT) == 0)
-			kern_psignal(sp, sig);
-		PROC_UNLOCK(sp);
-
-		LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid);
-	}
-	EMUL_SHARED_RUNLOCK(&emul_shared_lock);
-
-	LIN_SDT_PROBE0(emul, linux_kill_threads, return);
+	LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
+	return (0);
 }

Modified: head/sys/compat/linux/linux_emul.h
==============================================================================
--- head/sys/compat/linux/linux_emul.h	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/compat/linux/linux_emul.h	Sun May 24 14:53:16 2015	(r283383)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2006 Roman Divacky
+ * Copyright (c) 2013 Dmitry Chagin
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,91 +32,33 @@
 #ifndef _LINUX_EMUL_H_
 #define	_LINUX_EMUL_H_
 
-#define EMUL_SHARED_HASXSTAT	0x01
-
-struct linux_emuldata_shared {
-	int	refs;
-	int	flags;
-	int	xstat;
-	pid_t	group_pid;
-
-	LIST_HEAD(, linux_emuldata) threads; /* head of list of linux threads */
-};
-
 /*
  * modeled after similar structure in NetBSD
  * this will be extended as we need more functionality
  */
 struct linux_emuldata {
-	pid_t	pid;
-
 	int    *child_set_tid;	/* in clone(): Child's TID to set on clone */
 	int    *child_clear_tid;/* in clone(): Child's TID to clear on exit */
 
-	struct linux_emuldata_shared *shared;
-
 	int	pdeath_signal;		/* parent death signal */
 	int	flags;			/* different emuldata flags */
+	int	em_tid;			/* thread id */
 
 	struct	linux_robust_list_head	*robust_futexes;
-
-	LIST_ENTRY(linux_emuldata) threads;	/* list of linux threads */
 };
 
-struct linux_emuldata	*em_find(struct proc *, int locked);
-
-/*
- * DTrace probes for locks should be fired after locking and before releasing
- * to prevent races (to provide data/function stability in dtrace, see the
- * output of "dtrace -v ..." and the corresponding dtrace docs).
- */
-#define	EMUL_LOCK(l)		do { \
-				    mtx_lock(l); \
-				    LIN_SDT_PROBE1(locks, emul_lock, \
-					locked, l); \
-				} while (0)
-#define	EMUL_UNLOCK(l)		do { \
-				    LIN_SDT_PROBE1(locks, emul_lock, \
-					unlock, l); \
-				    mtx_unlock(l); \
-				} while (0)
-
-#define	EMUL_SHARED_RLOCK(l)	do { \
-				    sx_slock(l); \
-				    LIN_SDT_PROBE1(locks, emul_shared_rlock, \
-					locked, l); \
-				} while (0)
-#define	EMUL_SHARED_RUNLOCK(l)	do { \
-				    LIN_SDT_PROBE1(locks, emul_shared_rlock, \
-					unlock, l); \
-				    sx_sunlock(l); \
-				} while (0)
-#define	EMUL_SHARED_WLOCK(l)	do { \
-				    sx_xlock(l); \
-				    LIN_SDT_PROBE1(locks, emul_shared_wlock, \
-					locked, l); \
-				} while (0)
-#define	EMUL_SHARED_WUNLOCK(l)	do { \
-				    LIN_SDT_PROBE1(locks, emul_shared_wlock, \
-					unlock, l); \
-				    sx_xunlock(l); \
-				} while (0)
-
-/* for em_find use */
-#define	EMUL_DOLOCK		1
-#define	EMUL_DONTLOCK		0
+struct linux_emuldata	*em_find(struct thread *);
 
 /* emuldata flags */
 #define	LINUX_XDEPR_REQUEUEOP	0x00000001	/* uses deprecated
 						   futex REQUEUE op*/
 
-int	linux_proc_init(struct thread *, pid_t, int);
+void	linux_proc_init(struct thread *, struct thread *, int);
 void	linux_proc_exit(void *, struct proc *);
 void	linux_schedtail(struct thread *);
 void	linux_proc_exec(void *, struct proc *, struct image_params *);
-void	linux_kill_threads(struct thread *, int);
-
-extern struct sx	emul_shared_lock;
-extern struct mtx	emul_lock;
+void	linux_thread_dtor(void *arg __unused, struct thread *);
+void	linux_thread_detach(struct thread *);
+int	linux_common_execve(struct thread *, struct image_args *);
 
 #endif	/* !_LINUX_EMUL_H_ */

Modified: head/sys/compat/linux/linux_fork.c
==============================================================================
--- head/sys/compat/linux/linux_fork.c	Sun May 24 14:51:29 2015	(r283382)
+++ head/sys/compat/linux/linux_fork.c	Sun May 24 14:53:16 2015	(r283383)
@@ -34,15 +34,21 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/imgact.h>
+#include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/racct.h>
 #include <sys/sched.h>
-#include <sys/sdt.h>
+#include <sys/syscallsubr.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/wait.h>
 
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+
 #ifdef COMPAT_LINUX32
 #include <machine/../linux32/linux.h>
 #include <machine/../linux32/linux32_proto.h>
@@ -50,18 +56,10 @@ __FBSDID("$FreeBSD$");
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #endif
-#include <compat/linux/linux_dtrace.h>
 #include <compat/linux/linux_signal.h>
 #include <compat/linux/linux_emul.h>
 #include <compat/linux/linux_misc.h>
-
-/* DTrace init */
-LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE);
-
-/* Linuxulator-global DTrace probes */
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked);
-LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock);
-
+#include <compat/linux/linux_util.h>
 
 int
 linux_fork(struct thread *td, struct linux_fork_args *args)
@@ -79,14 +77,11 @@ linux_fork(struct thread *td, struct lin
 	    != 0)
 		return (error);
 
-	td->td_retval[0] = p2->p_pid;
-	td->td_retval[1] = 0;
+	td2 = FIRST_THREAD_IN_PROC(p2);
 
-	error = linux_proc_init(td, td->td_retval[0], 0);
-	if (error)
-		return (error);
+	linux_proc_init(td, td2, 0);
 
-	td2 = FIRST_THREAD_IN_PROC(p2);
+	td->td_retval[0] = p2->p_pid;
 
 	/*
 	 * Make this runnable after we are finished with it.
@@ -116,17 +111,16 @@ linux_vfork(struct thread *td, struct li
 	    NULL, 0)) != 0)
 		return (error);
 
-   	td->td_retval[0] = p2->p_pid;
 
-	error = linux_proc_init(td, td->td_retval[0], 0);
-	if (error)
-		return (error);
+	td2 = FIRST_THREAD_IN_PROC(p2);
+
+	linux_proc_init(td, td2, 0);
 
 	PROC_LOCK(p2);
 	p2->p_flag |= P_PPWAIT;
 	PROC_UNLOCK(p2);
 
-	td2 = FIRST_THREAD_IN_PROC(p2);
+   	td->td_retval[0] = p2->p_pid;
 
 	/*
 	 * Make this runnable after we are finished with it.
@@ -145,8 +139,8 @@ linux_vfork(struct thread *td, struct li
 	return (0);
 }
 
-int
-linux_clone(struct thread *td, struct linux_clone_args *args)
+static int
+linux_clone_proc(struct thread *td, struct linux_clone_args *args)
 {
 	int error, ff = RFPROC | RFSTOPPED;
 	struct proc *p2;
@@ -183,22 +177,6 @@ linux_clone(struct thread *td, struct li
 	if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS)))
 		ff |= RFFDG;
 
-	/*
-	 * Attempt to detect when linux_clone(2) is used for creating
-	 * kernel threads. Unfortunately despite the existence of the
-	 * CLONE_THREAD flag, version of linuxthreads package used in
-	 * most popular distros as of beginning of 2005 doesn't make
-	 * any use of it. Therefore, this detection relies on
-	 * empirical observation that linuxthreads sets certain
-	 * combination of flags, so that we can make more or less
-	 * precise detection and notify the FreeBSD kernel that several
-	 * processes are in fact part of the same threading group, so
-	 * that special treatment is necessary for signal delivery
-	 * between those processes and fd locking.
-	 */
-	if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS)
-		ff |= RFTHREAD;
-
 	if (args->flags & LINUX_CLONE_PARENT_SETTID)
 		if (args->parent_tidptr == NULL)
 			return (EINVAL);
@@ -207,29 +185,13 @@ linux_clone(struct thread *td, struct li
 	if (error)
 		return (error);
 
-	if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) {
-	   	sx_xlock(&proctree_lock);
-		PROC_LOCK(p2);
-		proc_reparent(p2, td->td_proc->p_pptr);
-		PROC_UNLOCK(p2);
-		sx_xunlock(&proctree_lock);
-	}
+	td2 = FIRST_THREAD_IN_PROC(p2);
 
 	/* create the emuldata */
-	error = linux_proc_init(td, p2->p_pid, args->flags);
-	/* reference it - no need to check this */
-	em = em_find(p2, EMUL_DOLOCK);
-	KASSERT(em != NULL, ("clone: emuldata not found."));
-	/* and adjust it */
+	linux_proc_init(td, td2, args->flags);
 
-	if (args->flags & LINUX_CLONE_THREAD) {
-#ifdef notyet
-	   	PROC_LOCK(p2);
-	   	p2->p_pgrp = td->td_proc->p_pgrp;
-	   	PROC_UNLOCK(p2);
-#endif
-		exit_signal = 0;
-	}
+	em = em_find(td2);
+	KASSERT(em != NULL, ("clone_proc: emuldata not found.\n"));
 
 	if (args->flags & LINUX_CLONE_CHILD_SETTID)
 		em->child_set_tid = args->child_tidptr;
@@ -241,8 +203,6 @@ linux_clone(struct thread *td, struct li
 	else
 	   	em->child_clear_tid = NULL;
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201505241453.t4OErHG2034385>